media plugin update

2025-12-06 14:36:48 +00:00 · 2024-12-04 00:36:34 +08:00
--- a/config.py
+++ b/config.py
@@ -311,7 +311,7 @@ NUM_CUSTOM_BASIC_BTN = 4


 # 媒体智能体的服务地址（这是一个huggingface空间，请前往huggingface复制该空间，然后把自己新的空间地址填在这里）
-DAAS_SERVER_URL = "https://hamercity-bbdown.hf.space/stream"
+DAAS_SERVER_URLS = [ "https://hamercity-bbdown.hf.space/stream" ] + [ f"https://hamercity-bbdown-{x}.hf.space/stream" for x in range(1, 3) ]  # base space plus the "-1" and "-2" replicas (range(1, 3) stops before 3)



--- a/crazy_functions/Internet_GPT.py
+++ b/crazy_functions/Internet_GPT.py
@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
 from functools import lru_cache
 from itertools import zip_longest
 from check_proxy import check_proxy
-from toolbox import CatchException, update_ui, get_conf
+from toolbox import CatchException, update_ui, get_conf, update_ui_lastest_msg
 from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
 from request_llms.bridge_all import model_info
 from request_llms.bridge_all import predict_no_ui_long_connection
@@ -193,6 +193,38 @@ def scrape_text(url, proxies) -> str:
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text

+def internet_search_with_analysis_prompt(prompt, analysis_prompt, llm_kwargs, chatbot):
+    """Generator (drive it with `yield from`): search for `prompt`, scrape up to 5 hits, and return the LLM's answer, or None when the search finds nothing."""
+    proxies = get_conf('proxies')   # `get_conf` comes from the module-level toolbox import
+    categories = 'general'
+    searxng_url = None  # None -> use the default searxng_url
+    engines = None  # None -> use the default search engines
+    yield from update_ui_lastest_msg(lastmsg=f"检索中: {prompt} ...", chatbot=chatbot, history=[], delay=1)
+    urls = searxng_request(prompt, proxies, categories, searxng_url, engines=engines)
+    if not urls:    # nothing found (empty or None): stop before announcing site visits that would never happen
+        return None
+    yield from update_ui_lastest_msg(lastmsg="依次访问搜索到的网站 ...", chatbot=chatbot, history=[], delay=1)
+    max_search_result = 5   # maximum number of web pages whose text is collected
+    history = []
+    for index, url in enumerate(urls[:max_search_result]):
+        yield from update_ui_lastest_msg(lastmsg=f"依次访问搜索到的网站: {url['link']} ...", chatbot=chatbot, history=[], delay=1)
+        res = scrape_text(url['link'], proxies)
+        prefix = f"第{index}份搜索结果 [源自{url['source'][0]}搜索] （{url['title'][:25]}）："
+        history.extend([prefix, res])   # one (label, page text) pair per result, passed to the LLM as history
+    i_say = f"从以上搜索结果中抽取信息，然后回答问题：{prompt} {analysis_prompt}"
+    i_say, history = input_clipping( # clip the input, longest entries first, to avoid overflowing the token budget
+        inputs=i_say,
+        history=history,
+        max_token_limit=8192    # NOTE(review): hard-coded; confirm it suits the model selected in llm_kwargs
+    )
+    gpt_say = predict_no_ui_long_connection(
+        inputs=i_say,
+        llm_kwargs=llm_kwargs,
+        history=history,
+        sys_prompt="请从搜索结果中抽取信息，对最相关的两个搜索结果进行总结，然后回答问题。",
+        console_slience=False,
+    )
+    return gpt_say

@CatchException
 def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
--- a/crazy_functions/VideoResource_GPT.py
+++ b/crazy_functions/VideoResource_GPT.py
@@ -78,18 +78,36 @@ def download_video(bvid, user_name, chatbot, history):
    # return
    return downloaded_files + downloaded_files_part2

+
+class Strategy(BaseModel):  # structured-output schema for choosing a lookup method; NOTE(review): no hunk visible in this patch references it — confirm it is used
+    thought: str = Field(description="analysis of the user's wish, for example, can you recall the name of the resource?")
+    which_methods: str = Field(description="Which method to use to find the necessary information? choose from 'method_1' and 'method_2'.")  # plain str: the two allowed values are only stated in the description
+    method_1_search_keywords: str = Field(description="Generate keywords to search the internet if you choose method 1, otherwise empty.")
+    method_2_generate_keywords: str = Field(description="Generate keywords for video download engine if you choose method 2, otherwise empty.")
+
+
@CatchException
 def 多媒体任务(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    user_wish: str = txt
+    # query demos: 
+    #   - "我想找一首歌，里面有句歌词是“turn your face towards the sun”"
+    #   - "一首歌，第一句是红豆生南国"
+    #   - "一首音乐，中国航天任务专用的那首"
+    #   - "戴森球计划在熔岩星球的音乐"
+    #   - "hanser的百变什么精"
+    #   - "打大圣残躯时的bgm"
+    #   - "渊下宫战斗音乐"

    # 搜索
    chatbot.append((txt, "检索中, 请稍等..."))
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
    # 结构化生成
    rf_req = dedent(f"""
    The user wish to get the following resource:
        {user_wish}
-    Generate reseach keywords (less than 5 keywords) accordingly.
+    You task is to help the user to search it on google.
+    Generate search keywords (less than 7 keywords).
    """)
    gpt_json_io = GptJsonIO(Query)
    inputs = rf_req + gpt_json_io.format_instructions
@@ -97,12 +115,39 @@ def 多媒体任务(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
    analyze_res = run_gpt_fn(inputs, "")
    logger.info(analyze_res)
    query: Query = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
+    internet_search_keyword = query.search_keyword
+
+    yield from update_ui_lastest_msg(lastmsg=f"发起互联网检索: {internet_search_keyword} ...", chatbot=chatbot, history=[], delay=1)
+    from crazy_functions.Internet_GPT import internet_search_with_analysis_prompt
+    result = yield from internet_search_with_analysis_prompt(
+        prompt=internet_search_keyword,
+        analysis_prompt="请根据搜索结果分析，获取用户需要找的资源的名称、作者、出处等信息。",
+        llm_kwargs=llm_kwargs,
+        chatbot=chatbot
+    )
+
+    yield from update_ui_lastest_msg(lastmsg=f"互联网检索结论: {result} \n\n 正在生成进一步检索方案 ...", chatbot=chatbot, history=[], delay=1)
+    rf_req = dedent(f"""
+    The user wish to get the following resource:
+        {user_wish}
+    Meanwhile, you can access another expert's opinion on the user's wish:
+        {result}
+    Generate search keywords (less than 5 keywords) for video download engine accordingly.
+    """)
+    gpt_json_io = GptJsonIO(Query)
+    inputs = rf_req + gpt_json_io.format_instructions
+    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
+    analyze_res = run_gpt_fn(inputs, "")
+    logger.info(analyze_res)
+    query: Query = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
+    video_engine_keywords = query.search_keyword
+
    # 关键词展示
-    chatbot.append((None, f"检索关键词已确认: {query.search_keyword}。筛选中, 请稍等..."))
+    chatbot.append((None, f"检索关键词已确认: {video_engine_keywords}。筛选中, 请稍等..."))
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

    # 获取候选资源
-    candadate_dictionary: dict =  get_video_resource(query.search_keyword)
+    candadate_dictionary: dict =  get_video_resource(video_engine_keywords)
    candadate_dictionary_as_str = json.dumps(candadate_dictionary, ensure_ascii=False, indent=4)

    # 展示候选资源
@@ -145,3 +190,11 @@ def 多媒体任务(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
    if video_resource and video_resource.bvid:
        logger.info(video_resource)
        yield from download_video(video_resource.bvid, chatbot.get_user(), chatbot, history)
+
+
+
+
+# Debug-only plugin entry: takes a bvid in the first plugin argument and hands it straight to download_video for the current chatbot user.
+@CatchException
+def debug(bvid, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+    yield from download_video(bvid, chatbot.get_user(), chatbot, history)
--- a/crazy_functions/media_fns/get_media.py
+++ b/crazy_functions/media_fns/get_media.py
@@ -1,13 +1,15 @@
 from toolbox import update_ui, get_conf, promote_file_to_downloadzone, update_ui_lastest_msg, generate_file_link
 from shared_utils.docker_as_service_api import stream_daas
 from shared_utils.docker_as_service_api import DockerServiceApiComModel
+import random

 def download_video(video_id, only_audio, user_name, chatbot, history):
    from toolbox import get_log_folder
    chatbot.append([None, "Processing..."])
    yield from update_ui(chatbot, history)
    client_command = f'{video_id} --audio-only' if only_audio else video_id
-    server_url = get_conf('DAAS_SERVER_URL')
+    server_urls = get_conf('DAAS_SERVER_URLS')
+    server_url = random.choice(server_urls)
    docker_service_api_com_model = DockerServiceApiComModel(client_command=client_command)
    save_file_dir = get_log_folder(user_name, plugin_name='media_downloader')
    for output_manifest in stream_daas(docker_service_api_com_model, server_url, save_file_dir):
@@ -31,7 +33,9 @@ def download_video(video_id, only_audio, user_name, chatbot, history):
 def search_videos(keywords):
    from toolbox import get_log_folder
    client_command = keywords
-    server_url = get_conf('DAAS_SERVER_URL').replace('stream', 'search')
+    server_urls = get_conf('DAAS_SERVER_URLS')
+    server_url = random.choice(server_urls)
+    server_url = server_url.replace('stream', 'search')
    docker_service_api_com_model = DockerServiceApiComModel(client_command=client_command)
    save_file_dir = get_log_folder("default_user", plugin_name='media_downloader')
    for output_manifest in stream_daas(docker_service_api_com_model, server_url, save_file_dir):
--- a/tests/test_media.py
+++ b/tests/test_media.py
@@ -0,0 +1,67 @@
+"""
+Tests for the media plugin (crazy_functions.VideoResource_GPT). How to run: python tests/test_media.py
+"""
+
+import init_test  # never referenced below, so imported for its side effects only — presumably test-environment setup (TODO confirm); keep it first
+import os, sys  # NOTE(review): neither name is used in this file
+
+
+if __name__ == "__main__":  # runs a single live plugin case; every other plugin_test call below is commented out
+    from test_utils import plugin_test
+
+    plugin_test(plugin='crazy_functions.VideoResource_GPT->多媒体任务', main_input="我想找一首歌，里面有句歌词是“turn your face towards the sun”")
+
+    # plugin_test(plugin='crazy_functions.Internet_GPT->连接网络回答问题', main_input="谁是应急食品？")
+
+    # plugin_test(plugin='crazy_functions.函数动态生成->函数动态生成', main_input='交换图像的蓝色通道和红色通道', advanced_arg={"file_path_arg": "./build/ants.jpg"})
+
+    # plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="2307.07522")
+
+    # plugin_test(plugin='crazy_functions.PDF_Translate->批量翻译PDF文档', main_input='build/pdf/t1.pdf')
+
+    # plugin_test(
+    #     plugin="crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF",
+    #     main_input="G:/SEAFILE_LOCAL/50503047/我的资料库/学位/paperlatex/aaai/Fu_8368_with_appendix",
+    # )
+
+    # plugin_test(plugin='crazy_functions.虚空终端->虚空终端', main_input='修改api-key为sk-jhoejriotherjep')
+
+    # plugin_test(plugin='crazy_functions.批量翻译PDF文档_NOUGAT->批量翻译PDF文档', main_input='crazy_functions/test_project/pdf_and_word/aaai.pdf')
+
+    # plugin_test(plugin='crazy_functions.虚空终端->虚空终端', main_input='调用插件，对C:/Users/fuqingxu/Desktop/旧文件/gpt/chatgpt_academic/crazy_functions/latex_fns中的python文件进行解析')
+
+    # plugin_test(plugin='crazy_functions.命令行助手->命令行助手', main_input='查看当前的docker容器列表')
+
+    # plugin_test(plugin='crazy_functions.SourceCode_Analyse->解析一个Python项目', main_input="crazy_functions/test_project/python/dqn")
+
+    # plugin_test(plugin='crazy_functions.SourceCode_Analyse->解析一个C项目', main_input="crazy_functions/test_project/cpp/cppipc")
+
+    # plugin_test(plugin='crazy_functions.Latex_Project_Polish->Latex英文润色', main_input="crazy_functions/test_project/latex/attention")
+
+    # plugin_test(plugin='crazy_functions.Markdown_Translate->Markdown中译英', main_input="README.md")
+
+    # plugin_test(plugin='crazy_functions.PDF_Translate->批量翻译PDF文档', main_input='crazy_functions/test_project/pdf_and_word/aaai.pdf')
+
+    # plugin_test(plugin='crazy_functions.谷歌检索小助手->谷歌检索小助手', main_input="https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=auto+reinforcement+learning&btnG=")
+
+    # plugin_test(plugin='crazy_functions.总结word文档->总结word文档', main_input="crazy_functions/test_project/pdf_and_word")
+
+    # plugin_test(plugin='crazy_functions.下载arxiv论文翻译摘要->下载arxiv论文并翻译摘要', main_input="1812.10695")
+
+    # plugin_test(plugin='crazy_functions.联网的ChatGPT->连接网络回答问题', main_input="谁是应急食品？")
+
+    # plugin_test(plugin='crazy_functions.解析JupyterNotebook->解析ipynb文件', main_input="crazy_functions/test_samples")
+
+    # plugin_test(plugin='crazy_functions.数学动画生成manim->动画生成', main_input="A ball split into 2, and then split into 4, and finally split into 8.")
+
+    # for lang in ["English", "French", "Japanese", "Korean", "Russian", "Italian", "German", "Portuguese", "Arabic"]:
+    #     plugin_test(plugin='crazy_functions.Markdown_Translate->Markdown翻译指定语言', main_input="README.md", advanced_arg={"advanced_arg": lang})
+
+    # plugin_test(plugin='crazy_functions.知识库文件注入->知识库文件注入', main_input="./")
+
+    # plugin_test(plugin='crazy_functions.知识库文件注入->读取知识库作答', main_input="What is the installation method？")
+
+    # plugin_test(plugin='crazy_functions.知识库文件注入->读取知识库作答', main_input="远程云服务器部署？")
+
+    # plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="2210.03629")
+