media plugin update

2025-12-06 14:36:48 +00:00 · 2024-12-04 00:36:34 +08:00
--- a/crazy_functions/Internet_GPT.py
+++ b/crazy_functions/Internet_GPT.py
@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
 from functools import lru_cache
 from itertools import zip_longest
 from check_proxy import check_proxy
-from toolbox import CatchException, update_ui, get_conf
+from toolbox import CatchException, update_ui, get_conf, update_ui_lastest_msg
 from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
 from request_llms.bridge_all import model_info
 from request_llms.bridge_all import predict_no_ui_long_connection
@@ -193,6 +193,38 @@ def scrape_text(url, proxies) -> str:
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text

+def internet_search_with_analysis_prompt(prompt, analysis_prompt, llm_kwargs, chatbot):
+    """Search the web for `prompt`, scrape the first hits and have the LLM answer from them.
+
+    Generator: yields UI refreshes; drive it with `result = yield from ...` to get the return value,
+    which is the model reply, or None when the search returns no URL.
+    """
+    max_search_result = 5   # maximum number of web pages whose content is collected
+    proxies = get_conf('proxies')
+    yield from update_ui_lastest_msg(lastmsg=f"检索中: {prompt} ...", chatbot=chatbot, history=[], delay=1)
+    # category 'general'; searxng_url=None / engines=None select the default searxng url and search engines
+    urls = searxng_request(prompt, proxies, 'general', None, engines=None)
+    if not urls:
+        # Nothing to visit: stop before announcing the crawl, so the UI does not claim sites are being read.
+        return None
+    yield from update_ui_lastest_msg(lastmsg="依次访问搜索到的网站 ...", chatbot=chatbot, history=[], delay=1)
+    history = []
+    for index, url in enumerate(urls[:max_search_result]):
+        yield from update_ui_lastest_msg(lastmsg=f"依次访问搜索到的网站: {url['link']} ...", chatbot=chatbot, history=[], delay=1)
+        res = scrape_text(url['link'], proxies)
+        prefix = f"第{index}份搜索结果 [源自{url['source'][0]}搜索] （{url['title'][:25]}）："
+        history.extend([prefix, res])
+    i_say = f"从以上搜索结果中抽取信息，然后回答问题：{prompt} {analysis_prompt}"
+    # Clip the input, trimming the longest entries first, so the request does not exceed the token budget.
+    i_say, history = input_clipping(inputs=i_say, history=history, max_token_limit=8192)
+    gpt_say = predict_no_ui_long_connection(
+        inputs=i_say,
+        llm_kwargs=llm_kwargs,
+        history=history,
+        sys_prompt="请从搜索结果中抽取信息，对最相关的两个搜索结果进行总结，然后回答问题。",
+        console_slience=False,
+    )
+    return gpt_say

@CatchException
 def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
--- a/crazy_functions/VideoResource_GPT.py
+++ b/crazy_functions/VideoResource_GPT.py
@@ -78,18 +78,36 @@ def download_video(bvid, user_name, chatbot, history):
    # return
    return downloaded_files + downloaded_files_part2

+
+class Strategy(BaseModel):  # schema of four string fields describing how to look up a resource; NOTE(review): no visible hunk references it — confirm it is used
+    thought: str = Field(description="analysis of the user's wish, for example, can you recall the name of the resource?")
+    which_methods: str = Field(description="Which method to use to find the necessary information? choose from 'method_1' and 'method_2'.")
+    method_1_search_keywords: str = Field(description="Generate keywords to search the internet if you choose method 1, otherwise empty.")
+    method_2_generate_keywords: str = Field(description="Generate keywords for video download engine if you choose method 2, otherwise empty.")
+
+
@CatchException
 def 多媒体任务(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    user_wish: str = txt
+    # query demos: 
+    #   - "我想找一首歌，里面有句歌词是“turn your face towards the sun”"
+    #   - "一首歌，第一句是红豆生南国"
+    #   - "一首音乐，中国航天任务专用的那首"
+    #   - "戴森球计划在熔岩星球的音乐"
+    #   - "hanser的百变什么精"
+    #   - "打大圣残躯时的bgm"
+    #   - "渊下宫战斗音乐"

    # 搜索
    chatbot.append((txt, "检索中, 请稍等..."))
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
    # 结构化生成
    rf_req = dedent(f"""
    The user wish to get the following resource:
        {user_wish}
-    Generate reseach keywords (less than 5 keywords) accordingly.
+    You task is to help the user to search it on google.
+    Generate search keywords (less than 7 keywords).
    """)
    gpt_json_io = GptJsonIO(Query)
    inputs = rf_req + gpt_json_io.format_instructions
@@ -97,12 +115,39 @@ def 多媒体任务(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
    analyze_res = run_gpt_fn(inputs, "")
    logger.info(analyze_res)
    query: Query = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
+    internet_search_keyword = query.search_keyword
+
+    yield from update_ui_lastest_msg(lastmsg=f"发起互联网检索: {internet_search_keyword} ...", chatbot=chatbot, history=[], delay=1)
+    from crazy_functions.Internet_GPT import internet_search_with_analysis_prompt
+    result = yield from internet_search_with_analysis_prompt(
+        prompt=internet_search_keyword,
+        analysis_prompt="请根据搜索结果分析，获取用户需要找的资源的名称、作者、出处等信息。",
+        llm_kwargs=llm_kwargs,
+        chatbot=chatbot
+    )
+
+    yield from update_ui_lastest_msg(lastmsg=f"互联网检索结论: {result} \n\n 正在生成进一步检索方案 ...", chatbot=chatbot, history=[], delay=1)
+    rf_req = dedent(f"""
+    The user wish to get the following resource:
+        {user_wish}
+    Meanwhile, you can access another expert's opinion on the user's wish:
+        {result}
+    Generate search keywords (less than 5 keywords) for video download engine accordingly.
+    """)
+    gpt_json_io = GptJsonIO(Query)
+    inputs = rf_req + gpt_json_io.format_instructions
+    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
+    analyze_res = run_gpt_fn(inputs, "")
+    logger.info(analyze_res)
+    query: Query = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
+    video_engine_keywords = query.search_keyword
+
    # 关键词展示
-    chatbot.append((None, f"检索关键词已确认: {query.search_keyword}。筛选中, 请稍等..."))
+    chatbot.append((None, f"检索关键词已确认: {video_engine_keywords}。筛选中, 请稍等..."))
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

    # 获取候选资源
-    candadate_dictionary: dict =  get_video_resource(query.search_keyword)
+    candadate_dictionary: dict =  get_video_resource(video_engine_keywords)
    candadate_dictionary_as_str = json.dumps(candadate_dictionary, ensure_ascii=False, indent=4)

    # 展示候选资源
@@ -144,4 +189,12 @@ def 多媒体任务(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro

    if video_resource and video_resource.bvid:
        logger.info(video_resource)
-        yield from download_video(video_resource.bvid, chatbot.get_user(), chatbot, history)
+        yield from download_video(video_resource.bvid, chatbot.get_user(), chatbot, history)
+
+
+
+
+@CatchException
+def debug(bvid, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
+    """Debug entry point: pass `bvid` straight to `download_video`; only `bvid`, `chatbot` and `history` are used."""
+    yield from download_video(bvid, chatbot.get_user(), chatbot, history)
--- a/crazy_functions/media_fns/get_media.py
+++ b/crazy_functions/media_fns/get_media.py
@@ -1,13 +1,15 @@
 from toolbox import update_ui, get_conf, promote_file_to_downloadzone, update_ui_lastest_msg, generate_file_link
 from shared_utils.docker_as_service_api import stream_daas
 from shared_utils.docker_as_service_api import DockerServiceApiComModel
+import random

 def download_video(video_id, only_audio, user_name, chatbot, history):
    from toolbox import get_log_folder
    chatbot.append([None, "Processing..."])
    yield from update_ui(chatbot, history)
    client_command = f'{video_id} --audio-only' if only_audio else video_id
-    server_url = get_conf('DAAS_SERVER_URL')
+    server_urls = get_conf('DAAS_SERVER_URLS')
+    server_url = random.choice(server_urls)
    docker_service_api_com_model = DockerServiceApiComModel(client_command=client_command)
    save_file_dir = get_log_folder(user_name, plugin_name='media_downloader')
    for output_manifest in stream_daas(docker_service_api_com_model, server_url, save_file_dir):
@@ -31,7 +33,9 @@ def download_video(video_id, only_audio, user_name, chatbot, history):
 def search_videos(keywords):
    from toolbox import get_log_folder
    client_command = keywords
-    server_url = get_conf('DAAS_SERVER_URL').replace('stream', 'search')
+    server_urls = get_conf('DAAS_SERVER_URLS')
+    server_url = random.choice(server_urls)
+    server_url = server_url.replace('stream', 'search')
    docker_service_api_com_model = DockerServiceApiComModel(client_command=client_command)
    save_file_dir = get_log_folder("default_user", plugin_name='media_downloader')
    for output_manifest in stream_daas(docker_service_api_com_model, server_url, save_file_dir):