update by pull

This commit is contained in:
mrhblfx
2023-04-19 18:26:48 +08:00
Current commit 26af2b1bb4
37 files changed, with 2586 insertions and 1118 deletions

View file

@@ -11,9 +11,8 @@ class PaperFileGroup():
         self.sp_file_tag = []
         # count_token
-        import tiktoken
-        from toolbox import get_conf
-        enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+        from request_llm.bridge_all import model_info
+        enc = model_info["gpt-3.5-turbo"]['tokenizer']
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
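
This three-line swap recurs throughout the commit: instead of asking tiktoken for an encoder via the LLM_MODEL config value, every call site now reads a shared model_info registry from request_llm/bridge_all. The registry itself is not shown in this diff; below is a minimal sketch of what such a table might look like, assuming tiktoken's cl100k_base encoding and only the field names actually used in these hunks ('tokenizer', 'max_token', 'token_cnt'):

# Hypothetical sketch of the registry referenced above; the real
# request_llm/bridge_all.py defines more models and more fields.
import tiktoken

_enc = tiktoken.get_encoding("cl100k_base")  # encoding used by gpt-3.5-turbo

model_info = {
    "gpt-3.5-turbo": {
        "max_token": 4096,  # context window size
        "tokenizer": _enc,  # exposes .encode(txt, disallowed_special=())
        "token_cnt": lambda txt: len(_enc.encode(txt, disallowed_special=())),
    },
}

Centralizing the lookup keeps the plugins model-agnostic: they query model_info[llm_kwargs['llm_model']] instead of hard-wiring one tokenizer.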

View file

@@ -11,9 +11,8 @@ class PaperFileGroup():
         self.sp_file_tag = []
         # count_token
-        import tiktoken
-        from toolbox import get_conf
-        enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+        from request_llm.bridge_all import model_info
+        enc = model_info["gpt-3.5-turbo"]['tokenizer']
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num

View file

@@ -0,0 +1,92 @@
"""
这是什么?
这个文件用于函数插件的单元测试
运行方法 python crazy_functions/crazy_functions_test.py
"""
def validate_path():
import os, sys
dir_name = os.path.dirname(__file__)
root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
os.chdir(root_dir_assume)
sys.path.append(root_dir_assume)
validate_path() # validate path so you can run from base directory
from toolbox import get_conf, ChatBotWithCookies
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
llm_kwargs = {
'api_key': API_KEY,
'llm_model': LLM_MODEL,
'top_p':1.0,
'max_length': None,
'temperature':1.0,
}
plugin_kwargs = { }
chatbot = ChatBotWithCookies(llm_kwargs)
history = []
system_prompt = "Serve me as a writing and programming assistant."
web_port = 1024
def test_解析一个Python项目():
from crazy_functions.解析项目源代码 import 解析一个Python项目
txt = "crazy_functions/test_project/python/dqn"
for cookies, cb, hist, msg in 解析一个Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb)
def test_解析一个Cpp项目():
from crazy_functions.解析项目源代码 import 解析一个C项目
txt = "crazy_functions/test_project/cpp/cppipc"
for cookies, cb, hist, msg in 解析一个C项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb)
def test_Latex英文润色():
from crazy_functions.Latex全文润色 import Latex英文润色
txt = "crazy_functions/test_project/latex/attention"
for cookies, cb, hist, msg in Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb)
def test_Markdown中译英():
from crazy_functions.批量Markdown翻译 import Markdown中译英
txt = "README.md"
for cookies, cb, hist, msg in Markdown中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb)
def test_批量翻译PDF文档():
from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
txt = "crazy_functions/test_project/pdf_and_word"
for cookies, cb, hist, msg in 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb)
def test_谷歌检索小助手():
from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
txt = "https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=auto+reinforcement+learning&btnG="
for cookies, cb, hist, msg in 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb)
def test_总结word文档():
from crazy_functions.总结word文档 import 总结word文档
txt = "crazy_functions/test_project/pdf_and_word"
for cookies, cb, hist, msg in 总结word文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb)
def test_下载arxiv论文并翻译摘要():
from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
txt = "1812.10695"
for cookies, cb, hist, msg in 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb)
test_解析一个Python项目()
test_Latex英文润色()
test_Markdown中译英()
test_批量翻译PDF文档()
test_谷歌检索小助手()
test_总结word文档()
test_下载arxiv论文并翻译摘要()
test_解析一个Cpp项目()
input("程序完成,回车退出。")
print("退出。")

View file

@@ -2,9 +2,9 @@ import traceback
 from toolbox import update_ui, get_conf
 def input_clipping(inputs, history, max_token_limit):
-    import tiktoken
     import numpy as np
-    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
     mode = 'input-and-history'
@@ -61,12 +61,12 @@ def request_gpt_model_in_new_thread_with_ui_alive(
     """
     import time
     from concurrent.futures import ThreadPoolExecutor
-    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
+    from request_llm.bridge_all import predict_no_ui_long_connection
     # user feedback
     chatbot.append([inputs_show_user, ""])
     yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
     executor = ThreadPoolExecutor(max_workers=16)
-    mutable = ["", time.time()]
+    mutable = ["", time.time(), ""]
     def _req_gpt(inputs, history, sys_prompt):
         retry_op = retry_times_at_unknown_error
         exceeded_cnt = 0
@@ -105,7 +105,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
             if retry_op > 0:
                 retry_op -= 1
                 mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}\n\n"
-                if "Rate limit reached" in tb_str:
+                if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                     time.sleep(30)
                 time.sleep(5)
                 continue # go back and retry
@@ -167,13 +167,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     """
     import time, random
     from concurrent.futures import ThreadPoolExecutor
-    from request_llm.bridge_chatgpt import predict_no_ui_long_connection
+    from request_llm.bridge_all import predict_no_ui_long_connection
     assert len(inputs_array) == len(history_array)
     assert len(inputs_array) == len(sys_prompt_array)
     if max_workers == -1: # read from the config file
         try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
         except: max_workers = 8
         if max_workers <= 0 or max_workers >= 20: max_workers = 8
+    # disable multi-threading for chatglm, it may cause severe lag
+    if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')):
+        max_workers = 1
     executor = ThreadPoolExecutor(max_workers=max_workers)
     n_frag = len(inputs_array)
     # user feedback
@@ -230,9 +234,9 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                 if retry_op > 0:
                     retry_op -= 1
                     wait = random.randint(5, 20)
-                    if "Rate limit reached" in tb_str:
+                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                         wait = wait * 3
-                        fail_info = "OpenAI请求速率限制 "
+                        fail_info = "OpenAI绑定信用卡可解除频率限制 "
                     else:
                         fail_info = ""
                     # things may look better after waiting a dozen or so seconds
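
Both retry branches in this file now also match the generic HTTP 429 text "Too Many Requests" in addition to OpenAI's "Rate limit reached", and wait substantially longer when they see either. Stripped of the UI plumbing, the underlying pattern is roughly the following sketch (request_fn stands in for predict_no_ui_long_connection; this is not the project's exact code):

import time, random, traceback

def request_with_retries(request_fn, inputs, retry_times_at_unknown_error=3):
    # Retry a few times on unknown errors; back off much harder when the
    # traceback looks like a rate limit.
    retry_op = retry_times_at_unknown_error
    while True:
        try:
            return request_fn(inputs)
        except Exception:
            tb_str = traceback.format_exc()
            if retry_op <= 0:
                raise  # out of retries, surface the error
            retry_op -= 1
            wait = random.randint(5, 20)
            if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                wait = wait * 3  # rate limits need a longer cool-down
            time.sleep(wait)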

View file

@@ -1,5 +1,5 @@
 import threading
-from request_llm.bridge_chatgpt import predict_no_ui_long_connection
+from request_llm.bridge_all import predict_no_ui_long_connection
 from toolbox import update_ui
 from toolbox import CatchException, write_results_to_file, report_execption
 from .crazy_utils import breakdown_txt_to_satisfy_token_limit
@@ -59,9 +59,8 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
     # Step 5: truncation and processing under the token limit
     MAX_TOKEN = 3000
-    import tiktoken
-    from toolbox import get_conf
-    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))

View file

@@ -8,8 +8,6 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot
     import time, os
     # pip install python-docx, for the docx format (cross-platform)
     # pip install pywin32, for the doc format (Windows only)
-    print('begin analysis on:', file_manifest)
     for index, fp in enumerate(file_manifest):
         if fp.split(".")[-1] == "docx":
             from docx import Document
@@ -29,18 +27,20 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot
             word.Quit()
         print(file_content)
-        prefix = "接下来请你逐文件分析下面的论文文件," if index == 0 else ""
         # file names in private_upload are often garbled after unzipping (rar and 7z are fine), so only the article content is analyzed and the file name is not passed in
-        i_say = prefix + f'请对下面的文章片段用中英文做概述,文件名是{os.path.relpath(fp, project_folder)},' \
-                f'文章内容是 ```{file_content}```'
-        i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 假设你是论文审稿专家,请对下面的文章片段做概述: {os.path.abspath(fp)}'
-        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
-        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
-        if not fast_debug:
-            msg = '正常'
-            # ** gpt request **
+        from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+        from request_llm.bridge_all import model_info
+        max_token = model_info[llm_kwargs['llm_model']]['max_token']
+        TOKEN_LIMIT_PER_FRAGMENT = max_token * 3 // 4
+        paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
+            txt=file_content,
+            get_token_fn=model_info[llm_kwargs['llm_model']]['token_cnt'],
+            limit=TOKEN_LIMIT_PER_FRAGMENT
+        )
+        this_paper_history = []
+        for i, paper_frag in enumerate(paper_fragments):
+            i_say = f'请对下面的文章片段用中文做概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{paper_frag}```'
+            i_say_show_user = f'请对下面的文章片段做概述: {os.path.abspath(fp)}的第{i+1}/{len(paper_fragments)}个片段。'
             gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
                 inputs=i_say,
                 inputs_show_user=i_say_show_user,
@@ -48,46 +48,34 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot
                 chatbot=chatbot,
                 history=[],
                 sys_prompt="总结文章。"
-            ) # with timeout countdown
+            )
-            chatbot[-1] = (i_say_show_user, gpt_say)
-            history.append(i_say_show_user)
-            history.append(gpt_say)
-            yield from update_ui(chatbot=chatbot, history=history, msg=msg) # refresh the UI
-            if not fast_debug: time.sleep(2)
+            history.extend([i_say_show_user,gpt_say])
+            this_paper_history.extend([i_say_show_user,gpt_say])
-        """
-        # enable as needed
-        i_say = f'根据你上述的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一篇英文的。'
-        chatbot.append((i_say, "[Local Message] waiting gpt response."))
-        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        # all fragments of this article have been summarized; if the article was split,
+        if len(paper_fragments) > 1:
+            i_say = f"根据以上的对话,总结文章{os.path.abspath(fp)}的主要内容。"
+            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+                inputs=i_say,
+                inputs_show_user=i_say,
+                llm_kwargs=llm_kwargs,
+                chatbot=chatbot,
+                history=this_paper_history,
+                sys_prompt="总结文章。"
+            )
+            history.extend([i_say,gpt_say])
+            this_paper_history.extend([i_say,gpt_say])
-        i_say = f'我想让你做一个论文写作导师。您的任务是使用人工智能工具(例如自然语言处理)提供有关如何改进其上述文章的反馈。' \
-                f'您还应该利用您在有效写作技巧方面的修辞知识和经验来建议作者可以更好地以书面形式表达他们的想法和想法的方法。' \
-                f'根据你之前的分析,提出建议'
-        chatbot.append((i_say, "[Local Message] waiting gpt response."))
-        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
-        """
-        if not fast_debug:
-            msg = '正常'
-            # ** gpt request **
-            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-                inputs=i_say,
-                inputs_show_user=i_say,
-                llm_kwargs=llm_kwargs,
-                chatbot=chatbot,
-                history=history,
-                sys_prompt="总结文章。"
-            ) # with timeout countdown
-            chatbot[-1] = (i_say, gpt_say)
-            history.append(i_say)
-            history.append(gpt_say)
-            yield from update_ui(chatbot=chatbot, history=history, msg=msg) # refresh the UI
-    res = write_results_to_file(history)
-    chatbot.append(("完成了吗?", res))
-    yield from update_ui(chatbot=chatbot, history=history, msg=msg) # refresh the UI
+        res = write_results_to_file(history)
+        chatbot.append(("完成了吗?", res))
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+    res = write_results_to_file(history)
+    chatbot.append(("所有文件都总结完成了吗?", res))
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
@CatchException
@@ -123,11 +111,11 @@ def 总结word文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr
         return
     # build the list of files to be processed
-    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.docx', recursive=True)] + \
-                    [f for f in glob.glob(f'{project_folder}/**/*.doc', recursive=True)]
-    # [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + \
-    # [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
-    # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
+    if txt.endswith('.docx') or txt.endswith('.doc'):
+        file_manifest = [txt]
+    else:
+        file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.docx', recursive=True)] + \
+                        [f for f in glob.glob(f'{project_folder}/**/*.doc', recursive=True)]
     # if no files were found
     if len(file_manifest) == 0:
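
解析docx now sizes its fragments from the active model's context window (max_token * 3 // 4) and delegates the splitting to breakdown_txt_to_satisfy_token_limit_for_pdf from crazy_utils. That helper's body is not part of this diff; one plausible shape for it is a recursive bisection on the token count, sketched here under that assumption:

def breakdown_to_token_limit(txt, get_token_fn, limit):
    # Sketch only: recursively halve the text until each piece fits.
    # The real helper presumably prefers splitting at paragraph or
    # sentence boundaries rather than at an arbitrary midpoint.
    if get_token_fn(txt) <= limit:
        return [txt]
    mid = len(txt) // 2
    return (breakdown_to_token_limit(txt[:mid], get_token_fn, limit) +
            breakdown_to_token_limit(txt[mid:], get_token_fn, limit))

Used as in the hunk above, the call would look like breakdown_to_token_limit(file_content, model_info[llm_kwargs['llm_model']]['token_cnt'], max_token * 3 // 4).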

View file

@@ -11,9 +11,8 @@ class PaperFileGroup():
         self.sp_file_tag = []
         # count_token
-        import tiktoken
-        from toolbox import get_conf
-        enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+        from request_llm.bridge_all import model_info
+        enc = model_info["gpt-3.5-turbo"]['tokenizer']
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
@@ -51,7 +50,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
         pfg.file_contents.append(file_content)
     # <-------- split overly long Markdown files ---------->
-    pfg.run_file_split(max_token_limit=2048)
+    pfg.run_file_split(max_token_limit=1500)
     n_split = len(pfg.sp_file_contents)
     # <-------- start multi-threaded polishing ---------->

View file

@@ -68,8 +68,8 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
     # recursively split the PDF file
     from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-    from toolbox import get_conf
-    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
     paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
         txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)

View file

@@ -17,8 +17,8 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     TOKEN_LIMIT_PER_FRAGMENT = 2500
     from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-    from toolbox import get_conf
-    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
     paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
         txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)

View file

@@ -12,7 +12,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
     sys_prompt_array = []
     report_part_1 = []
-    assert len(file_manifest) <= 1024, "源文件太多(超过1024个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
+    assert len(file_manifest) <= 512, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
     ############################## <Step 1: analyze files one by one, multi-threaded> ##################################
     for index, fp in enumerate(file_manifest):
         # read the file
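
The assert message points at the intended workaround when more than 512 source files are involved: instead of deleting the check, split file_manifest and process it in batches. A hypothetical helper for that (not part of the repo):

def iter_manifest_batches(file_manifest, batch_size=512):
    # Yield successive slices of the manifest, each under the file cap.
    for i in range(0, len(file_manifest), batch_size):
        yield file_manifest[i:i + batch_size]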

View file

@@ -0,0 +1,30 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import datetime
@CatchException
def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    txt             text the user typed into the input field, e.g. a passage to translate, or a path containing files to process
    llm_kwargs      gpt model parameters, such as temperature and top_p; usually passed through unchanged
    plugin_kwargs   plugin model parameters, such as temperature and top_p; usually passed through unchanged
    chatbot         handle of the chat display box, used to show output to the user
    history         chat history, i.e. the context so far
    system_prompt   the silent system prompt given to gpt
    web_port        the port this software is currently running on
    """
    history = []    # clear the history to avoid input overflow
    chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI # requesting gpt takes a while, so update the UI promptly first
    # llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # any number of llm interfaces, separated by '&'
    llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo' # any number of llm interfaces, separated by '&'
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=txt, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt=system_prompt,
        retry_times_at_unknown_error=0
    )
    history.append(txt)
    history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
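
同时问询 works by packing several backends into llm_kwargs['llm_model'] separated by '&'; the dispatch that splits the string and fans the query out lives in request_llm/bridge_all, outside this diff. Presumably it does something along these lines (a sketch; query_one_model is a hypothetical stand-in for the per-model request function):

from concurrent.futures import ThreadPoolExecutor

def fan_out_query(inputs, llm_model, query_one_model):
    # Split 'chatglm&gpt-3.5-turbo' into individual model names, query each
    # backend in its own thread, and collect the answers for display.
    models = llm_model.split('&')
    with ThreadPoolExecutor(max_workers=len(models)) as pool:
        futures = {m: pool.submit(query_one_model, m, inputs) for m in models}
    return {m: f.result() for m, f in futures.items()}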