Merge branch 'master' into huggingface

2025-12-07 06:56:48 +00:00 · 2023-04-29 03:53:32 +08:00
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -1,5 +1,4 @@
-import traceback
-from toolbox import update_ui, get_conf
+from toolbox import update_ui, get_conf, trimmed_format_exc

 def input_clipping(inputs, history, max_token_limit):
    import numpy as np
@@ -94,12 +93,12 @@ def request_gpt_model_in_new_thread_with_ui_alive(
                    continue # 返回重试
                else:
                    # 【选择放弃】
-                    tb_str = '```\n' + traceback.format_exc() + '```'
+                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    mutable[0] += f"[Local Message] 警告，在执行过程中遭遇问题, Traceback：\n\n{tb_str}\n\n"
                    return mutable[0] # 放弃
            except:
                # 【第三种情况】：其他错误：重试几次
-                tb_str = '```\n' + traceback.format_exc() + '```'
+                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                mutable[0] += f"[Local Message] 警告，在执行过程中遭遇问题, Traceback：\n\n{tb_str}\n\n"
                if retry_op > 0:
@@ -173,7 +172,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
    if max_workers == -1: # 读取配置文件
        try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
        except: max_workers = 8
-        if max_workers <= 0 or max_workers >= 20: max_workers = 8
+        if max_workers <= 0: max_workers = 3
    # 屏蔽掉 chatglm的多线程，可能会导致严重卡顿
    if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')):
        max_workers = 1
@@ -220,14 +219,14 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                    continue # 返回重试
                else:
                    # 【选择放弃】
-                    tb_str = '```\n' + traceback.format_exc() + '```'
+                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    gpt_say += f"[Local Message] 警告，线程{index}在执行过程中遭遇问题, Traceback：\n\n{tb_str}\n\n"
                    if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答：\n\n" + mutable[index][0]
                    mutable[index][2] = "输入过长已放弃"
                    return gpt_say # 放弃
            except:
                # 【第三种情况】：其他错误
-                tb_str = '```\n' + traceback.format_exc() + '```'
+                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                gpt_say += f"[Local Message] 警告，线程{index}在执行过程中遭遇问题, Traceback：\n\n{tb_str}\n\n"
                if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答：\n\n" + mutable[index][0]
@@ -564,3 +563,46 @@ def read_and_clean_pdf_text(fp):
        #    print亮绿('***************************')

    return meta_txt, page_one_meta
+
+
+def get_files_from_everything(txt, type): # type='.md'
+    """
+    Collect every file with a given suffix from a URL, a single file path or a directory.
+
+    Args:
+    - txt: a URL (anything starting with 'http'), a file path ending in `type`, or a directory path.
+    - type: file suffix to look for, including the dot, e.g. '.md' (there is no default value).
+    Returns a tuple (success, file_manifest, project_folder):
+    - success: False when `txt` does not exist or the directory holds no matching file.
+    - file_manifest: list of matching paths, exactly as given or as returned by glob (not made absolute).
+    - project_folder: folder of the files; './gpt_log/' for a download, None when `txt` does not exist.
+    NOTE: a path ending in `type` is returned as-is, without checking that the file exists.
+    """
+    import glob, os
+
+    success = True
+    if txt.startswith('http'):
+        # remote file: download it to ./gpt_log/temp<type>
+        import requests
+        from toolbox import get_conf
+        proxies, = get_conf('proxies')
+        r = requests.get(txt, proxies=proxies)
+        os.makedirs('./gpt_log/', exist_ok=True)   # the folder may not exist yet on a fresh checkout
+        with open('./gpt_log/temp'+type, 'wb+') as f: f.write(r.content)
+        project_folder = './gpt_log/'
+        file_manifest = ['./gpt_log/temp'+type]
+    elif txt.endswith(type):
+        # a single file was given directly
+        file_manifest = [txt]
+        project_folder = os.path.dirname(txt)
+    elif os.path.exists(txt):
+        # local directory: search it recursively
+        project_folder = txt
+        file_manifest = glob.glob(f'{project_folder}/**/*'+type, recursive=True)
+        success = len(file_manifest) > 0   # an existing folder without any match counts as a failure
+    else:
+        # nothing usable was given
+        project_folder, file_manifest = None, []
+        success = False
+
+    return success, file_manifest, project_folder
--- a/crazy_functions/对话历史存档.py
+++ b/crazy_functions/对话历史存档.py
@@ -1,7 +1,8 @@
 from toolbox import CatchException, update_ui
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+import re

-def write_chat_to_file(chatbot, file_name=None):
+def write_chat_to_file(chatbot, history=None, file_name=None):
    """
    将对话记录history以Markdown格式写入文件中。如果没有指定文件名，则使用当前时间生成文件名。
    """
@@ -11,20 +12,62 @@ def write_chat_to_file(chatbot, file_name=None):
        file_name = 'chatGPT对话历史' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
    os.makedirs('./gpt_log/', exist_ok=True)
    with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
+        from theme import advanced_css
+        f.write(f'<head><title>对话历史</title><style>{advanced_css}</style></head>')
        for i, contents in enumerate(chatbot):
-            for content in contents:
+            for j, content in enumerate(contents):
                try:    # 这个bug没找到触发条件，暂时先这样顶一下
                    if type(content) != str: content = str(content)
                except:
                    continue
                f.write(content)
-                f.write('\n\n')
+                if j == 0:
+                    f.write('<hr style="border-top: dotted 3px #ccc;">')
            f.write('<hr color="red"> \n\n')
-
+        f.write('<hr color="blue"> \n\n raw chat context:\n')
+        f.write('<code>')
+        for h in history:
+            f.write("\n>>>" + h)
+        f.write('</code>')
    res = '对话历史写入：' + os.path.abspath(f'./gpt_log/{file_name}')
    print(res)
    return res

+def gen_file_preview(file_name):
+    """Return the first 100 characters of the first saved context entry of an archive, or "" on any failure."""
+    try:
+        with open(file_name, 'r', encoding='utf8') as f:
+            file_content = f.read()
+        # drop the <head>...</head> section (title + css) that precedes the chat
+        file_content = re.sub(r'<head>.*?</head>', '', file_content, flags=re.DOTALL)
+        html, history = file_content.split('<hr color="blue"> \n\n raw chat context:\n')
+        # remove the <code> wrapper as exact tags: str.strip('<code>') strips a character *set*, eating real text
+        if history.startswith('<code>'): history = history[len('<code>'):]
+        if history.endswith('</code>'): history = history[:-len('</code>')]
+        return [h for h in history.split("\n>>>") if h != ""][0][:100]
+    except Exception:
+        return ""
+
+def read_file_to_chat(chatbot, history, file_name):
+    """Refill `chatbot` from an archive written by write_chat_to_file and return (chatbot, parsed history); the `history` argument is ignored."""
+    with open(file_name, 'r', encoding='utf8') as f:
+        file_content = f.read()
+    # drop the <head>...</head> section (title + css) that precedes the chat
+    file_content = re.sub(r'<head>.*?</head>', '', file_content, flags=re.DOTALL)
+    html, history = file_content.split('<hr color="blue"> \n\n raw chat context:\n')   # ValueError if the marker is missing
+    # remove the <code> wrapper as exact tags: str.strip('<code>') strips a character *set*, eating real text
+    if history.startswith('<code>'): history = history[len('<code>'):]
+    if history.endswith('</code>'): history = history[:-len('</code>')]
+    history = [h for h in history.split("\n>>>") if h != ""]
+    html = [h for h in html.split('<hr color="red"> \n\n') if h != ""]
+    chatbot.clear()
+    for h in html:
+        # split once only: the answer itself may contain the dotted separator
+        i_say, gpt_say = h.split('<hr style="border-top: dotted 3px #ccc;">', 1)
+        chatbot.append([i_say, gpt_say])
+    chatbot.append([f"存档文件详情？", f"[Local Message] 载入对话{len(html)}条，上下文{len(history)}条。"])
+    return chatbot, history
+
@CatchException
 def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
@@ -37,6 +80,64 @@ def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
    web_port        当前软件运行的端口号
    """

-    chatbot.append(("保存当前对话", f"[Local Message] {write_chat_to_file(chatbot)}"))
+    chatbot.append(("保存当前对话", 
+        f"[Local Message] {write_chat_to_file(chatbot, history)}，您可以调用“载入对话历史存档”还原当下的对话。\n警告！被保存的对话历史可以被使用该系统的任何人查阅。"))
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间，我们先及时地做一次界面更新

+def hide_cwd(str):
+    """Return `str` with every occurrence of the current working directory replaced by "."."""
+    import os
+    # the callers in this file use the result only inside messages appended to the chat
+    return str.replace(os.getcwd(), ".")
+
+@CatchException
+def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    Plugin entry: restore the chat window and the context from a saved html archive.
+    txt             text from the input box: path/URL of an archive, or a folder to search for .html files
+    llm_kwargs      model parameters (unused here)
+    plugin_kwargs   plugin parameters (unused here)
+    chatbot         handle of the chat display; it is cleared and refilled from the archive
+    history         chat context; replaced by the context stored in the archive
+    system_prompt, web_port   unused here
+    """
+    from .crazy_utils import get_files_from_everything
+    success, file_manifest, _ = get_files_from_everything(txt, type='.html')
+
+    if not success:
+        if txt == "": txt = '空空如也的输入栏'
+        import glob
+        # list the locally stored archives, each with a short preview of its first context entry
+        local_history = "<br/>".join(["`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`" for f in glob.glob(f'gpt_log/**/chatGPT对话历史*.html', recursive=True)])
+        chatbot.append([f"正在查找对话历史文件（html格式）: {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件，您可以将任意一个文件路径粘贴到输入区，然后重试：<br/>{local_history}"])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+
+    try:
+        chatbot, history = read_file_to_chat(chatbot, history, file_manifest[0])
+    except Exception:
+        # only the parsing is guarded: a bare except around the yield would also trap GeneratorExit and yield again
+        chatbot.append([f"载入对话历史文件", f"对话历史文件损坏！"])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+@CatchException
+def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    Plugin entry: delete every saved chat archive (gpt_log/**/chatGPT对话历史*.html) and list what was removed.
+    txt             text from the input box (unused here)
+    llm_kwargs      model parameters (unused here)
+    plugin_kwargs   plugin parameters (unused here)
+    chatbot         handle of the chat display; a summary message is appended to it
+    history         chat context; passed unchanged to the UI refresh
+    system_prompt, web_port   unused here
+    """
+    import glob, os
+    # glob once, so the list that is reported is exactly the list that is deleted
+    archives = glob.glob(f'gpt_log/**/chatGPT对话历史*.html', recursive=True)
+    for f in archives:
+        os.remove(f)
+    local_history = "<br/>".join(["`"+hide_cwd(f)+"`" for f in archives])
+    chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+
--- a/crazy_functions/批量Markdown翻译.py
+++ b/crazy_functions/批量Markdown翻译.py
@@ -84,7 +84,33 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面


+def get_files_from_everything(txt):
+    """Resolve `txt` (a URL, a .md file path or a folder) to a tuple (success, file_manifest, project_folder)."""

+    import glob, os
+    success = True
+    if txt.startswith('http'):
+        # remote file: turn a github "blob" page url into its raw-content url, then download it
+        txt = txt.replace("https://github.com/", "https://raw.githubusercontent.com/")
+        txt = txt.replace("/blob/", "/")
+        import requests
+        from toolbox import get_conf
+        proxies, = get_conf('proxies')
+        r = requests.get(txt, proxies=proxies)
+        os.makedirs('./gpt_log/', exist_ok=True)   # the folder may not exist yet on a fresh checkout
+        with open('./gpt_log/temp.md', 'wb+') as f: f.write(r.content)
+        project_folder = './gpt_log/'
+        file_manifest = ['./gpt_log/temp.md']
+    elif txt.endswith('.md'):
+        file_manifest, project_folder = [txt], os.path.dirname(txt)   # a single file was given directly
+    elif os.path.exists(txt):
+        # local folder: search it recursively
+        project_folder = txt
+        file_manifest = glob.glob(f'{project_folder}/**/*.md', recursive=True)
+    else:
+        # nothing usable: bind all three names, otherwise the return below raises UnboundLocalError
+        success, file_manifest, project_folder = False, [], None
+    return success, file_manifest, project_folder


@CatchException
@@ -98,6 +124,7 @@ def Markdown英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
    # 尝试导入依赖，如果缺少依赖，则给出安装建议
    try:
        import tiktoken
+        import glob, os
    except:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
@@ -105,19 +132,21 @@ def Markdown英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    history = []    # 清空历史，以免输入溢出
-    import glob, os
-    if os.path.exists(txt):
-        project_folder = txt
-    else:
+
+    success, file_manifest, project_folder = get_files_from_everything(txt)
+
+    if not success:
+        # 什么都没有
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
-    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)]
+
    if len(file_manifest) == 0:
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
+
    yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')


@@ -135,6 +164,7 @@ def Markdown中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
    # 尝试导入依赖，如果缺少依赖，则给出安装建议
    try:
        import tiktoken
+        import glob, os
    except:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
@@ -142,18 +172,13 @@ def Markdown中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    history = []    # 清空历史，以免输入溢出
-    import glob, os
-    if os.path.exists(txt):
-        project_folder = txt
-    else:
+    success, file_manifest, project_folder = get_files_from_everything(txt)
+    if not success:
+        # 什么都没有
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
-    if txt.endswith('.md'):
-        file_manifest = [txt]
-    else:
-        file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)]
    if len(file_manifest) == 0:
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
--- a/crazy_functions/解析项目源代码.py
+++ b/crazy_functions/解析项目源代码.py
@@ -1,5 +1,6 @@
 from toolbox import update_ui
 from toolbox import CatchException, report_execption, write_results_to_file
+from .crazy_utils import input_clipping

 def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    import os, copy
@@ -61,13 +62,15 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
        previous_iteration_files.extend([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
        previous_iteration_files_string = ', '.join(previous_iteration_files)
        current_iteration_focus = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
-        i_say = f'根据以上分析，对程序的整体功能和构架重新做出概括。然后用一张markdown表格整理每个文件的功能（包括{previous_iteration_files_string}）。'
+        i_say = f'用一张Markdown表格简要描述以下文件的功能：{previous_iteration_files_string}。根据以上分析，用一句话概括程序的整体功能。'
        inputs_show_user = f'根据以上分析，对程序的整体功能和构架重新做出概括，由于输入长度限制，可能需要分组处理，本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
        this_iteration_history.append(last_iteration_result)
+        # 裁剪input
+        inputs, this_iteration_history_feed = input_clipping(inputs=i_say, history=this_iteration_history, max_token_limit=2560)
        result = yield from request_gpt_model_in_new_thread_with_ui_alive(
-            inputs=i_say, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
-            history=this_iteration_history,   # 迭代之前的分析
+            inputs=inputs, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
+            history=this_iteration_history_feed,   # 迭代之前的分析
            sys_prompt="你是一个程序架构分析师，正在分析一个项目的源代码。")
        report_part_2.extend([i_say, result])
        last_iteration_result = result