镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-07 06:56:48 +00:00
Merge branch 'master' into huggingface
这个提交包含在:
@@ -1,5 +1,4 @@
|
||||
import traceback
|
||||
from toolbox import update_ui, get_conf
|
||||
from toolbox import update_ui, get_conf, trimmed_format_exc
|
||||
|
||||
def input_clipping(inputs, history, max_token_limit):
|
||||
import numpy as np
|
||||
@@ -94,12 +93,12 @@ def request_gpt_model_in_new_thread_with_ui_alive(
|
||||
continue # 返回重试
|
||||
else:
|
||||
# 【选择放弃】
|
||||
tb_str = '```\n' + traceback.format_exc() + '```'
|
||||
tb_str = '```\n' + trimmed_format_exc() + '```'
|
||||
mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
||||
return mutable[0] # 放弃
|
||||
except:
|
||||
# 【第三种情况】:其他错误:重试几次
|
||||
tb_str = '```\n' + traceback.format_exc() + '```'
|
||||
tb_str = '```\n' + trimmed_format_exc() + '```'
|
||||
print(tb_str)
|
||||
mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
||||
if retry_op > 0:
|
||||
@@ -173,7 +172,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
if max_workers == -1: # 读取配置文件
|
||||
try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
|
||||
except: max_workers = 8
|
||||
if max_workers <= 0 or max_workers >= 20: max_workers = 8
|
||||
if max_workers <= 0: max_workers = 3
|
||||
# 屏蔽掉 chatglm的多线程,可能会导致严重卡顿
|
||||
if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')):
|
||||
max_workers = 1
|
||||
@@ -220,14 +219,14 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
continue # 返回重试
|
||||
else:
|
||||
# 【选择放弃】
|
||||
tb_str = '```\n' + traceback.format_exc() + '```'
|
||||
tb_str = '```\n' + trimmed_format_exc() + '```'
|
||||
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
||||
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
|
||||
mutable[index][2] = "输入过长已放弃"
|
||||
return gpt_say # 放弃
|
||||
except:
|
||||
# 【第三种情况】:其他错误
|
||||
tb_str = '```\n' + traceback.format_exc() + '```'
|
||||
tb_str = '```\n' + trimmed_format_exc() + '```'
|
||||
print(tb_str)
|
||||
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
||||
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
|
||||
@@ -564,3 +563,46 @@ def read_and_clean_pdf_text(fp):
|
||||
# print亮绿('***************************')
|
||||
|
||||
return meta_txt, page_one_meta
|
||||
|
||||
|
||||
def get_files_from_everything(txt, type): # type='.md'
|
||||
"""
|
||||
这个函数是用来获取指定目录下所有指定类型(如.md)的文件,并且对于网络上的文件,也可以获取它。
|
||||
下面是对每个参数和返回值的说明:
|
||||
参数
|
||||
- txt: 路径或网址,表示要搜索的文件或者文件夹路径或网络上的文件。
|
||||
- type: 字符串,表示要搜索的文件类型。默认是.md。
|
||||
返回值
|
||||
- success: 布尔值,表示函数是否成功执行。
|
||||
- file_manifest: 文件路径列表,里面包含以指定类型为后缀名的所有文件的绝对路径。
|
||||
- project_folder: 字符串,表示文件所在的文件夹路径。如果是网络上的文件,就是临时文件夹的路径。
|
||||
该函数详细注释已添加,请确认是否满足您的需要。
|
||||
"""
|
||||
import glob, os
|
||||
|
||||
success = True
|
||||
if txt.startswith('http'):
|
||||
# 网络的远程文件
|
||||
import requests
|
||||
from toolbox import get_conf
|
||||
proxies, = get_conf('proxies')
|
||||
r = requests.get(txt, proxies=proxies)
|
||||
with open('./gpt_log/temp'+type, 'wb+') as f: f.write(r.content)
|
||||
project_folder = './gpt_log/'
|
||||
file_manifest = ['./gpt_log/temp'+type]
|
||||
elif txt.endswith(type):
|
||||
# 直接给定文件
|
||||
file_manifest = [txt]
|
||||
project_folder = os.path.dirname(txt)
|
||||
elif os.path.exists(txt):
|
||||
# 本地路径,递归搜索
|
||||
project_folder = txt
|
||||
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*'+type, recursive=True)]
|
||||
if len(file_manifest) == 0:
|
||||
success = False
|
||||
else:
|
||||
project_folder = None
|
||||
file_manifest = []
|
||||
success = False
|
||||
|
||||
return success, file_manifest, project_folder
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from toolbox import CatchException, update_ui
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
import re
|
||||
|
||||
def write_chat_to_file(chatbot, file_name=None):
|
||||
def write_chat_to_file(chatbot, history=None, file_name=None):
|
||||
"""
|
||||
将对话记录history以Markdown格式写入文件中。如果没有指定文件名,则使用当前时间生成文件名。
|
||||
"""
|
||||
@@ -11,20 +12,62 @@ def write_chat_to_file(chatbot, file_name=None):
|
||||
file_name = 'chatGPT对话历史' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
|
||||
os.makedirs('./gpt_log/', exist_ok=True)
|
||||
with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
|
||||
from theme import advanced_css
|
||||
f.write(f'<head><title>对话历史</title><style>{advanced_css}</style></head>')
|
||||
for i, contents in enumerate(chatbot):
|
||||
for content in contents:
|
||||
for j, content in enumerate(contents):
|
||||
try: # 这个bug没找到触发条件,暂时先这样顶一下
|
||||
if type(content) != str: content = str(content)
|
||||
except:
|
||||
continue
|
||||
f.write(content)
|
||||
f.write('\n\n')
|
||||
if j == 0:
|
||||
f.write('<hr style="border-top: dotted 3px #ccc;">')
|
||||
f.write('<hr color="red"> \n\n')
|
||||
|
||||
f.write('<hr color="blue"> \n\n raw chat context:\n')
|
||||
f.write('<code>')
|
||||
for h in history:
|
||||
f.write("\n>>>" + h)
|
||||
f.write('</code>')
|
||||
res = '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}')
|
||||
print(res)
|
||||
return res
|
||||
|
||||
def gen_file_preview(file_name):
|
||||
try:
|
||||
with open(file_name, 'r', encoding='utf8') as f:
|
||||
file_content = f.read()
|
||||
# pattern to match the text between <head> and </head>
|
||||
pattern = re.compile(r'<head>.*?</head>', flags=re.DOTALL)
|
||||
file_content = re.sub(pattern, '', file_content)
|
||||
html, history = file_content.split('<hr color="blue"> \n\n raw chat context:\n')
|
||||
history = history.strip('<code>')
|
||||
history = history.strip('</code>')
|
||||
history = history.split("\n>>>")
|
||||
return list(filter(lambda x:x!="", history))[0][:100]
|
||||
except:
|
||||
return ""
|
||||
|
||||
def read_file_to_chat(chatbot, history, file_name):
|
||||
with open(file_name, 'r', encoding='utf8') as f:
|
||||
file_content = f.read()
|
||||
# pattern to match the text between <head> and </head>
|
||||
pattern = re.compile(r'<head>.*?</head>', flags=re.DOTALL)
|
||||
file_content = re.sub(pattern, '', file_content)
|
||||
html, history = file_content.split('<hr color="blue"> \n\n raw chat context:\n')
|
||||
history = history.strip('<code>')
|
||||
history = history.strip('</code>')
|
||||
history = history.split("\n>>>")
|
||||
history = list(filter(lambda x:x!="", history))
|
||||
html = html.split('<hr color="red"> \n\n')
|
||||
html = list(filter(lambda x:x!="", html))
|
||||
chatbot.clear()
|
||||
for i, h in enumerate(html):
|
||||
i_say, gpt_say = h.split('<hr style="border-top: dotted 3px #ccc;">')
|
||||
chatbot.append([i_say, gpt_say])
|
||||
chatbot.append([f"存档文件详情?", f"[Local Message] 载入对话{len(html)}条,上下文{len(history)}条。"])
|
||||
return chatbot, history
|
||||
|
||||
@CatchException
|
||||
def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||
"""
|
||||
@@ -37,6 +80,64 @@ def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
|
||||
web_port 当前软件运行的端口号
|
||||
"""
|
||||
|
||||
chatbot.append(("保存当前对话", f"[Local Message] {write_chat_to_file(chatbot)}"))
|
||||
chatbot.append(("保存当前对话",
|
||||
f"[Local Message] {write_chat_to_file(chatbot, history)},您可以调用“载入对话历史存档”还原当下的对话。\n警告!被保存的对话历史可以被使用该系统的任何人查阅。"))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
|
||||
|
||||
def hide_cwd(str):
|
||||
import os
|
||||
current_path = os.getcwd()
|
||||
replace_path = "."
|
||||
return str.replace(current_path, replace_path)
|
||||
|
||||
@CatchException
|
||||
def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||
"""
|
||||
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
|
||||
llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
|
||||
plugin_kwargs 插件模型的参数,暂时没有用武之地
|
||||
chatbot 聊天显示框的句柄,用于显示给用户
|
||||
history 聊天历史,前情提要
|
||||
system_prompt 给gpt的静默提醒
|
||||
web_port 当前软件运行的端口号
|
||||
"""
|
||||
from .crazy_utils import get_files_from_everything
|
||||
success, file_manifest, _ = get_files_from_everything(txt, type='.html')
|
||||
|
||||
if not success:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
import glob
|
||||
local_history = "<br/>".join(["`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`" for f in glob.glob(f'gpt_log/**/chatGPT对话历史*.html', recursive=True)])
|
||||
chatbot.append([f"正在查找对话历史文件(html格式): {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件,您可以将任意一个文件路径粘贴到输入区,然后重试:<br/>{local_history}"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
|
||||
try:
|
||||
chatbot, history = read_file_to_chat(chatbot, history, file_manifest[0])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
except:
|
||||
chatbot.append([f"载入对话历史文件", f"对话历史文件损坏!"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
|
||||
@CatchException
|
||||
def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||
"""
|
||||
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
|
||||
llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
|
||||
plugin_kwargs 插件模型的参数,暂时没有用武之地
|
||||
chatbot 聊天显示框的句柄,用于显示给用户
|
||||
history 聊天历史,前情提要
|
||||
system_prompt 给gpt的静默提醒
|
||||
web_port 当前软件运行的端口号
|
||||
"""
|
||||
|
||||
import glob, os
|
||||
local_history = "<br/>".join(["`"+hide_cwd(f)+"`" for f in glob.glob(f'gpt_log/**/chatGPT对话历史*.html', recursive=True)])
|
||||
for f in glob.glob(f'gpt_log/**/chatGPT对话历史*.html', recursive=True):
|
||||
os.remove(f)
|
||||
chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
|
||||
|
||||
|
||||
@@ -84,7 +84,33 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
|
||||
def get_files_from_everything(txt):
|
||||
import glob, os
|
||||
|
||||
success = True
|
||||
if txt.startswith('http'):
|
||||
# 网络的远程文件
|
||||
txt = txt.replace("https://github.com/", "https://raw.githubusercontent.com/")
|
||||
txt = txt.replace("/blob/", "/")
|
||||
import requests
|
||||
from toolbox import get_conf
|
||||
proxies, = get_conf('proxies')
|
||||
r = requests.get(txt, proxies=proxies)
|
||||
with open('./gpt_log/temp.md', 'wb+') as f: f.write(r.content)
|
||||
project_folder = './gpt_log/'
|
||||
file_manifest = ['./gpt_log/temp.md']
|
||||
elif txt.endswith('.md'):
|
||||
# 直接给定文件
|
||||
file_manifest = [txt]
|
||||
project_folder = os.path.dirname(txt)
|
||||
elif os.path.exists(txt):
|
||||
# 本地路径,递归搜索
|
||||
project_folder = txt
|
||||
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)]
|
||||
else:
|
||||
success = False
|
||||
|
||||
return success, file_manifest, project_folder
|
||||
|
||||
|
||||
@CatchException
|
||||
@@ -98,6 +124,7 @@ def Markdown英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
try:
|
||||
import tiktoken
|
||||
import glob, os
|
||||
except:
|
||||
report_execption(chatbot, history,
|
||||
a=f"解析项目: {txt}",
|
||||
@@ -105,19 +132,21 @@ def Markdown英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
history = [] # 清空历史,以免输入溢出
|
||||
import glob, os
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
else:
|
||||
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt)
|
||||
|
||||
if not success:
|
||||
# 什么都没有
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)]
|
||||
|
||||
if len(file_manifest) == 0:
|
||||
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
|
||||
yield from 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en->zh')
|
||||
|
||||
|
||||
@@ -135,6 +164,7 @@ def Markdown中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
try:
|
||||
import tiktoken
|
||||
import glob, os
|
||||
except:
|
||||
report_execption(chatbot, history,
|
||||
a=f"解析项目: {txt}",
|
||||
@@ -142,18 +172,13 @@ def Markdown中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
history = [] # 清空历史,以免输入溢出
|
||||
import glob, os
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
else:
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt)
|
||||
if not success:
|
||||
# 什么都没有
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
if txt.endswith('.md'):
|
||||
file_manifest = [txt]
|
||||
else:
|
||||
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)]
|
||||
if len(file_manifest) == 0:
|
||||
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, report_execption, write_results_to_file
|
||||
from .crazy_utils import input_clipping
|
||||
|
||||
def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
import os, copy
|
||||
@@ -61,13 +62,15 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
||||
previous_iteration_files.extend([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
|
||||
previous_iteration_files_string = ', '.join(previous_iteration_files)
|
||||
current_iteration_focus = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
|
||||
i_say = f'根据以上分析,对程序的整体功能和构架重新做出概括。然后用一张markdown表格整理每个文件的功能(包括{previous_iteration_files_string})。'
|
||||
i_say = f'用一张Markdown表格简要描述以下文件的功能:{previous_iteration_files_string}。根据以上分析,用一句话概括程序的整体功能。'
|
||||
inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
|
||||
this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
|
||||
this_iteration_history.append(last_iteration_result)
|
||||
# 裁剪input
|
||||
inputs, this_iteration_history_feed = input_clipping(inputs=i_say, history=this_iteration_history, max_token_limit=2560)
|
||||
result = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
||||
inputs=i_say, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
|
||||
history=this_iteration_history, # 迭代之前的分析
|
||||
inputs=inputs, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
|
||||
history=this_iteration_history_feed, # 迭代之前的分析
|
||||
sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。")
|
||||
report_part_2.extend([i_say, result])
|
||||
last_iteration_result = result
|
||||
|
||||
在新工单中引用
屏蔽一个用户