Mirrored from
https://github.com/binary-husky/gpt_academic.git
Synced 2025-12-07 06:56:48 +00:00
again
This commit is contained in:
107
crazy_functions/Langchain知识库.py
Normal file
@@ -0,0 +1,107 @@
from toolbox import CatchException, update_ui, ProxyNetworkActivate
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything


@CatchException
def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    txt             Text typed into the input box by the user, e.g. a passage to translate, or a path containing files to process
    llm_kwargs      GPT model parameters such as temperature and top_p; usually passed through unchanged
    plugin_kwargs   Plugin parameters; unused for now
    chatbot         Handle to the chat display box, used to show output to the user
    history         Chat history (the context so far)
    system_prompt   Silent system prompt for GPT
    web_port        Port the application is currently running on
    """
    history = []    # clear the history to avoid input overflow
    chatbot.append(("这是什么功能?", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # resolve deps
    try:
        from zh_langchain import construct_vector_store
        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
        from .crazy_utils import knowledge_archive_interface
    except Exception as e:
        chatbot.append(
            ["依赖不足",
             "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]
        )
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        from .crazy_utils import try_install_deps
        try_install_deps(['zh_langchain==0.2.1'])

    # < -------------------- read parameters --------------- >
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    kai_id = plugin_kwargs.get("advanced_arg", 'default')

    # < -------------------- read files --------------- >
    file_manifest = []
    spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"]
    for sp in spl:
        _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
        file_manifest += file_manifest_tmp

    if len(file_manifest) == 0:
        chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # < ------------------- warm up the text-embedding module --------------- >
    chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    print('Checking Text2vec ...')
    from langchain.embeddings.huggingface import HuggingFaceEmbeddings
    with ProxyNetworkActivate():    # temporarily activate the proxy network
        HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

    # < ------------------- build the knowledge archive --------------- >
    chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    print('Establishing knowledge archive ...')
    with ProxyNetworkActivate():    # temporarily activate the proxy network
        kai = knowledge_archive_interface()
        kai.feed_archive(file_manifest=file_manifest, id=kai_id)
    kai_files = kai.get_loaded_file()
    kai_files = '<br/>'.join(kai_files)
    # chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"])
    # yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    # chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id()
    # chatbot._cookies['lock_plugin'] = 'crazy_functions.Langchain知识库->读取知识库作答'
    # chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"])
    chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI; the GPT request takes a while, so update promptly first

@CatchException
def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1):
    # resolve deps
    try:
        from zh_langchain import construct_vector_store
        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
        from .crazy_utils import knowledge_archive_interface
    except Exception as e:
        chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        from .crazy_utils import try_install_deps
        try_install_deps(['zh_langchain==0.2.1'])

    # < ------------------- query the knowledge archive --------------- >
    kai = knowledge_archive_interface()

    if 'langchain_plugin_embedding' in chatbot._cookies:
        resp, prompt = kai.answer_with_archive_by_id(txt, chatbot._cookies['langchain_plugin_embedding'])
    else:
        if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
        kai_id = plugin_kwargs.get("advanced_arg", 'default')
        resp, prompt = kai.answer_with_archive_by_id(txt, kai_id)

    chatbot.append((txt, '[Local Message] ' + prompt))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI; the GPT request takes a while, so update promptly first
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
        sys_prompt=system_prompt
    )
    history.extend((prompt, gpt_say))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI; the GPT request takes a while, so update promptly first
@@ -238,3 +238,6 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread')
300
crazy_functions/Latex输出PDF结果.py
Normal file
@@ -0,0 +1,300 @@
from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone
from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
from functools import partial
import glob, os, requests, time
pj = os.path.join
ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")

# =================================== utility functions ===============================================
专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
def switch_prompt(pfg, mode, more_requirement):
    """
    Generate the user prompts and system prompts based on the mode, for proofreading or translating.
    Args:
    - pfg: Proofreader or Translator instance.
    - mode: A string specifying the mode, either 'proofread_en' or 'translate_zh'.

    Returns:
    - inputs_array: A list of strings containing the prompt sent for each fragment.
    - sys_prompt_array: A list of strings containing the corresponding system prompts.
    """
    n_split = len(pfg.sp_file_contents)
    if mode == 'proofread_en':
        inputs_array = [r"Below is a section from an academic paper, proofread this section. " +
                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
                        r"Answer me only with the revised text:" +
                        f"\n\n{frag}" for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
    elif mode == 'translate_zh':
        inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
                        r"Answer me only with the translated text:" +
                        f"\n\n{frag}" for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional translator." for _ in range(n_split)]
    else:
        assert False, "未知指令"
    return inputs_array, sys_prompt_array
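
# A minimal sketch of how switch_prompt is wired up by the plugins below: more_requirement
# is bound with functools.partial, and the pfg argument only needs an sp_file_contents
# list, so a stub object is enough to exercise it. The stub pfg here is a hypothetical
# stand-in for the real LatexPaperFileGroup, for illustration only.
def _demo_switch_prompt():
    from types import SimpleNamespace
    stub_pfg = SimpleNamespace(sp_file_contents=["\\section{Intro} Hello."])  # hypothetical stub
    _switch = partial(switch_prompt, more_requirement=专业词汇声明)
    inputs_array, sys_prompt_array = _switch(stub_pfg, mode='translate_zh')
    assert len(inputs_array) == len(sys_prompt_array) == 1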

def desend_to_extracted_folder_if_exist(project_folder):
    """
    Descend into the extracted folder if it exists, otherwise return the original folder.

    Args:
    - project_folder: A string specifying the folder path.

    Returns:
    - A string specifying the path to the extracted folder, or the original folder if there is no extracted folder.
    """
    maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
    if len(maybe_dir) == 0: return project_folder
    if maybe_dir[0].endswith('.extract'): return maybe_dir[0]
    return project_folder

def move_project(project_folder, arxiv_id=None):
    """
    Create a new work folder and copy the project folder into it.

    Args:
    - project_folder: A string specifying the folder path of the project.

    Returns:
    - A string specifying the path to the new work folder.
    """
    import shutil, time
    time.sleep(2)   # avoid time-string conflicts
    if arxiv_id is not None:
        new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
    else:
        new_workfolder = f'gpt_log/{gen_time_str()}'
    try:
        shutil.rmtree(new_workfolder)
    except:
        pass

    # descend into the subfolder if the project is wrapped in a single directory
    items = glob.glob(pj(project_folder, '*'))
    if len(glob.glob(pj(project_folder, '*.tex'))) == 0 and len(items) == 1:
        if os.path.isdir(items[0]): project_folder = items[0]

    shutil.copytree(src=project_folder, dst=new_workfolder)
    return new_workfolder

def arxiv_download(chatbot, history, txt):
    def check_cached_translation_pdf(arxiv_id):
        translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
        if not os.path.exists(translation_dir):
            os.makedirs(translation_dir)
        target_file = pj(translation_dir, 'translate_zh.pdf')
        if os.path.exists(target_file):
            promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
            return target_file
        return False
    def is_float(s):
        try:
            float(s)
            return True
        except ValueError:
            return False
    if ('.' in txt) and ('/' not in txt) and is_float(txt):  # looks like a bare arxiv ID
        txt = 'https://arxiv.org/abs/' + txt.strip()
    if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]):  # looks like an arxiv ID with a suffix
        txt = 'https://arxiv.org/abs/' + txt[:10]
    if not txt.startswith('https://arxiv.org'):
        return txt, None

    # <-------------- inspect format ------------->
    chatbot.append([f"检测到arxiv文档链接", '尝试下载 ...'])
    yield from update_ui(chatbot=chatbot, history=history)
    time.sleep(1)   # refresh the UI

    url_ = txt      # e.g. https://arxiv.org/abs/1707.06690
    if not txt.startswith('https://arxiv.org/abs/'):
        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}"
        yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history)  # refresh the UI
        return msg, None
    # <-------------- set format ------------->
    arxiv_id = url_.split('/abs/')[-1]
    if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
    cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
    if cached_translation_pdf: return cached_translation_pdf, arxiv_id

    url_tar = url_.replace('/abs/', '/e-print/')
    translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
    extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
    os.makedirs(translation_dir, exist_ok=True)

    # <-------------- download arxiv source file ------------->
    dst = pj(translation_dir, arxiv_id + '.tar')
    if os.path.exists(dst):
        yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history)  # refresh the UI
    else:
        yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history)  # refresh the UI
        proxies, = get_conf('proxies')
        r = requests.get(url_tar, proxies=proxies)
        with open(dst, 'wb+') as f:
            f.write(r.content)
    # <-------------- extract file ------------->
    yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history)  # refresh the UI
    from toolbox import extract_archive
    extract_archive(file_path=dst, dest_dir=extract_dst)
    return extract_dst, arxiv_id
# ========================================= plugin main program 1 =====================================================


@CatchException
def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    # <-------------- information about this plugin ------------->
    chatbot.append([ "函数插件功能?",
        "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # <-------------- more requirements ------------->
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
        from .latex_utils import Latex精细分解与转化, 编译Latex
    except Exception as e:
        chatbot.append([ f"解析项目: {txt}",
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- clear history and read input ------------->
    history = []
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
    if len(file_manifest) == 0:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- if it is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from the temp folder ------------->
    project_folder = move_project(project_folder, arxiv_id=None)

    # <-------------- if merge_proofread_en is already generated, skip the gpt request ------------->
    if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                       chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)

    # <-------------- compile PDF ------------->
    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en',
                                   work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)

    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    if success:
        chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1)  # refresh the UI
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
    else:
        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1)  # refresh the UI
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success


# ========================================= plugin main program 2 =====================================================

@CatchException
def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    # <-------------- information about this plugin ------------->
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # <-------------- more requirements ------------->
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
        from .latex_utils import Latex精细分解与转化, 编译Latex
    except Exception as e:
        chatbot.append([ f"解析项目: {txt}",
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- clear history and read input ------------->
    history = []
    txt, arxiv_id = yield from arxiv_download(chatbot, history, txt)
    if txt.endswith('.pdf'):
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"发现已经存在翻译好的PDF文档")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
    if len(file_manifest) == 0:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- if it is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from the temp folder ------------->
    project_folder = move_project(project_folder, arxiv_id)

    # <-------------- if merge_translate_zh is already generated, skip the gpt request ------------->
    if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                       chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_)

    # <-------------- compile PDF ------------->
    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh',
                                   work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)

    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    if success:
        chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1)  # refresh the UI
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
    else:
        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1)  # refresh the UI
        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success
@@ -3,6 +3,8 @@
This file is used for unit tests of the function plugins.
How to run: python crazy_functions/crazy_functions_test.py
"""

# ==============================================================================================================================

def validate_path():
    import os, sys
@@ -10,10 +12,16 @@ def validate_path():
    root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
    os.chdir(root_dir_assume)
    sys.path.append(root_dir_assume)

validate_path()  # validate path so you can run from the base directory

# ==============================================================================================================================

from colorful import *
from toolbox import get_conf, ChatBotWithCookies
import contextlib
import os
import sys
from functools import wraps
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
    get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')

@@ -30,7 +38,43 @@ history = []
system_prompt = "Serve me as a writing and programming assistant."
web_port = 1024

# ==============================================================================================================================

def silence_stdout(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        # redirect stdout to devnull while the wrapped generator runs, restoring
        # it around each yielded value so the test output stays readable
        _original_stdout = sys.stdout
        sys.stdout = open(os.devnull, 'w')
        for q in func(*args, **kwargs):
            sys.stdout = _original_stdout
            yield q
            sys.stdout = open(os.devnull, 'w')
        sys.stdout.close()
        sys.stdout = _original_stdout
    return wrapper

class CLI_Printer():
    def __init__(self) -> None:
        self.pre_buf = ""

    def print(self, buf):
        bufp = ""
        for index, chat in enumerate(buf):
            a, b = chat
            bufp += sprint亮靛('[Me]:' + a) + '\n'
            bufp += '[GPT]:' + b
            if index < len(buf)-1:
                bufp += '\n'

        if self.pre_buf != "" and bufp.startswith(self.pre_buf):
            print(bufp[len(self.pre_buf):], end='')
        else:
            print('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n' + bufp, end='')
        self.pre_buf = bufp
        return

cli_printer = CLI_Printer()
# ==============================================================================================================================
def test_解析一个Python项目():
    from crazy_functions.解析项目源代码 import 解析一个Python项目
    txt = "crazy_functions/test_project/python/dqn"
@@ -116,6 +160,56 @@ def test_Markdown多语言():
    for cookies, cb, hist, msg in Markdown翻译指定语言(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        print(cb)

def test_Langchain知识库():
    from crazy_functions.Langchain知识库 import 知识库问答
    txt = "./"
    chatbot = ChatBotWithCookies(llm_kwargs)
    for cookies, cb, hist, msg in silence_stdout(知识库问答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        cli_printer.print(cb)  # print(cb)

    chatbot = ChatBotWithCookies(cookies)
    from crazy_functions.Langchain知识库 import 读取知识库作答
    txt = "What is the installation method?"
    for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        cli_printer.print(cb)  # print(cb)

def test_Langchain知识库读取():
    from crazy_functions.Langchain知识库 import 读取知识库作答
    txt = "远程云服务器部署?"
    for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        cli_printer.print(cb)  # print(cb)

def test_Latex():
    from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF

    # txt = r"https://arxiv.org/abs/1706.03762"
    # txt = r"https://arxiv.org/abs/1902.03185"
    # txt = r"https://arxiv.org/abs/2305.18290"
    # txt = r"https://arxiv.org/abs/2305.17608"
    # txt = r"https://arxiv.org/abs/2211.16068"  # ACE
    # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  # ACE
    # txt = r"https://arxiv.org/abs/2002.09253"
    # txt = r"https://arxiv.org/abs/2306.07831"
    # txt = r"https://arxiv.org/abs/2212.10156"
    # txt = r"https://arxiv.org/abs/2211.11559"
    # txt = r"https://arxiv.org/abs/2303.08774"
    txt = r"https://arxiv.org/abs/2303.12712"
    # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"

    for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        cli_printer.print(cb)  # print(cb)

    # txt = "2302.02948.tar"
    # print(txt)
    # main_tex, work_folder = Latex预处理(txt)
    # print('main tex:', main_tex)
    # res = 编译Latex(main_tex, work_folder)
    # # for cookies, cb, hist, msg in silence_stdout(编译Latex)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    #     cli_printer.print(cb)  # print(cb)

# test_解析一个Python项目()
@@ -129,7 +223,9 @@ def test_Markdown多语言():
# test_联网回答问题()
# test_解析ipynb文件()
# test_数学动画生成manim()
test_Markdown多语言()

input("程序完成,回车退出。")
print("退出。")
# test_Langchain知识库()
# test_Langchain知识库读取()
if __name__ == "__main__":
    test_Latex()
    input("程序完成,回车退出。")
    print("退出。")
@@ -1,4 +1,5 @@
from toolbox import update_ui, get_conf, trimmed_format_exc
import threading

def input_clipping(inputs, history, max_token_limit):
    import numpy as np
@@ -606,3 +607,142 @@ def get_files_from_everything(txt, type): # type='.md'
        success = False

    return success, file_manifest, project_folder


def Singleton(cls):
    # class decorator: cache one instance per class and hand it back on every call
    _instance = {}

    def _singleton(*args, **kargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kargs)
        return _instance[cls]

    return _singleton
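
# Minimal sketch of the decorator's effect: every call site receives the same cached
# instance, so the knowledge archive below shares one lock and one vector store.
def _demo_singleton():
    @Singleton
    class Counter():
        def __init__(self): self.n = 0
    a, b = Counter(), Counter()
    a.n += 1
    assert (a is b) and (b.n == 1)   # one shared instance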


@Singleton
class knowledge_archive_interface():
    def __init__(self) -> None:
        self.threadLock = threading.Lock()
        self.current_id = ""
        self.kai_path = None
        self.qa_handle = None
        self.text2vec_large_chinese = None

    def get_chinese_text2vec(self):
        if self.text2vec_large_chinese is None:
            # < ------------------- warm up the text-embedding module --------------- >
            from toolbox import ProxyNetworkActivate
            print('Checking Text2vec ...')
            from langchain.embeddings.huggingface import HuggingFaceEmbeddings
            with ProxyNetworkActivate():    # temporarily activate the proxy network
                self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

        return self.text2vec_large_chinese

    def feed_archive(self, file_manifest, id="default"):
        self.threadLock.acquire()
        # import uuid
        self.current_id = id
        from zh_langchain import construct_vector_store
        self.qa_handle, self.kai_path = construct_vector_store(
            vs_id=self.current_id,
            files=file_manifest,
            sentence_size=100,
            history=[],
            one_conent="",
            one_content_segmentation="",
            text2vec=self.get_chinese_text2vec(),
        )
        self.threadLock.release()

    def get_current_archive_id(self):
        return self.current_id

    def get_loaded_file(self):
        return self.qa_handle.get_loaded_file()

    def answer_with_archive_by_id(self, txt, id):
        self.threadLock.acquire()
        if not self.current_id == id:
            self.current_id = id
            from zh_langchain import construct_vector_store
            self.qa_handle, self.kai_path = construct_vector_store(
                vs_id=self.current_id,
                files=[],
                sentence_size=100,
                history=[],
                one_conent="",
                one_content_segmentation="",
                text2vec=self.get_chinese_text2vec(),
            )
        VECTOR_SEARCH_SCORE_THRESHOLD = 0
        VECTOR_SEARCH_TOP_K = 4
        CHUNK_SIZE = 512
        resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
            query=txt,
            vs_path=self.kai_path,
            score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
            vector_search_top_k=VECTOR_SEARCH_TOP_K,
            chunk_conent=True,
            chunk_size=CHUNK_SIZE,
            text2vec=self.get_chinese_text2vec(),
        )
        self.threadLock.release()
        return resp, prompt
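
# Minimal usage sketch for the interface above (assumes zh_langchain==0.2.1 is
# installed; 'default' mirrors the plugins' fallback kai_id, and the file path
# passed to file_manifest is a hypothetical example):
def _demo_knowledge_archive(question='如何安装?'):
    kai = knowledge_archive_interface()                          # singleton, shared across plugins
    kai.feed_archive(file_manifest=['README.md'], id='default')  # build / extend the vector store
    resp, prompt = kai.answer_with_archive_by_id(question, 'default')
    return prompt                                                # retrieval-augmented prompt handed to the LLM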

def try_install_deps(deps):
    # best-effort pip install of each missing dependency into the user site
    for dep in deps:
        import subprocess, sys
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])


class construct_html():
    def __init__(self) -> None:
        self.css = """
.row {
  display: flex;
  flex-wrap: wrap;
}

.column {
  flex: 1;
  padding: 10px;
}

.table-header {
  font-weight: bold;
  border-bottom: 1px solid black;
}

.table-row {
  border-bottom: 1px solid lightgray;
}

.table-cell {
  padding: 5px;
}
"""
        self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'

    def add_row(self, a, b):
        tmp = """
<div class="row table-row">
    <div class="column table-cell">REPLACE_A</div>
    <div class="column table-cell">REPLACE_B</div>
</div>
"""
        from toolbox import markdown_convertion
        tmp = tmp.replace('REPLACE_A', markdown_convertion(a))
        tmp = tmp.replace('REPLACE_B', markdown_convertion(b))
        self.html_string += tmp

    def save_file(self, file_name):
        with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
            f.write(self.html_string.encode('utf-8', 'ignore').decode())

773
crazy_functions/latex_utils.py
Normal file
@@ -0,0 +1,773 @@
from toolbox import update_ui, update_ui_lastest_msg  # refresh the Gradio frontend
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
import os, shutil
import re
import numpy as np
pj = os.path.join

"""
========================================================================
Part One
Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
========================================================================
"""
PRESERVE = 0
TRANSFORM = 1

def set_forbidden_text(text, mask, pattern, flags=0):
    """
    Add a preserved text area to this paper.
    e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
    you can mask out (mask = PRESERVE, so the text becomes untouchable for GPT)
    everything between "\begin{algorithm}" and "\end{algorithm}"
    """
    if isinstance(pattern, list): pattern = '|'.join(pattern)
    pattern_compile = re.compile(pattern, flags)
    for res in pattern_compile.finditer(text):
        mask[res.span()[0]:res.span()[1]] = PRESERVE
    return text, mask
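
# Worked example of the mask mechanism: protect display math in a tiny document.
# After the call, characters inside $$...$$ are PRESERVE and the surrounding prose
# stays TRANSFORM, so only the prose is later handed to GPT.
def _demo_set_forbidden_text():
    text = r"Intro text. $$ E = mc^2 $$ More prose."
    mask = np.zeros(len(text), dtype=np.uint8) + TRANSFORM
    text, mask = set_forbidden_text(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
    assert mask[text.index('E')] == PRESERVE   # the equation is now untouchable
    assert mask[0] == TRANSFORM                # the prose is still editable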

def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
    """
    Add a preserved text area to this paper (the text becomes untouchable for GPT).
    Count the braces so as to capture the complete text area, e.g.
    \caption{blablablablabla\textbf{blablabla}blablabla.}
    """
    pattern_compile = re.compile(pattern, flags)
    for res in pattern_compile.finditer(text):
        brace_level = -1
        p = begin = end = res.regs[0][0]
        for _ in range(1024*16):
            if text[p] == '}' and brace_level == 0: break
            elif text[p] == '}': brace_level -= 1
            elif text[p] == '{': brace_level += 1
            p += 1
        end = p+1
        mask[begin:end] = PRESERVE
    return text, mask

def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
    """
    Move an area out of the preserved region (make the text editable for GPT again).
    Count the braces so as to capture the complete text area, e.g.
    \caption{blablablablabla\textbf{blablabla}blablabla.}
    """
    pattern_compile = re.compile(pattern, flags)
    for res in pattern_compile.finditer(text):
        brace_level = 0
        p = begin = end = res.regs[1][0]
        for _ in range(1024*16):
            if text[p] == '}' and brace_level == 0: break
            elif text[p] == '}': brace_level -= 1
            elif text[p] == '{': brace_level += 1
            p += 1
        end = p
        mask[begin:end] = TRANSFORM
        if forbid_wrapper:
            mask[res.regs[0][0]:begin] = PRESERVE
            mask[end:res.regs[0][1]] = PRESERVE
    return text, mask

def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
    """
    Find all \begin{} ... \end{} blocks with fewer than limit_n_lines lines
    and add them to the preserved area.
    """
    pattern_compile = re.compile(pattern, flags)
    def search_with_line_limit(text, mask):
        for res in pattern_compile.finditer(text):
            cmd = res.group(1)   # begin{what}
            this = res.group(2)  # content between begin and end
            this_mask = mask[res.regs[2][0]:res.regs[2][1]]
            white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof',
                          'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
            if (cmd in white_list) or this.count('\n') >= limit_n_lines:  # use a magical number 42
                this, this_mask = search_with_line_limit(this, this_mask)
                mask[res.regs[2][0]:res.regs[2][1]] = this_mask
            else:
                mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
        return text, mask
    return search_with_line_limit(text, mask)

class LinkedListNode():
    """
    Linked List Node
    """
    def __init__(self, string, preserve=True) -> None:
        self.string = string
        self.preserve = preserve
        self.next = None
        # self.begin_line = 0
        # self.begin_char = 0

def convert_to_linklist(text, mask):
    root = LinkedListNode("", preserve=True)
    current_node = root
    for c, m, i in zip(text, mask, range(len(text))):
        if (m == PRESERVE and current_node.preserve) \
            or (m == TRANSFORM and not current_node.preserve):
            # the mask value is unchanged: extend the current node
            current_node.string += c
        else:
            # the mask flips here: start a new node
            current_node.next = LinkedListNode(c, preserve=(m == PRESERVE))
            current_node = current_node.next
    return root
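
# Sketch of the conversion: a hand-written mask splits a string into alternating
# preserve/transform nodes, which is exactly what split_subprocess consumes below.
def _demo_convert_to_linklist():
    text = "keep THIS not that"
    mask = np.array([PRESERVE]*5 + [TRANSFORM]*4 + [PRESERVE]*9, dtype=np.uint8)
    node, segs = convert_to_linklist(text, mask), []
    while node is not None:
        segs.append((node.preserve, node.string))
        node = node.next
    assert segs == [(True, 'keep '), (False, 'THIS'), (True, ' not that')]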
"""
========================================================================
Latex Merge File
========================================================================
"""

def 寻找Latex主文件(file_manifest, mode):
    """
    Among multiple .tex files, find the main file: it must contain \documentclass.
    Return the first match.
    P.S. Hopefully nobody ships a latex template inside the upload
    (6.25: added code to detect latex templates).
    """
    candidates = []
    for texf in file_manifest:
        if os.path.basename(texf).startswith('merge'):
            continue
        with open(texf, 'r', encoding='utf8') as f:
            file_content = f.read()
        if r'\documentclass' in file_content:
            candidates.append(texf)
        else:
            continue

    if len(candidates) == 0:
        raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
    elif len(candidates) == 1:
        return candidates[0]
    else:
        # len(candidates) >= 2: score each source file using words that are common in
        # latex templates (but rarely appear in a manuscript body) and return the best-scoring file
        candidates_score = []
        # words that indicate a template document count as penalties
        unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
        expected_words = [r'\input', r'\ref', r'\cite']
        for texf in candidates:
            candidates_score.append(0)
            with open(texf, 'r', encoding='utf8') as f:
                file_content = f.read()
            for uw in unexpected_words:
                if uw in file_content:
                    candidates_score[-1] -= 1
            for uw in expected_words:
                if uw in file_content:
                    candidates_score[-1] += 1
        select = np.argmax(candidates_score)  # return the highest-scoring candidate
        return candidates[select]

def rm_comments(main_file):
    new_file_remove_comment_lines = []
    for l in main_file.splitlines():
        # drop lines that are entirely comments
        if l.lstrip().startswith("%"):
            pass
        else:
            new_file_remove_comment_lines.append(l)
    main_file = '\n'.join(new_file_remove_comment_lines)
    # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file)  # convert \include commands into \input commands
    main_file = re.sub(r'(?<!\\)%.*', '', main_file)  # strip trailing (mid-line) comments with a regex
    return main_file
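
# Sketch of the two comment rules above: full-line comments are dropped, trailing
# comments are cut, and the escaped percent sign \% survives the (?<!\\) lookbehind.
def _demo_rm_comments():
    src = "% a full-line comment\nkeep 100\\% of this % but not this"
    assert rm_comments(src) == "keep 100\\% of this "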

def merge_tex_files_(project_folder, main_file, mode):
    """
    Merge a Tex project recursively (inner worker).
    """
    main_file = rm_comments(main_file)
    for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
        f = s.group(1)
        fp = os.path.join(project_folder, f)
        if os.path.exists(fp):
            # e.g., \input{srcs/07_appendix.tex}
            with open(fp, 'r', encoding='utf-8', errors='replace') as fx:
                c = fx.read()
        else:
            # e.g., \input{srcs/07_appendix}
            with open(fp + '.tex', 'r', encoding='utf-8', errors='replace') as fx:
                c = fx.read()
        c = merge_tex_files_(project_folder, c, mode)
        main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
    return main_file

def merge_tex_files(project_folder, main_file, mode):
    """
    Merge a Tex project recursively.
    P.S. also inject CTEX to support Chinese
    P.S. also strip the Latex comments
    """
    main_file = merge_tex_files_(project_folder, main_file, mode)
    main_file = rm_comments(main_file)

    if mode == 'translate_zh':
        # find the paper's documentclass
        pattern = re.compile(r'\\documentclass.*\n')
        match = pattern.search(main_file)
        assert match is not None, "Cannot find documentclass statement!"
        position = match.end()
        add_ctex = '\\usepackage{ctex}\n'
        add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
        main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
        # fontset=windows
        import platform
        main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}", main_file)
        main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}", main_file)
        # find the paper abstract
        pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
        pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
        match_opt1 = pattern_opt1.search(main_file)
        match_opt2 = pattern_opt2.search(main_file)
        assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
    return main_file



"""
========================================================================
Post process
========================================================================
"""
def mod_inbraket(match):
    """
    Why does chatgpt replace the commas inside \cite{} with full-width Chinese commas...
    """
    # get the matched string
    cmd = match.group(1)
    str_to_modify = match.group(2)
    # modify the matched string
    str_to_modify = str_to_modify.replace(':', ':')   # full-width colon -> ASCII colon
    str_to_modify = str_to_modify.replace(',', ',')   # full-width comma -> ASCII comma
    # str_to_modify = 'BOOM'
    return "\\" + cmd + "{" + str_to_modify + "}"

def fix_content(final_tex, node_string):
    """
    Fix common GPT errors to increase the compilation success rate
    """
    final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)

    if "Traceback" in final_tex and "[Local Message]" in final_tex:
        final_tex = node_string  # something went wrong; restore the original text
    if node_string.count('\\begin') != final_tex.count('\\begin'):
        final_tex = node_string  # something went wrong; restore the original text
    if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
        # walk the string and escape any _ that lacks a \
        final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)

    def compute_brace_level(string):
        # count the number of { and }
        brace_level = 0
        for c in string:
            if c == "{": brace_level += 1
            elif c == "}": brace_level -= 1
        return brace_level
    def join_most(tex_t, tex_o):
        # stitch the translated string and the original string together when something goes wrong
        p_t = 0
        p_o = 0
        def find_next(string, chars, begin):
            p = begin
            while p < len(string):
                if string[p] in chars: return p, string[p]
                p += 1
            return None, None
        while True:
            res1, char = find_next(tex_o, ['{', '}'], p_o)
            if res1 is None: break
            res2, char = find_next(tex_t, [char], p_t)
            if res2 is None: break
            p_o = res1 + 1
            p_t = res2 + 1
        return tex_t[:p_t] + tex_o[p_o:]

    if compute_brace_level(final_tex) != compute_brace_level(node_string):
        # something went wrong; fall back to part of the original text to keep the braces balanced
        final_tex = join_most(final_tex, node_string)
    return final_tex

def split_subprocess(txt, project_folder, return_dict, opts):
    """
    Break the latex file down into a linked list; each node carries a preserve
    flag indicating whether it should be processed by GPT.
    """
    text = txt
    mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM

    # absorb everything above the title and the authors
    text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
    # absorb \iffalse ... \fi comments
    text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
    # absorb begin-end blocks spanning at most 42 lines
    text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
    # absorb anonymous display equations
    text, mask = set_forbidden_text(text, mask, [ r"\$\$(.*?)\$\$", r"\\\[.*?\\\]" ], re.DOTALL)
    # absorb other miscellaneous elements
    text, mask = set_forbidden_text(text, mask, [ r"\\section\{(.*?)\}", r"\\section\*\{(.*?)\}", r"\\subsection\{(.*?)\}", r"\\subsubsection\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, [ r"\\bibliography\{(.*?)\}", r"\\bibliographystyle\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{figure\}(.*?)\\end\{figure\}", r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{multline\}(.*?)\\end\{multline\}", r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{table\}(.*?)\\end\{table\}", r"\\begin\{table\*\}(.*?)\\end\{table\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{minipage\}(.*?)\\end\{minipage\}", r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
    text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "])
    text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
    # the reverse operations must come last
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    root = convert_to_linklist(text, mask)

    # fix unbalanced braces
    node = root
    while True:
        string = node.string
        if node.preserve:
            node = node.next
            if node is None: break
            continue
        def break_check(string):
            str_stack = [""]  # (lv, index)
            for i, c in enumerate(string):
                if c == '{':
                    str_stack.append('{')
                elif c == '}':
                    if len(str_stack) == 1:
                        print('stack fix')
                        return i
                    str_stack.pop(-1)
                else:
                    str_stack[-1] += c
            return -1
        bp = break_check(string)

        if bp == -1:
            pass
        elif bp == 0:
            node.string = string[:1]
            q = LinkedListNode(string[1:], False)
            q.next = node.next
            node.next = q
        else:
            node.string = string[:bp]
            q = LinkedListNode(string[bp:], False)
            q.next = node.next
            node.next = q

        node = node.next
        if node is None: break

    # preserve empty lines and sentences that are too short
    node = root
    while True:
        if len(node.string.strip('\n').strip('')) == 0: node.preserve = True
        if len(node.string.strip('\n').strip('')) < 42: node.preserve = True
        node = node.next
        if node is None: break
    node = root
    while True:
        if node.next and node.preserve and node.next.preserve:
            node.string += node.next.string
            node.next = node.next.next
        node = node.next
        if node is None: break

    # detach leading/trailing line breaks from transform nodes
    node = root
    prev_node = None
    while True:
        if not node.preserve:
            lstriped_ = node.string.lstrip().lstrip('\n')
            if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_) != len(node.string)):
                prev_node.string += node.string[:-len(lstriped_)]
                node.string = lstriped_
            rstriped_ = node.string.rstrip().rstrip('\n')
            if (node.next is not None) and (node.next.preserve) and (len(rstriped_) != len(node.string)):
                node.next.string = node.string[len(rstriped_):] + node.next.string
                node.string = rstriped_
        # =====
        prev_node = node
        node = node.next
        if node is None: break
    # write an html debug file: preserved areas (PRESERVE) are marked red, transformed areas (TRANSFORM) black
    with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
        segment_parts_for_gpt = []
        nodes = []
        node = root
        while True:
            nodes.append(node)
            show_html = node.string.replace('\n', '<br/>')
            if not node.preserve:
                segment_parts_for_gpt.append(node.string)
                f.write(f'<p style="color:black;">#{show_html}#</p>')
            else:
                f.write(f'<p style="color:red;">{show_html}</p>')
            node = node.next
            if node is None: break

    for n in nodes: n.next = None  # break the links so the list can be pickled across processes
    return_dict['nodes'] = nodes
    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
    return return_dict


class LatexPaperSplit():
    """
    Break the latex file down into a linked list; each node carries a preserve
    flag indicating whether it should be processed by GPT.
    """
    def __init__(self) -> None:
        self.nodes = None
        self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
            "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
            "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
        # Please do not remove or modify this warning unless you are the original author of the paper
        # (if you are, feel free to contact the developers via the QQ group in the README)
        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"

    def merge_result(self, arr, mode, msg):
        """
        Merge the results after the GPT process has completed
        """
        result_string = ""
        p = 0
        for node in self.nodes:
            if node.preserve:
                result_string += node.string
            else:
                result_string += fix_content(arr[p], node.string)
                p += 1
        if mode == 'translate_zh':
            pattern = re.compile(r'\\begin\{abstract\}.*\n')
            match = pattern.search(result_string)
            if not match:
                # match \abstract{xxxx}
                pattern_compile = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
                match = pattern_compile.search(result_string)
                position = match.regs[1][0]
            else:
                # match \begin{abstract}xxxx\end{abstract}
                position = match.end()
            result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
        return result_string

    def split(self, txt, project_folder, opts):
        """
        Break the latex file down into a linked list; each node carries a preserve
        flag indicating whether it should be processed by GPT.
        P.S. run it in a subprocess (multiprocessing) to avoid timeout errors
        """
        import multiprocessing
        manager = multiprocessing.Manager()
        return_dict = manager.dict()
        p = multiprocessing.Process(
            target=split_subprocess,
            args=(txt, project_folder, return_dict, opts))
        p.start()
        p.join()
        p.close()
        self.nodes = return_dict['nodes']
        self.sp = return_dict['segment_parts_for_gpt']
        return self.sp


class LatexPaperFileGroup():
    """
    Use the tokenizer to break texts down according to max_token_limit
    """
    def __init__(self):
        self.file_paths = []
        self.file_contents = []
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []

        # count_token
        from request_llm.bridge_all import model_info
        enc = model_info["gpt-3.5-turbo"]['tokenizer']
        def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
        self.get_token_num = get_token_num

    def run_file_split(self, max_token_limit=1900):
        """
        Use the tokenizer to break texts down according to max_token_limit
        """
        for index, file_content in enumerate(self.file_contents):
            if self.get_token_num(file_content) < max_token_limit:
                self.sp_file_contents.append(file_content)
                self.sp_file_index.append(index)
                self.sp_file_tag.append(self.file_paths[index])
            else:
                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
                for j, segment in enumerate(segments):
                    self.sp_file_contents.append(segment)
                    self.sp_file_index.append(index)
                    self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
        print('Segmentation: done')

    def merge_result(self):
        self.file_result = ["" for _ in range(len(self.file_paths))]
        for r, k in zip(self.sp_file_result, self.sp_file_index):
            self.file_result[k] += r

    def write_result(self):
        manifest = []
        for path, res in zip(self.file_paths, self.file_result):
            with open(path + '.polish.tex', 'w', encoding='utf8') as f:
                manifest.append(path + '.polish.tex')
                f.write(res)
        return manifest

def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):

    # write html
    try:
        import shutil
        from .crazy_utils import construct_html
        from toolbox import gen_time_str
        ch = construct_html()
        orig = ""
        trans = ""
        final = []
        for c, r in zip(sp_file_contents, sp_file_result):
            final.append(c)
            final.append(r)
        for i, k in enumerate(final):
            if i % 2 == 0:
                orig = k
            if i % 2 == 1:
                trans = k
                ch.add_row(a=orig, b=trans)
        create_report_file_name = f"{gen_time_str()}.trans.html"
        ch.save_file(create_report_file_name)
        shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
        promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
    except:
        from toolbox import trimmed_format_exc
        print('writing html result failed:', trimmed_format_exc())

def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
|
||||
import time, os, re
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
|
||||
|
||||
# <-------- 寻找主tex文件 ---------->
|
||||
maintex = 寻找Latex主文件(file_manifest, mode)
|
||||
chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
time.sleep(3)
|
||||
|
||||
# <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ---------->
|
||||
main_tex_basename = os.path.basename(maintex)
|
||||
assert main_tex_basename.endswith('.tex')
|
||||
main_tex_basename_bare = main_tex_basename[:-4]
|
||||
may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl')
|
||||
if os.path.exists(may_exist_bbl):
|
||||
shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl'))
|
||||
shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl'))
|
||||
shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl'))
|
||||
|
||||
with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
|
||||
content = f.read()
|
||||
merged_content = merge_tex_files(project_folder, content, mode)
|
||||
|
||||
with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f:
|
||||
f.write(merged_content)
|
||||
|
||||
# <-------- 精细切分latex文件 ---------->
|
||||
chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
lps = LatexPaperSplit()
|
||||
res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
|
||||
|
||||
# <-------- 拆分过长的latex片段 ---------->
|
||||
pfg = LatexPaperFileGroup()
|
||||
for index, r in enumerate(res):
|
||||
pfg.file_paths.append('segment-' + str(index))
|
||||
pfg.file_contents.append(r)
|
||||
|
||||
pfg.run_file_split(max_token_limit=1024)
|
||||
n_split = len(pfg.sp_file_contents)
|
||||
|
||||
# <-------- 根据需要切换prompt ---------->
|
||||
inputs_array, sys_prompt_array = switch_prompt(pfg, mode)
|
||||
inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag]
|
||||
|
||||
if os.path.exists(pj(project_folder,'temp.pkl')):
|
||||
|
||||
# <-------- 【仅调试】如果存在调试缓存文件,则跳过GPT请求环节 ---------->
|
||||
pfg = objload(file=pj(project_folder,'temp.pkl'))
|
||||
|
||||
else:
|
||||
# <-------- gpt 多线程请求 ---------->
|
||||
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
inputs_array=inputs_array,
|
||||
inputs_show_user_array=inputs_show_user_array,
|
||||
llm_kwargs=llm_kwargs,
|
||||
chatbot=chatbot,
|
||||
history_array=[[""] for _ in range(n_split)],
|
||||
sys_prompt_array=sys_prompt_array,
|
||||
# max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
|
||||
scroller_max_len = 40
|
||||
)
|
||||
|
||||
# <-------- 文本碎片重组为完整的tex片段 ---------->
|
||||
pfg.sp_file_result = []
|
||||
for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents):
|
||||
pfg.sp_file_result.append(gpt_say)
|
||||
pfg.merge_result()
|
||||
|
||||
# <-------- 临时存储用于调试 ---------->
|
||||
pfg.get_token_num = None
|
||||
objdump(pfg, file=pj(project_folder,'temp.pkl'))
|
||||
|
||||
write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)
|
||||
|
||||
# <-------- 写出文件 ---------->
|
||||
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
|
||||
final_tex = lps.merge_result(pfg.file_result, mode, msg)
|
||||
with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
|
||||
if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)
|
||||
|
||||
|
||||
# <-------- 整理结果, 退出 ---------->
|
||||
chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF'))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
# <-------- 返回 ---------->
|
||||
return project_folder + f'/merge_{mode}.tex'
|
||||
|
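The `switch_prompt` callable is injected by the caller, so its real contents live elsewhere; what follows is only a minimal sketch of the interface this function relies on. It receives the `LatexPaperFileGroup` and the mode string, and must return one input prompt per fragment in `pfg.sp_file_contents` plus a matching list of system prompts. The function name and prompt wording below are illustrative, not the project's actual prompts.

def example_switch_prompt(pfg, mode):
    # Hypothetical prompt builder: one (input, system prompt) pair per fragment
    n = len(pfg.sp_file_contents)
    if mode == 'proofread':
        inputs_array = ["Below is a section from an academic paper, polish this section: " + frag
                        for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n)]
    elif mode == 'translate_zh':
        inputs_array = ["Below is a section from an English academic paper, translate it into Chinese: " + frag
                        for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional translator." for _ in range(n)]
    else:
        raise ValueError(f"unknown mode: {mode}")
    return inputs_array, sys_prompt_array
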

def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified):
    try:
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            log = f.read()
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            file_lines = f.readlines()
        import re
        buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log)
        buggy_lines = [int(l) for l in buggy_lines]
        buggy_lines = sorted(buggy_lines)
        print("removing lines that have errors", buggy_lines)
        file_lines.pop(buggy_lines[0]-1)    # drop the first offending line (log line numbers are 1-based)
        with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
            f.writelines(file_lines)
        return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
    except:
        print("Fatal error occurred, but we cannot identify the error; please download the zip, read the latex log, and compile manually.")
        return False, -1, [-1]

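The regex above relies on pdflatex being invoked with `-file-line-error` (as the compile commands below do), which makes error messages start with a `file:line:` prefix instead of the default `!`-style banner. A quick illustration, using a made-up log excerpt:

import re

# A fabricated excerpt of a pdflatex log produced with -file-line-error
sample_log = """
./merge_fix_1.tex:217: Undefined control sequence.
./merge_fix_1.tex:562: Missing $ inserted.
"""
tex_name = 'merge_fix_1.tex'
print(re.findall(tex_name + ':([0-9]{1,5}):', sample_log))  # -> ['217', '562']
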

def compile_latex_with_timeout(command, timeout=60):
    import subprocess
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    try:
        stdout, stderr = process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()
        stdout, stderr = process.communicate()
        print("Process timed out!")
        return False
    return True

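Using `Popen` plus `communicate(timeout=...)`, rather than `subprocess.run`, lets the caller kill a hung `pdflatex` and still drain its pipes before giving up, so a stuck compile never blocks the UI thread indefinitely. A minimal usage sketch, assuming a hypothetical `example_latex_project` folder containing `main.tex`:

import os

work_folder = './example_latex_project'  # hypothetical folder, for illustration only
cwd = os.getcwd()
os.chdir(work_folder)
ok = compile_latex_with_timeout('pdflatex -interaction=batchmode -file-line-error main.tex', timeout=60)
os.chdir(cwd)
if not ok:
    print('compilation hung or crashed; inspect main.log')
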

def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
    import os, time
    current_dir = os.getcwd()
    n_fix = 1
    max_try = 32
    chatbot.append(["正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
    chatbot.append(["正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # refresh the UI
    yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # refresh the Gradio frontend

    while True:
        import os

        # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # refresh the Gradio frontend
        os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)

        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # refresh the Gradio frontend
        os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)

        if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
            # proceed only if the second step succeeded
            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # refresh the Gradio frontend
            if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
                os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex  {main_file_original}.aux'); os.chdir(current_dir)
            if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
                os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex  {main_file_modified}.aux'); os.chdir(current_dir)

            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # refresh the Gradio frontend
            os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
            os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
            os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
            os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)

            if mode != 'translate_zh':
                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # refresh the Gradio frontend
                print(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
                ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')

                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # refresh the Gradio frontend
                os.chdir(work_folder); ok = compile_latex_with_timeout('pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
                os.chdir(work_folder); ok = compile_latex_with_timeout('bibtex merge_diff.aux'); os.chdir(current_dir)
                os.chdir(work_folder); ok = compile_latex_with_timeout('pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
                os.chdir(work_folder); ok = compile_latex_with_timeout('pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)

        # <--------------------->
        os.chdir(current_dir)

        # <---------- check the results ----------->
        results_ = ""
        original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
        modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf'))
        diff_pdf_success = os.path.exists(pj(work_folder, 'merge_diff.pdf'))
        results_ += f"原始PDF编译是否成功: {original_pdf_success};"
        results_ += f"转化PDF编译是否成功: {modified_pdf_success};"
        results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
        yield from update_ui_lastest_msg(f'第{n_fix}次编译结束:<br/>{results_}...', chatbot, history) # refresh the Gradio frontend

        if diff_pdf_success:
            result_pdf = pj(work_folder, 'merge_diff.pdf') # get pdf path; the diff PDF is compiled in work_folder, not work_folder_modified
            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
        if modified_pdf_success:
            yield from update_ui_lastest_msg('转化PDF编译已经成功, 即将退出 ...', chatbot, history) # refresh the Gradio frontend
            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
            if os.path.exists(pj(work_folder, '..', 'translation')):
                shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
            return True # success
        else:
            if n_fix >= max_try: break
            n_fix += 1
            can_retry, main_file_modified, buggy_lines = remove_buggy_lines(
                file_path=pj(work_folder_modified, f'{main_file_modified}.tex'),
                log_path=pj(work_folder_modified, f'{main_file_modified}.log'),
                tex_name=f'{main_file_modified}.tex',
                tex_name_pure=f'{main_file_modified}',
                n_fix=n_fix,
                work_folder_modified=work_folder_modified,
            )
            yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # refresh the Gradio frontend
            if not can_retry: break

    os.chdir(current_dir)
    return False # failed

@@ -1,4 +1,4 @@
-from toolbox import CatchException, update_ui
+from toolbox import CatchException, update_ui, promote_file_to_downloadzone
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 import re

@@ -29,9 +29,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
         for h in history:
             f.write("\n>>>" + h)
         f.write('</code>')
-    res = '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}')
-    print(res)
-    return res
+    promote_file_to_downloadzone(f'./gpt_log/{file_name}', rename_file=file_name, chatbot=chatbot)
+    return '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}')
 
 def gen_file_preview(file_name):
     try:

@@ -8,7 +8,7 @@ def inspect_dependency(chatbot, history):
         import manim
         return True
     except:
-        chatbot.append(["导入依赖失败", "使用该模块需要额外依赖,安装方法:```pip install manimgl```"])
+        chatbot.append(["导入依赖失败", "使用该模块需要额外依赖,安装方法:```pip install manim manimgl```"])
         yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
         return False

@@ -13,7 +13,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     # Split the PDF recursively: each chunk (ideally one complete section such as introduction or experiments; split further only when necessary)
     # must stay under 2500 tokens
     file_content, page_one = read_and_clean_pdf_text(file_name) # (try to) split the PDF by section
 
+    file_content = file_content.encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars
+    page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
 
     TOKEN_LIMIT_PER_FRAGMENT = 2500
 
     from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf

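The `encode('utf-8', 'ignore').decode()` round-trip added above is a compact way to drop characters that cannot survive UTF-8 encoding, such as the lone surrogates that PDF text extractors sometimes emit. A tiny self-contained illustration with a made-up string:

# '\ud800' is a lone surrogate: valid inside a Python str, but not encodable as UTF-8
dirty = 'Hello \ud800world'
clean = dirty.encode('utf-8', 'ignore').decode()
print(clean)  # -> 'Hello world'
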
102
crazy_functions/联网的ChatGPT_bing版.py
普通文件
@@ -0,0 +1,102 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llm.bridge_all import model_info


def bing_search(query, proxies=None):
    url = f"https://cn.bing.com/search?q={query}"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
    response = requests.get(url, headers=headers, proxies=proxies)
    soup = BeautifulSoup(response.content, 'html.parser')
    results = []
    for g in soup.find_all('li', class_='b_algo'):  # each organic Bing result sits in an <li class="b_algo">
        anchors = g.find_all('a')
        if anchors:
            link = anchors[0]['href']
            if not link.startswith('http'):
                continue
            title = g.find('h2').text
            item = {'title': title, 'link': link}
            results.append(item)

    for r in results:
        print(r['link'])
    return results

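Because `bing_search` scrapes live HTML, its behavior hinges on Bing keeping the `li.b_algo` result markup. A minimal offline sketch of the same parsing logic against a hand-written snippet (the HTML below is illustrative, not captured from Bing):

from bs4 import BeautifulSoup

# Hand-written stand-in for a Bing results page
html = '''
<ol>
  <li class="b_algo"><h2><a href="https://example.com/a">Result A</a></h2></li>
  <li class="b_algo"><h2><a href="/relative">Skipped: relative link</a></h2></li>
</ol>
'''
soup = BeautifulSoup(html, 'html.parser')
for g in soup.find_all('li', class_='b_algo'):
    anchors = g.find_all('a')
    if anchors and anchors[0]['href'].startswith('http'):
        print({'title': g.find('h2').text, 'link': anchors[0]['href']})
# -> {'title': 'Result A', 'link': 'https://example.com/a'}
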

def scrape_text(url, proxies) -> str:
    """Scrape text from a webpage

    Args:
        url (str): The URL to scrape text from

    Returns:
        str: The scraped text
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
        'Content-Type': 'text/plain',
    }
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
        if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
    except:
        return "无法连接到该网页"
    soup = BeautifulSoup(response.text, "html.parser")
    for script in soup(["script", "style"]):
        script.extract()
    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))  # split on double spaces; a single-space split would put every word on its own line
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text

@CatchException
def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    txt             text typed into the input box, e.g. a passage to translate, or a path holding files to process
    llm_kwargs      gpt model parameters such as temperature and top_p; normally just pass them through
    plugin_kwargs   plugin parameters; unused for now
    chatbot         handle of the chat display box, used to show output to the user
    history         chat history, i.e. the context so far
    system_prompt   the silent reminder given to gpt
    web_port        the port this software is running on
    """
    history = []    # clear the history to avoid input overflow
    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI; requesting gpt takes a while, so update the page promptly first

    # ------------- < step 1: scrape the search engine results > -------------
    from toolbox import get_conf
    proxies, = get_conf('proxies')
    urls = bing_search(txt, proxies)
    history = []

    # ------------- < step 2: visit the webpages one by one > -------------
    max_search_result = 8   # cap on how many webpages to collect
    for index, url in enumerate(urls[:max_search_result]):
        res = scrape_text(url['link'], proxies)
        history.extend([f"第{index}份搜索结果:", res])
        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI; requesting gpt takes a while, so update the page promptly first

    # ------------- < step 3: let ChatGPT synthesize > -------------
    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
    i_say, history = input_clipping(    # clip the input, starting from the longest entries, to avoid blowing the token budget
        inputs=i_say,
        history=history,
        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
    )
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
    )
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say); history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

131
crazy_functions/虚空终端.py
普通文件
@@ -0,0 +1,131 @@
from toolbox import CatchException, update_ui, gen_time_str
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import input_clipping


prompt = """
I have to achieve some functionalities by calling one of the functions below.
Your job is to find the correct function to use to satisfy my requirement,
and then write python code to call this function with correct parameters.

These are functions you are allowed to choose from:
1.
    功能描述: 总结音视频内容
    调用函数: ConcludeAudioContent(txt, llm_kwargs)
    参数说明:
        txt: 音频文件的路径
        llm_kwargs: 模型参数, 永远给定None
2.
    功能描述: 将每次对话记录写入Markdown格式的文件中
    调用函数: WriteMarkdown()
3.
    功能描述: 将指定目录下的PDF文件从英文翻译成中文
    调用函数: BatchTranslatePDFDocuments_MultiThreaded(txt, llm_kwargs)
    参数说明:
        txt: PDF文件所在的路径
        llm_kwargs: 模型参数, 永远给定None
4.
    功能描述: 根据文本使用GPT模型生成相应的图像
    调用函数: ImageGeneration(txt, llm_kwargs)
    参数说明:
        txt: 图像生成所用到的提示文本
        llm_kwargs: 模型参数, 永远给定None
5.
    功能描述: 对输入的word文档进行摘要生成
    调用函数: SummarizingWordDocuments(input_path, output_path)
    参数说明:
        input_path: 待处理的word文档路径
        output_path: 摘要生成后的文档路径


You should always answer with the following format:
----------------
Code:
```
class AutoAcademic(object):
    def __init__(self):
        self.selected_function = "FILL_CORRECT_FUNCTION_HERE" # e.g., "GenerateImage"
        self.txt = "FILL_MAIN_PARAMETER_HERE" # e.g., "荷叶上的蜻蜓"
        self.llm_kwargs = None
```
Explanation:
只有GenerateImage和生成图像相关, 因此选择GenerateImage函数。
----------------

Now, this is my requirement:

"""

def get_fn_lib():
    return {
        "BatchTranslatePDFDocuments_MultiThreaded": ("crazy_functions.批量翻译PDF文档_多线程", "批量翻译PDF文档"),
        "SummarizingWordDocuments": ("crazy_functions.总结word文档", "总结word文档"),
        "ImageGeneration": ("crazy_functions.图片生成", "图片生成"),
        "TranslateMarkdownFromEnglishToChinese": ("crazy_functions.批量Markdown翻译", "Markdown中译英"),  # note: despite the English key, this maps to the Chinese-to-English Markdown plugin
        "SummaryAudioVideo": ("crazy_functions.总结音视频", "总结音视频"),
    }


def inspect_dependency(chatbot, history):
    return True

def eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    import importlib

    with open('gpt_log/void_terminal_runtime.py', 'w', encoding='utf8') as f:
        f.write(code)

    try:
        # note: import_module's second argument is the anchor package for relative
        # imports; passing a class name there (as the original code did) is a no-op
        module = importlib.import_module('gpt_log.void_terminal_runtime')
        # importlib.reload(module)
        auto_dict = getattr(module, 'AutoAcademic')()
        selected_function = auto_dict.selected_function
        txt = auto_dict.txt
        fp, fn = get_fn_lib()[selected_function]
        fn_plugin = getattr(importlib.import_module(fp), fn)
        yield from fn_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    except:
        from toolbox import trimmed_format_exc
        chatbot.append(["执行错误", f"\n```\n{trimmed_format_exc()}\n```\n"])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

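One caveat with this write-then-import pattern, hinted at by the commented-out `importlib.reload` above: Python caches modules, so if `void_terminal_runtime.py` is rewritten within the same process, a bare `import_module` returns the stale first version. A minimal sketch of the reload dance, assuming the file may be rewritten between calls:

import importlib
import sys

def load_runtime_fresh():
    # Re-import gpt_log.void_terminal_runtime, picking up the newest file contents
    name = 'gpt_log.void_terminal_runtime'
    if name in sys.modules:
        module = importlib.reload(sys.modules[name])  # refresh the cached module
    else:
        module = importlib.import_module(name)
    return module
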
def get_code_block(reply):
    import re
    pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
    matches = re.findall(pattern, reply) # find all code blocks in text
    if len(matches) != 1:
        raise RuntimeError("GPT is not generating proper code.")
    code = matches[0]
    # str.strip('python') would strip any of the characters p/y/t/h/o/n from both
    # ends of the block, so remove the language tag explicitly instead
    if code.startswith('python'): code = code[len('python'):]
    return code

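A quick illustration of the extraction on a reply that follows the Code format demanded by the prompt template above (the reply text is fabricated for the example):

# Fabricated model reply in the required format
reply = '''----------------
Code:
```
class AutoAcademic(object):
    def __init__(self):
        self.selected_function = "ImageGeneration"
        self.txt = "荷叶上的蜻蜓"
        self.llm_kwargs = None
```
Explanation: ...
'''
code = get_code_block(reply)
print(code.strip().splitlines()[0])  # -> class AutoAcademic(object):
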

@CatchException
def 终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    txt             text typed into the input box, e.g. a passage to translate, or a path holding files to process
    llm_kwargs      gpt model parameters such as temperature and top_p; normally just pass them through
    plugin_kwargs   plugin parameters; unused for now
    chatbot         handle of the chat display box, used to show output to the user
    history         chat history, i.e. the context so far
    system_prompt   the silent reminder given to gpt
    web_port        the port this software is running on
    """
    # clear the history to avoid input overflow
    history = []

    # basic info: functionality and contributor
    chatbot.append(["函数插件功能?", "根据自然语言执行插件命令, 作者: binary-husky, 插件初始化中 ..."])
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # # try to import dependencies; if any are missing, suggest how to install them
    # dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history) # refresh the UI
    # if not dep_ok: return

    # input
    i_say = prompt + txt
    # start
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
        sys_prompt=""
    )

    # extract the generated code and execute the selected plugin
    code = get_code_block(gpt_say)
    yield from eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)