Merge branch 'master' into huggingfacelocal

这个提交包含在:
binary-husky
2023-09-15 17:19:59 +08:00
当前提交 6a56fb7477
共有 41 个文件被更改,包括 496 次插入和 534 次删除

查看文件

@@ -1,6 +1,7 @@
from collections.abc import Callable, Iterable, Mapping
from typing import Any
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, promote_file_to_downloadzone, clear_file_downloadzone
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc
from toolbox import promote_file_to_downloadzone, get_log_folder
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import input_clipping, try_install_deps
from multiprocessing import Process, Pipe
@@ -92,7 +93,7 @@ def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
def make_module(code):
module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
with open(f'gpt_log/{module_file}.py', 'w', encoding='utf8') as f:
with open(f'{get_log_folder()}/{module_file}.py', 'w', encoding='utf8') as f:
f.write(code)
def get_class_name(class_string):
@@ -102,7 +103,7 @@ def make_module(code):
return class_name
class_name = get_class_name(code)
return f"gpt_log.{module_file}->{class_name}"
return f"{get_log_folder().replace('/', '.')}.{module_file}->{class_name}"
def init_module_instance(module):
import importlib
@@ -171,7 +172,7 @@ def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history
file_type = file_path.split('.')[-1]
# 粗心检查
if 'private_upload' in txt:
if is_the_upload_folder(txt):
chatbot.append([
"...",
f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"

查看文件

@@ -1,4 +1,4 @@
from toolbox import CatchException, update_ui, ProxyNetworkActivate
from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
@@ -15,7 +15,12 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
web_port 当前软件运行的端口号
"""
history = [] # 清空历史,以免输入溢出
chatbot.append(("这是什么功能?", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
# < --------------------读取参数--------------- >
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
kai_id = plugin_kwargs.get("advanced_arg", 'default')
chatbot.append((f"向`{kai_id}`知识库中添加文件。", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# resolve deps
@@ -24,17 +29,12 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from .crazy_utils import knowledge_archive_interface
except Exception as e:
chatbot.append(
["依赖不足",
"导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]
)
chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
from .crazy_utils import try_install_deps
try_install_deps(['zh_langchain==0.2.1', 'pypinyin'])
# < --------------------读取参数--------------- >
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
kai_id = plugin_kwargs.get("advanced_arg", 'default')
try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
return
# < --------------------读取文件--------------- >
file_manifest = []
@@ -84,19 +84,18 @@ def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
from .crazy_utils import try_install_deps
try_install_deps(['zh_langchain==0.2.1'])
try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
return
# < ------------------- --------------- >
kai = knowledge_archive_interface()
if 'langchain_plugin_embedding' in chatbot._cookies:
resp, prompt = kai.answer_with_archive_by_id(txt, chatbot._cookies['langchain_plugin_embedding'])
else:
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
kai_id = plugin_kwargs.get("advanced_arg", 'default')
resp, prompt = kai.answer_with_archive_by_id(txt, kai_id)
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
kai_id = plugin_kwargs.get("advanced_arg", 'default')
resp, prompt = kai.answer_with_archive_by_id(txt, kai_id)
chatbot.append((txt, '[Local Message] ' + prompt))
chatbot.append((txt, f'[知识库 {kai_id}] ' + prompt))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=prompt, inputs_show_user=txt,

查看文件

@@ -1,5 +1,5 @@
from toolbox import update_ui, trimmed_format_exc
from toolbox import CatchException, report_execption, write_results_to_file, zip_folder
from toolbox import update_ui, trimmed_format_exc, promote_file_to_downloadzone, get_log_folder
from toolbox import CatchException, report_execption, write_history_to_file, zip_folder
class PaperFileGroup():
@@ -51,7 +51,7 @@ class PaperFileGroup():
import os, time
folder = os.path.dirname(self.file_paths[0])
t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
zip_folder(folder, './gpt_log/', f'{t}-polished.zip')
zip_folder(folder, get_log_folder(), f'{t}-polished.zip')
def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'):
@@ -126,7 +126,9 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
# <-------- 整理结果,退出 ---------->
create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
res = write_results_to_file(gpt_response_collection, file_name=create_report_file_name)
res = write_history_to_file(gpt_response_collection, file_basename=create_report_file_name)
promote_file_to_downloadzone(res, chatbot=chatbot)
history = gpt_response_collection
chatbot.append((f"{fp}完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -137,7 +139,7 @@ def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Latex项目进行润色。函数插件贡献者: Binary-Husky"])
"对整个Latex项目进行润色。函数插件贡献者: Binary-Husky注意,此插件不调用Latex,如果有Latex环境,请使用“Latex英文纠错+高亮”插件)"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议

查看文件

@@ -1,5 +1,5 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import update_ui, promote_file_to_downloadzone
from toolbox import CatchException, report_execption, write_history_to_file
fast_debug = False
class PaperFileGroup():
@@ -95,7 +95,8 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
# <-------- 整理结果,退出 ---------->
create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
res = write_results_to_file(gpt_response_collection, file_name=create_report_file_name)
res = write_history_to_file(gpt_response_collection, create_report_file_name)
promote_file_to_downloadzone(res, chatbot=chatbot)
history = gpt_response_collection
chatbot.append((f"{fp}完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

查看文件

@@ -1,4 +1,4 @@
from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone
from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone
from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
from functools import partial
import glob, os, requests, time
@@ -65,7 +65,7 @@ def move_project(project_folder, arxiv_id=None):
if arxiv_id is not None:
new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
else:
new_workfolder = f'gpt_log/{gen_time_str()}'
new_workfolder = f'{get_log_folder()}/{gen_time_str()}'
try:
shutil.rmtree(new_workfolder)
except:

查看文件

@@ -1,5 +1,7 @@
from toolbox import update_ui, get_conf, trimmed_format_exc
from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
import threading
import os
import logging
def input_clipping(inputs, history, max_token_limit):
import numpy as np
@@ -469,14 +471,16 @@ def read_and_clean_pdf_text(fp):
'- ', '') for t in text_areas['blocks'] if 'lines' in t]
############################## <第 2 步,获取正文主字体> ##################################
fsize_statiscs = {}
for span in meta_span:
if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
fsize_statiscs[span[1]] += span[2]
main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
if REMOVE_FOOT_NOTE:
give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
try:
fsize_statiscs = {}
for span in meta_span:
if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
fsize_statiscs[span[1]] += span[2]
main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
if REMOVE_FOOT_NOTE:
give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
except:
raise RuntimeError(f'抱歉, 我们暂时无法解析此PDF文档: {fp}')
############################## <第 3 步,切分和重新整合> ##################################
mega_sec = []
sec = []
@@ -703,49 +707,96 @@ class knowledge_archive_interface():
)
self.threadLock.release()
return resp, prompt
@Singleton
class nougat_interface():
    """Runs the external `nougat` command-line PDF parser, one invocation at a time."""

    def __init__(self):
        # Held for the whole duration of a parse so that concurrent requests queue up.
        self.threadLock = threading.Lock()

    def nougat_with_timeout(self, command, cwd, timeout=3600):
        """
        Run `command` through the shell in directory `cwd`, waiting at most `timeout` seconds.

        Returns:
            True if the process finished within the timeout, False if it had to be killed.
        """
        import subprocess
        logging.info(f'正在执行命令 {command}')
        # NOTE(review): shell=True with an interpolated command string. The caller wraps
        # paths in double quotes, but a path that itself contains a quote would not be
        # escaped -- confirm that only trusted paths reach this point.
        process = subprocess.Popen(command, shell=True, cwd=cwd)
        try:
            process.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            # Reap the killed child so it does not linger as a zombie.
            process.communicate()
            print("Process timed out!")
            return False
        return True

    def NOUGAT_parse_pdf(self, fp, chatbot, history):
        """
        Generator: parse the PDF at `fp` with nougat and return the path of the produced .mmd file.

        Progress messages are pushed to the UI through `update_ui_lastest_msg` while waiting.

        Raises:
            RuntimeError: if nougat produced no `*.mmd` file in the output folder.
        """
        from toolbox import update_ui_lastest_msg
        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
                                         chatbot=chatbot, history=history, delay=0)
        self.threadLock.acquire()
        # try/finally guarantees the lock is released on every exit path, including an
        # exception from makedirs or the generator being closed while suspended at a yield.
        try:
            import glob, os
            from toolbox import get_log_folder, gen_time_str
            dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
            os.makedirs(dst)

            yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度正在加载NOUGAT... 提示首次运行需要花费较长时间下载NOUGAT参数",
                                             chatbot=chatbot, history=history, delay=0)
            # The boolean result is not inspected; success is judged by the output file below.
            self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
            res = glob.glob(os.path.join(dst,'*.mmd'))
            if len(res) == 0:
                raise RuntimeError("Nougat解析论文失败。")
            return res[0]
        finally:
            self.threadLock.release()
def try_install_deps(deps, reload_m=None):
    """
    Install each requirement in `deps` with `pip install --user`, then reload modules.

    Args:
        deps: iterable of pip requirement strings (e.g. 'zh_langchain==0.2.1').
        reload_m: optional iterable of module names to import and reload after the
            installation; None (the default) or an empty iterable reloads nothing.

    Raises:
        subprocess.CalledProcessError: if a pip invocation exits with a non-zero status.
    """
    import subprocess, sys, importlib
    import site
    for dep in deps:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
    # Presumably reloaded so that a freshly created user site-packages directory becomes
    # importable without restarting the interpreter -- TODO confirm.
    importlib.reload(site)
    for m in (reload_m or ()):
        # import_module returns the named module itself, also for dotted names,
        # whereas __import__ would hand back the top-level package.
        importlib.reload(importlib.import_module(m))
class construct_html():
def __init__(self) -> None:
self.css = """
HTML_CSS = """
.row {
display: flex;
flex-wrap: wrap;
}
.column {
flex: 1;
padding: 10px;
}
.table-header {
font-weight: bold;
border-bottom: 1px solid black;
}
.table-row {
border-bottom: 1px solid lightgray;
}
.table-cell {
padding: 5px;
}
"""
self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'
"""
def add_row(self, a, b):
tmp = """
TABLE_CSS = """
<div class="row table-row">
<div class="column table-cell">REPLACE_A</div>
<div class="column table-cell">REPLACE_B</div>
</div>
"""
"""
class construct_html():
def __init__(self) -> None:
self.css = HTML_CSS
self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'
def add_row(self, a, b):
tmp = TABLE_CSS
from toolbox import markdown_convertion
tmp = tmp.replace('REPLACE_A', markdown_convertion(a))
tmp = tmp.replace('REPLACE_B', markdown_convertion(b))
@@ -753,6 +804,6 @@ class construct_html():
def save_file(self, file_name):
with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
f.write(self.html_string.encode('utf-8', 'ignore').decode())
return os.path.join(get_log_folder(), file_name)

查看文件

@@ -1,4 +1,4 @@
from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面
from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
from .latex_toolbox import PRESERVE, TRANSFORM
from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
@@ -363,7 +363,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
if mode!='translate_zh':
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
@@ -439,9 +439,9 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
trans = k
ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name)
shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
res = ch.save_file(create_report_file_name)
shutil.copyfile(res, pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=res, chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
print('writing html result failed:', trimmed_format_exc())

查看文件

@@ -256,6 +256,7 @@ def find_main_tex_file(file_manifest, mode):
canidates_score.append(0)
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
file_content = rm_comments(file_content)
for uw in unexpected_words:
if uw in file_content:
canidates_score[-1] -= 1
@@ -290,7 +291,11 @@ def find_tex_file_ignore_case(fp):
import glob
for f in glob.glob(dir_name+'/*.tex'):
base_name_s = os.path.basename(fp)
if base_name_s.lower() == base_name.lower(): return f
base_name_f = os.path.basename(f)
if base_name_s.lower() == base_name_f.lower(): return f
# 试着加上.tex后缀试试
if not base_name_s.endswith('.tex'): base_name_s+='.tex'
if base_name_s.lower() == base_name_f.lower(): return f
return None
def merge_tex_files_(project_foler, main_file, mode):
@@ -301,9 +306,9 @@ def merge_tex_files_(project_foler, main_file, mode):
for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
f = s.group(1)
fp = os.path.join(project_foler, f)
fp = find_tex_file_ignore_case(fp)
if fp:
with open(fp, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
fp_ = find_tex_file_ignore_case(fp)
if fp_:
with open(fp_, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
else:
raise RuntimeError(f'找不到{fp},Tex源文件缺失')
c = merge_tex_files_(project_foler, c, mode)
@@ -423,7 +428,7 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
def merge_pdfs(pdf1_path, pdf2_path, output_path):
import PyPDF2
Percent = 0.8
Percent = 0.95
# Open the first PDF file
with open(pdf1_path, 'rb') as pdf1_file:
pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)

查看文件

@@ -1,5 +1,6 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file, get_conf
from toolbox import update_ui, get_log_folder
from toolbox import write_history_to_file, promote_file_to_downloadzone
from toolbox import CatchException, report_execption, get_conf
import re, requests, unicodedata, os
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
def download_arxiv_(url_pdf):
@@ -28,7 +29,7 @@ def download_arxiv_(url_pdf):
if k in other_info['comment']:
title = k + ' ' + title
download_dir = './gpt_log/arxiv/'
download_dir = get_log_folder(plugin_name='arxiv')
os.makedirs(download_dir, exist_ok=True)
title_str = title.replace('?', '')\
@@ -40,9 +41,6 @@ def download_arxiv_(url_pdf):
requests_pdf_url = url_pdf
file_path = download_dir+title_str
# if os.path.exists(file_path):
# print('返回缓存文件')
# return './gpt_log/arxiv/'+title_str
print('下载中')
proxies, = get_conf('proxies')
@@ -61,7 +59,7 @@ def download_arxiv_(url_pdf):
.replace('\n', '')\
.replace(' ', ' ')\
.replace(' ', ' ')
return './gpt_log/arxiv/'+title_str, other_info
return file_path, other_info
def get_name(_url_):
@@ -184,11 +182,10 @@ def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, hi
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
# 写入文件
import shutil
# 重置文件的创建时间
shutil.copyfile(pdf_path, f'./gpt_log/{os.path.basename(pdf_path)}'); os.remove(pdf_path)
res = write_results_to_file(history)
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
promote_file_to_downloadzone(pdf_path, chatbot=chatbot)
chatbot.append(("完成了吗?", res + "\n\nPDF文件也已经下载"))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面

查看文件

@@ -1,138 +0,0 @@
import threading
from request_llm.bridge_all import predict_no_ui_long_connection
from toolbox import update_ui
from toolbox import CatchException, write_results_to_file, report_execption
from .crazy_utils import breakdown_txt_to_satisfy_token_limit
def extract_code_block_carefully(txt):
    """
    Return the text between the first and the last ``` fence found in `txt`.

    When `txt` holds fewer than two fence markers it is returned unchanged. Fence
    markers located between the first and the last one are kept in the result, and
    nothing after the opening fence (such as a language tag) is stripped.
    """
    pieces = txt.split('```')
    # Fewer than three pieces means at most one fence: there is no closed block to extract.
    if len(pieces) < 3:
        return txt
    # Discard what precedes the first fence and what follows the last one.
    return '```'.join(pieces[1:-1])
def break_txt_into_half_at_some_linebreak(txt):
    """
    Split `txt` into two parts at a line boundary near the middle.

    Returns:
        A `(first, second)` tuple of strings. With an odd number of lines the
        second part receives the extra line.
    """
    rows = txt.split('\n')
    midpoint = len(rows) // 2
    first_half = "\n".join(rows[:midpoint])
    second_half = "\n".join(rows[midpoint:])
    return first_half, second_half
@CatchException
def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt, web_port):
    """
    Plugin generator: ask the LLM to translate the Chinese text in this project's
    Python sources into English, one worker thread per file.

    Files matched by './*.py' and './crazy_functions/*.py' are processed; results are
    written under 'gpt_log/generated_english_version/'. A file whose request failed is
    copied over unchanged. UI refreshes are yielded through `update_ui`.

    `plugin_kwargs` and `web_port` are accepted but not used in this body; the incoming
    `history` is discarded.
    """
    # Step 1: clear the history so the input does not overflow.
    history = []

    # Step 2: check the dependency; if it is missing, tell the user how to install it.
    try:
        import tiktoken
    except:
        report_execption(chatbot, history,
            a = f"解析项目: {txt}",
            b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        return

    # Step 3: collect the files to process.
    import time, glob, os, shutil, re
    os.makedirs('gpt_log/generated_english_version', exist_ok=True)
    os.makedirs('gpt_log/generated_english_version/crazy_functions', exist_ok=True)
    file_manifest = [f for f in glob.glob('./*.py') if ('test_project' not in f) and ('gpt_log' not in f)] + \
                    [f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
    i_say_show_user_buffer = []

    # Step 4: show something right away so the UI does not look frozen.
    for index, fp in enumerate(file_manifest):
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        i_say_show_user =f'[{index}/{len(file_manifest)}] 接下来请将以下代码中包含的所有中文转化为英文,只输出转化后的英文代码,请用代码块输出代码: {os.path.abspath(fp)}'
        i_say_show_user_buffer.append(i_say_show_user)
        chatbot.append((i_say_show_user, "[Local Message] 等待多线程操作,中间过程不予显示."))
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # Step 5: token counting used to cut files into request-sized pieces.
    MAX_TOKEN = 3000
    from request_llm.bridge_all import model_info
    enc = model_info["gpt-3.5-turbo"]['tokenizer']
    def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))

    # Step 6: the per-file task. Results come back through index-addressed lists.
    mutable_return = [None for _ in file_manifest]
    observe_window = [[""] for _ in file_manifest]
    def thread_worker(fp,index):
        if index > 10:
            # Delay the later files to lower the request rate.
            time.sleep(60)
            print('Openai 限制免费用户每分钟20次请求,降低请求频率中。')
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        i_say_template = lambda fp, file_content: f'接下来请将以下代码中包含的所有中文转化为英文,只输出代码,文件名是{fp},文件代码是 ```{file_content}```'
        try:
            gpt_say = ""
            # Break the file into pieces that fit the token limit.
            file_content_breakdown = breakdown_txt_to_satisfy_token_limit(file_content, get_token_fn, MAX_TOKEN)
            for file_content_partial in file_content_breakdown:
                i_say = i_say_template(fp, file_content_partial)
                # ** gpt request **
                gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=observe_window[index])
                gpt_say_partial = extract_code_block_carefully(gpt_say_partial)
                gpt_say += gpt_say_partial
            mutable_return[index] = gpt_say
        except ConnectionAbortedError as token_exceed_err:
            # Print the exception bound by this handler (the original referenced an
            # undefined name here and would have raised NameError instead).
            print('至少一个线程任务Token溢出而失败', token_exceed_err)
        except Exception as e:
            print('至少一个线程任务意外失败', e)

    # Step 7: start all worker threads at once.
    handles = [threading.Thread(target=thread_worker, args=(fp,index)) for index, fp in enumerate(file_manifest)]
    for h in handles:
        h.daemon = True
        h.start()
    chatbot.append(('开始了吗?', f'多线程操作已经开始'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # Step 8: poll until every thread has finished, updating the status line meanwhile.
    cnt = 0
    while True:
        cnt += 1
        time.sleep(0.2)
        th_alive = [h.is_alive() for h in handles]
        if not any(th_alive): break
        # Show the tail of each worker's observe window, with markup characters replaced.
        observe_win = []
        for thread_index, alive in enumerate(th_alive):
            observe_win.append("[ ..."+observe_window[thread_index][0][-60:].replace('\n','').replace('```','...').replace(' ','.').replace('<br/>','.....').replace('$','.')+"... ]")
        stat = [f'执行中: {obs}\n\n' if alive else '已完成\n\n' for alive, obs in zip(th_alive, observe_win)]
        stat_str = ''.join(stat)
        chatbot[-1] = (chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt%10+1)))
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # Step 9: write the results to disk.
    for index, h in enumerate(handles):
        h.join() # not strictly needed: the polling loop only exits once all threads are done
        fp = file_manifest[index]
        gpt_say = mutable_return[index]
        i_say_show_user = i_say_show_user_buffer[index]

        where_to_relocate = f'gpt_log/generated_english_version/{fp}'
        if gpt_say is not None:
            with open(where_to_relocate, 'w+', encoding='utf-8') as f:
                f.write(gpt_say)
        else: # the request failed: keep the original file
            shutil.copyfile(file_manifest[index], where_to_relocate)
        chatbot.append((i_say_show_user, f'[Local Message] 已完成{os.path.abspath(fp)}的转化,\n\n存入{os.path.abspath(where_to_relocate)}'))
        # NOTE(review): gpt_say is None for failed files, so None ends up in history --
        # confirm that write_results_to_file tolerates that.
        history.append(i_say_show_user); history.append(gpt_say)
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        time.sleep(1)

    # Step 10: save a report of the whole run.
    res = write_results_to_file(history)
    chatbot.append(("生成一份任务执行报告", res))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

查看文件

@@ -1,4 +1,4 @@
from toolbox import CatchException, update_ui, get_conf, select_api_key
from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import datetime
@@ -33,7 +33,7 @@ def gen_image(llm_kwargs, prompt, resolution="256x256"):
raise RuntimeError(response.content.decode())
# 文件保存到本地
r = requests.get(image_url, proxies=proxies)
file_path = 'gpt_log/image_gen/'
file_path = f'{get_log_folder()}/image_gen/'
os.makedirs(file_path, exist_ok=True)
file_name = 'Image' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.png'
with open(file_path+file_name, 'wb+') as f: f.write(r.content)

查看文件

@@ -1,4 +1,4 @@
from toolbox import CatchException, update_ui, promote_file_to_downloadzone
from toolbox import CatchException, update_ui, promote_file_to_downloadzone, get_log_folder
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import re
@@ -10,8 +10,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
import time
if file_name is None:
file_name = 'chatGPT对话历史' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
os.makedirs('./gpt_log/', exist_ok=True)
with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
fp = os.path.join(get_log_folder(), file_name)
with open(fp, 'w', encoding='utf8') as f:
from themes.theme import advanced_css
f.write(f'<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>')
for i, contents in enumerate(chatbot):
@@ -29,8 +29,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
for h in history:
f.write("\n>>>" + h)
f.write('</code>')
promote_file_to_downloadzone(f'./gpt_log/{file_name}', rename_file=file_name, chatbot=chatbot)
return '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}')
promote_file_to_downloadzone(fp, rename_file=file_name, chatbot=chatbot)
return '对话历史写入:' + fp
def gen_file_preview(file_name):
try:
@@ -106,7 +106,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
if not success:
if txt == "": txt = '空空如也的输入栏'
import glob
local_history = "<br/>".join(["`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`" for f in glob.glob(f'gpt_log/**/chatGPT对话历史*.html', recursive=True)])
local_history = "<br/>".join(["`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`" for f in glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True)])
chatbot.append([f"正在查找对话历史文件html格式: {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件,您可以将任意一个文件路径粘贴到输入区,然后重试:<br/>{local_history}"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
@@ -132,8 +132,8 @@ def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot
"""
import glob, os
local_history = "<br/>".join(["`"+hide_cwd(f)+"`" for f in glob.glob(f'gpt_log/**/chatGPT对话历史*.html', recursive=True)])
for f in glob.glob(f'gpt_log/**/chatGPT对话历史*.html', recursive=True):
local_history = "<br/>".join(["`"+hide_cwd(f)+"`" for f in glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True)])
for f in glob.glob(f'{get_log_folder()}/**/chatGPT对话历史*.html', recursive=True):
os.remove(f)
chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

查看文件

@@ -1,5 +1,6 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import CatchException, report_execption
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False
@@ -71,11 +72,13 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot
history.extend([i_say,gpt_say])
this_paper_history.extend([i_say,gpt_say])
res = write_results_to_file(history)
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
res = write_results_to_file(history)
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("所有文件都总结完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

查看文件

@@ -1,5 +1,6 @@
from toolbox import CatchException, report_execption, select_api_key, update_ui, write_results_to_file, get_conf
from toolbox import CatchException, report_execption, select_api_key, update_ui, get_conf
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import write_history_to_file, promote_file_to_downloadzone, get_log_folder
def split_audio_file(filename, split_duration=1000):
"""
@@ -15,7 +16,7 @@ def split_audio_file(filename, split_duration=1000):
"""
from moviepy.editor import AudioFileClip
import os
os.makedirs('gpt_log/mp3/cut/', exist_ok=True) # 创建存储切割音频的文件夹
os.makedirs(f"{get_log_folder(plugin_name='audio')}/mp3/cut/", exist_ok=True) # 创建存储切割音频的文件夹
# 读取音频文件
audio = AudioFileClip(filename)
@@ -31,8 +32,8 @@ def split_audio_file(filename, split_duration=1000):
start_time = split_points[i]
end_time = split_points[i + 1]
split_audio = audio.subclip(start_time, end_time)
split_audio.write_audiofile(f"gpt_log/mp3/cut/{filename[0]}_{i}.mp3")
filelist.append(f"gpt_log/mp3/cut/{filename[0]}_{i}.mp3")
split_audio.write_audiofile(f"{get_log_folder(plugin_name='audio')}/mp3/cut/{filename[0]}_{i}.mp3")
filelist.append(f"{get_log_folder(plugin_name='audio')}/mp3/cut/{filename[0]}_{i}.mp3")
audio.close()
return filelist
@@ -52,7 +53,7 @@ def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history):
'Authorization': f"Bearer {api_key}"
}
os.makedirs('gpt_log/mp3/', exist_ok=True)
os.makedirs(f"{get_log_folder(plugin_name='audio')}/mp3/", exist_ok=True)
for index, fp in enumerate(file_manifest):
audio_history = []
# 提取文件扩展名
@@ -60,8 +61,8 @@ def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history):
# 提取视频中的音频
if ext not in [".mp3", ".wav", ".m4a", ".mpga"]:
audio_clip = AudioFileClip(fp)
audio_clip.write_audiofile(f'gpt_log/mp3/output{index}.mp3')
fp = f'gpt_log/mp3/output{index}.mp3'
audio_clip.write_audiofile(f"{get_log_folder(plugin_name='audio')}/mp3/output{index}.mp3")
fp = f"{get_log_folder(plugin_name='audio')}/mp3/output{index}.mp3"
# 调用whisper模型音频转文字
voice = split_audio_file(fp)
for j, i in enumerate(voice):
@@ -113,18 +114,19 @@ def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history):
history=audio_history,
sys_prompt="总结文章。"
)
history.extend([i_say, gpt_say])
audio_history.extend([i_say, gpt_say])
res = write_results_to_file(history)
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append((f"{index + 1}段音频完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 删除中间文件夹
import shutil
shutil.rmtree('gpt_log/mp3')
res = write_results_to_file(history)
shutil.rmtree(f"{get_log_folder(plugin_name='audio')}/mp3")
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("所有音频都总结完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history)

查看文件

@@ -1,7 +1,7 @@
import glob, time, os, re
import glob, time, os, re, logging
from toolbox import update_ui, trimmed_format_exc, gen_time_str, disable_auto_promotion
from toolbox import CatchException, report_execption, write_history_to_file
from toolbox import promote_file_to_downloadzone, get_log_folder
from toolbox import CatchException, report_execption, get_log_folder
from toolbox import write_history_to_file, promote_file_to_downloadzone
fast_debug = False
class PaperFileGroup():
@@ -34,7 +34,7 @@ class PaperFileGroup():
self.sp_file_contents.append(segment)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.md")
print('Segmentation: done')
logging.info('Segmentation: done')
def merge_result(self):
self.file_result = ["" for _ in range(len(self.file_paths))]
@@ -101,7 +101,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
pfg.merge_result()
pfg.write_result(language)
except:
print(trimmed_format_exc())
logging.error(trimmed_format_exc())
# <-------- 整理结果,退出 ---------->
create_report_file_name = gen_time_str() + f"-chatgpt.md"
@@ -121,7 +121,7 @@ def get_files_from_everything(txt, preference=''):
proxies, = get_conf('proxies')
# 网络的远程文件
if preference == 'Github':
print('正在从github下载资源 ...')
logging.info('正在从github下载资源 ...')
if not txt.endswith('.md'):
# Make a request to the GitHub API to retrieve the repository information
url = txt.replace("https://github.com/", "https://api.github.com/repos/") + '/readme'

查看文件

@@ -1,5 +1,6 @@
from toolbox import update_ui, promote_file_to_downloadzone, gen_time_str
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import CatchException, report_execption
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import read_and_clean_pdf_text
from .crazy_utils import input_clipping
@@ -99,8 +100,8 @@ do not have too much repetitive information, numerical values using the original
_, final_results = input_clipping("", final_results, max_token_limit=3200)
yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了
res = write_results_to_file(file_write_buffer, file_name=gen_time_str())
promote_file_to_downloadzone(res.split('\t')[-1], chatbot=chatbot)
res = write_history_to_file(file_write_buffer)
promote_file_to_downloadzone(res, chatbot=chatbot)
yield from update_ui(chatbot=chatbot, history=final_results) # 刷新界面

查看文件

@@ -1,6 +1,7 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import CatchException, report_execption
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from toolbox import write_history_to_file, promote_file_to_downloadzone
fast_debug = False
@@ -115,7 +116,8 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
chatbot[-1] = (i_say, gpt_say)
history.append(i_say); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_results_to_file(history)
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面

查看文件

@@ -86,31 +86,8 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
# 开始正式执行任务
yield from 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
def nougat_with_timeout(command, cwd, timeout=3600):
    """Run ``command`` in ``cwd`` and wait at most ``timeout`` seconds for it.

    Args:
        command: Either a shell command line (``str``, executed through the
            shell exactly as before) or an argument list, which is executed
            directly without a shell so no argument is ever re-parsed.
        cwd: Working directory for the child process.
        timeout: Seconds to wait before the child process is killed.

    Returns:
        ``False`` if the child exceeded ``timeout`` and was killed, ``True``
        otherwise. The child's exit status is not inspected.
    """
    import subprocess
    # Only plain strings go through the shell; argument lists bypass it.
    use_shell = isinstance(command, str)
    process = subprocess.Popen(command, shell=use_shell, cwd=cwd)
    try:
        process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()
        process.communicate()  # wait for the killed child so it is reaped
        print("Process timed out!")
        return False
    return True


def NOUGAT_parse_pdf(fp):
    """Convert the PDF at ``fp`` with the ``nougat`` command-line tool.

    The output is written to a fresh, timestamped directory under the
    ``nougat`` log folder.

    Args:
        fp: Path of the PDF file to parse.

    Returns:
        Path of the first ``*.mmd`` file found in the output directory.

    Raises:
        RuntimeError: If no ``*.mmd`` file was produced.
    """
    import glob
    from toolbox import get_log_folder, gen_time_str
    dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
    # exist_ok: two calls may resolve to the same timestamped directory name.
    os.makedirs(dst, exist_ok=True)
    # Pass an argument list instead of an interpolated shell string: ``fp`` may
    # be a user-supplied file name, and quotes or shell metacharacters in it
    # must not be interpreted by a shell.
    nougat_with_timeout(
        ['nougat', '--out', os.path.abspath(dst), os.path.abspath(fp)],
        os.getcwd(),
    )
    res = glob.glob(os.path.join(dst, '*.mmd'))
    if len(res) == 0:
        raise RuntimeError("Nougat解析论文失败。")
    return res[0]
def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import copy
@@ -119,9 +96,11 @@ def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwa
generated_conclusion_files = []
generated_html_files = []
DST_LANG = "中文"
from crazy_functions.crazy_utils import nougat_interface, construct_html
nougat_handle = nougat_interface()
for index, fp in enumerate(file_manifest):
chatbot.append(["当前进度:", f"正在解析论文,请稍候。第一次运行时,需要花费较长时间下载NOUGAT参数"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
fpp = NOUGAT_parse_pdf(fp)
fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history)
with open(fpp, 'r', encoding='utf8') as f:
article_content = f.readlines()
@@ -222,50 +201,3 @@ def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwa
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
class construct_html():
    """Accumulate a two-column HTML page row by row and write it to the log folder.

    ``add_row`` appends one row whose two cells are the ``markdown_convertion``
    renderings of its arguments; ``save_file`` writes the accumulated markup to
    a file under ``get_log_folder()``.
    """

    def __init__(self) -> None:
        # Stylesheet embedded in the page <head>.
        self.css = """
.row {
display: flex;
flex-wrap: wrap;
}
.column {
flex: 1;
padding: 10px;
}
.table-header {
font-weight: bold;
border-bottom: 1px solid black;
}
.table-row {
border-bottom: 1px solid lightgray;
}
.table-cell {
padding: 5px;
}
"""
        # Page markup accumulated so far; rows are appended by add_row().
        self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'

    def add_row(self, a, b):
        """Append one table row with ``a`` in the left cell and ``b`` in the right.

        Both arguments are passed through ``toolbox.markdown_convertion``
        before being inserted into the page.
        """
        from toolbox import markdown_convertion
        cell_a = markdown_convertion(a)
        cell_b = markdown_convertion(b)
        # Interpolate both cells in a single step. Chained str.replace() calls
        # would also rewrite a literal "REPLACE_B" occurring inside the first
        # cell's rendered content.
        self.html_string += f"""
<div class="row table-row">
<div class="column table-cell">{cell_a}</div>
<div class="column table-cell">{cell_b}</div>
</div>
"""

    def save_file(self, file_name):
        """Write the page to ``file_name`` inside the log folder and return its path."""
        path = os.path.join(get_log_folder(), file_name)
        with open(path, 'w', encoding='utf8') as f:
            # Drop any character that cannot be encoded as UTF-8 instead of
            # failing the write.
            f.write(self.html_string.encode('utf-8', 'ignore').decode())
        return path

查看文件

@@ -1,6 +1,6 @@
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import CatchException, report_execption, get_log_folder
from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
from toolbox import write_history_to_file, get_log_folder
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .crazy_utils import read_and_clean_pdf_text
@@ -63,6 +63,7 @@ def 解析PDF_基于GROBID(file_manifest, project_folder, llm_kwargs, plugin_kwa
generated_conclusion_files = []
generated_html_files = []
DST_LANG = "中文"
from crazy_functions.crazy_utils import construct_html
for index, fp in enumerate(file_manifest):
chatbot.append(["当前进度:", f"正在连接GROBID服务,请稍候: {grobid_url}\n如果等待时间过长,请修改config中的GROBID_URL,可修改成本地GROBID服务。"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
article_dict = parse_pdf(fp, grobid_url)
@@ -166,6 +167,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
TOKEN_LIMIT_PER_FRAGMENT = 1280
generated_conclusion_files = []
generated_html_files = []
from crazy_functions.crazy_utils import construct_html
for index, fp in enumerate(file_manifest):
# 读取PDF文件
file_content, page_one = read_and_clean_pdf_text(fp)
@@ -216,10 +218,11 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
final = ["一、论文概况\n\n---\n\n", paper_meta_info.replace('# ', '### ') + '\n\n---\n\n', "二、论文翻译", ""]
final.extend(gpt_response_collection_md)
create_report_file_name = f"{os.path.basename(fp)}.trans.md"
res = write_results_to_file(final, file_name=create_report_file_name)
res = write_history_to_file(final, create_report_file_name)
promote_file_to_downloadzone(res, chatbot=chatbot)
# 更新UI
generated_conclusion_files.append(f'./gpt_log/{create_report_file_name}')
generated_conclusion_files.append(f'{get_log_folder()}/{create_report_file_name}')
chatbot.append((f"{fp}完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -261,49 +264,3 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
class construct_html():
    """Accumulate a two-column HTML page row by row and write it to the log folder.

    ``add_row`` appends one row whose two cells are the ``markdown_convertion``
    renderings of its arguments; ``save_file`` writes the accumulated markup to
    a file under ``get_log_folder()``.
    """

    def __init__(self) -> None:
        # Stylesheet embedded in the page <head>.
        self.css = """
.row {
display: flex;
flex-wrap: wrap;
}
.column {
flex: 1;
padding: 10px;
}
.table-header {
font-weight: bold;
border-bottom: 1px solid black;
}
.table-row {
border-bottom: 1px solid lightgray;
}
.table-cell {
padding: 5px;
}
"""
        # Page markup accumulated so far; rows are appended by add_row().
        self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'

    def add_row(self, a, b):
        """Append one table row with ``a`` in the left cell and ``b`` in the right.

        Both arguments are passed through ``toolbox.markdown_convertion``
        before being inserted into the page.
        """
        from toolbox import markdown_convertion
        cell_a = markdown_convertion(a)
        cell_b = markdown_convertion(b)
        # Interpolate both cells in a single step. Chained str.replace() calls
        # would also rewrite a literal "REPLACE_B" occurring inside the first
        # cell's rendered content.
        self.html_string += f"""
<div class="row table-row">
<div class="column table-cell">{cell_a}</div>
<div class="column table-cell">{cell_b}</div>
</div>
"""

    def save_file(self, file_name):
        """Write the page to ``file_name`` inside the log folder and return its path."""
        path = os.path.join(get_log_folder(), file_name)
        with open(path, 'w', encoding='utf8') as f:
            # Drop any character that cannot be encoded as UTF-8 instead of
            # failing the write.
            f.write(self.html_string.encode('utf-8', 'ignore').decode())
        return path

查看文件

@@ -1,5 +1,6 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import CatchException, report_execption
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False
@@ -27,7 +28,8 @@ def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
if not fast_debug: time.sleep(2)
if not fast_debug:
res = write_results_to_file(history)
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面

查看文件

@@ -25,11 +25,12 @@ explain_msg = """
1. 请用**自然语言**描述您需要做什么。例如:
- 「请调用插件,为我翻译PDF论文,论文我刚刚放到上传区了」
- 「请调用插件翻译PDF论文,地址为https://aaa/bbb/ccc.pdf
- 「请调用插件翻译PDF论文,地址为https://openreview.net/pdf?id=rJl0r3R9KX
- 「把Arxiv论文翻译成中文PDF,arxiv论文的ID是1812.10695,记得用插件!」
- 「生成一张图片,图中鲜花怒放,绿草如茵,用插件实现」
- 「用插件翻译README,Github网址是https://github.com/facebookresearch/co-tracker」
- 「我不喜欢当前的界面颜色,修改配置,把主题THEME更换为THEME="High-Contrast"
- 「请调用插件,解析python源代码项目,代码我刚刚打包拖到上传区了」
- 「请问Transformer网络的结构是怎样的?」
2. 您可以打开插件下拉菜单以了解本项目的各种能力。
@@ -45,7 +46,7 @@ explain_msg = """
from pydantic import BaseModel, Field
from typing import List
from toolbox import CatchException, update_ui, gen_time_str
from toolbox import CatchException, update_ui, is_the_upload_folder
from toolbox import update_ui_lastest_msg, disable_auto_promotion
from request_llm.bridge_all import predict_no_ui_long_connection
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
@@ -111,7 +112,7 @@ def 虚空终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
# 用简单的关键词检测用户意图
is_certain, _ = analyze_intention_with_simple_rules(txt)
if txt.startswith('private_upload/') and len(txt) == 34:
if is_the_upload_folder(txt):
state.set_state(chatbot=chatbot, key='has_provided_explaination', value=False)
appendix_msg = "\n\n**很好,您已经上传了文件**,现在请您描述您的需求。"

查看文件

@@ -1,5 +1,6 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import CatchException, report_execption
from toolbox import write_history_to_file, promote_file_to_downloadzone
fast_debug = True
@@ -110,7 +111,8 @@ def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------- 写入文件,退出 ---------->
res = write_results_to_file(history)
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

查看文件

@@ -1,12 +1,13 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import update_ui, promote_file_to_downloadzone, disable_auto_promotion
from toolbox import CatchException, report_execption, write_history_to_file
from .crazy_utils import input_clipping
def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import os, copy
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
msg = '正常'
disable_auto_promotion(chatbot=chatbot)
summary_batch_isolation = True
inputs_array = []
inputs_show_user_array = []
@@ -22,7 +23,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
file_content = f.read()
prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {fp}'
# 装载请求内容
inputs_array.append(i_say)
inputs_show_user_array.append(i_say_show_user)
@@ -43,7 +44,8 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
# 全部文件解析完成,结果写入文件,准备对工程源代码进行汇总分析
report_part_1 = copy.deepcopy(gpt_response_collection)
history_to_return = report_part_1
res = write_results_to_file(report_part_1)
res = write_history_to_file(report_part_1)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
yield from update_ui(chatbot=chatbot, history=history_to_return) # 刷新界面
@@ -97,7 +99,8 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
############################## <END> ##################################
history_to_return.extend(report_part_2)
res = write_results_to_file(history_to_return)
res = write_history_to_file(history_to_return)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history_to_return) # 刷新界面
@@ -106,9 +109,8 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
def 解析项目本身(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
history = [] # 清空历史,以免输入溢出
import glob
file_manifest = [f for f in glob.glob('./*.py') if ('test_project' not in f) and ('gpt_log' not in f)] + \
[f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]+ \
[f for f in glob.glob('./request_llm/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
file_manifest = [f for f in glob.glob('./*.py')] + \
[f for f in glob.glob('./*/*.py')]
project_folder = './'
if len(file_manifest) == 0:
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")

查看文件

@@ -1,7 +1,7 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import CatchException, report_execption
from toolbox import write_history_to_file, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
@@ -17,32 +17,29 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
if not fast_debug: time.sleep(2)
msg = '正常'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
time.sleep(2)
all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
chatbot.append((i_say, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt=system_prompt) # 带超时倒计时
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say, gpt_say)
history.append(i_say); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_results_to_file(history)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
chatbot[-1] = (i_say, gpt_say)
history.append(i_say); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_history_to_file(history)
promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面

查看文件

@@ -2,8 +2,8 @@
# @Time : 2023/4/19
# @Author : Spike
# @Descr :
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file, get_log_folder
from toolbox import update_ui, get_conf
from toolbox import CatchException
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
@@ -30,14 +30,13 @@ def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
@CatchException
def 清除缓存(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
chatbot.append(['清除本地缓存数据', '执行中. 删除 gpt_log & private_upload'])
chatbot.append(['清除本地缓存数据', '执行中. 删除数据'])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
import shutil, os
gpt_log_dir = os.path.join(os.path.dirname(__file__), '..', 'gpt_log')
private_upload_dir = os.path.join(os.path.dirname(__file__), '..', 'private_upload')
shutil.rmtree(gpt_log_dir, ignore_errors=True)
shutil.rmtree(private_upload_dir, ignore_errors=True)
PATH_PRIVATE_UPLOAD, PATH_LOGGING = get_conf('PATH_PRIVATE_UPLOAD', 'PATH_LOGGING')
shutil.rmtree(PATH_LOGGING, ignore_errors=True)
shutil.rmtree(PATH_PRIVATE_UPLOAD, ignore_errors=True)
chatbot.append(['清除本地缓存数据', '执行完成'])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面