Mirrored from
https://github.com/binary-husky/gpt_academic.git
Synced 2025-12-07 06:56:48 +00:00
Compare commits
12 commits
version3.4 ... version3.4
| Author | SHA1 | Commit date |
|---|---|---|
|  | 37172906ef |  |
|  | 3b78e0538b |  |
|  | d8f9ac71d0 |  |
|  | aced272d3c |  |
|  | aff77a086d |  |
|  | 49253c4dc6 |  |
|  | 1a00093015 |  |
|  | 64f76e7401 |  |
|  | eb4c07997e |  |
|  | 99cf7205c3 |  |
|  | d684b4cdb3 |  |
|  | 4290821a50 |  |
@@ -226,12 +226,20 @@ def get_crazy_functions():
     try:
         from crazy_functions.联网的ChatGPT import 连接网络回答问题
         function_plugins.update({
-            "连接网络回答问题(先输入问题,再点击按钮,需要访问谷歌)": {
+            "连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": {
                 "Color": "stop",
                 "AsButton": False,   # 加入下拉菜单中
                 "Function": HotReload(连接网络回答问题)
             }
         })
+        from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
+        function_plugins.update({
+            "连接网络回答问题(中文Bing版,输入问题后点击该插件)": {
+                "Color": "stop",
+                "AsButton": False,   # 加入下拉菜单中
+                "Function": HotReload(连接bing搜索回答问题)
+            }
+        })
     except:
         print('Load function plugin failed')

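
As a side note on the registration pattern above: each `function_plugins` entry maps a UI label to a config dict, and the `HotReload`-wrapped generator under `"Function"` is what the frontend ultimately drives. A minimal, hypothetical dispatch sketch (the real dispatcher lives in the Gradio frontend and is not part of this diff):

```python
# Hypothetical sketch of how a function_plugins entry is consumed.
# The dict schema ("Color", "AsButton", "Function") comes from the diff above;
# invoke_plugin itself is illustrative, not the repository's actual dispatcher.
def invoke_plugin(function_plugins, label, txt, llm_kwargs, plugin_kwargs,
                  chatbot, history, system_prompt, web_port):
    entry = function_plugins[label]   # look up the plugin config by its UI label
    handler = entry["Function"]       # HotReload-wrapped generator function
    # Plugins are generators: they yield after each update_ui() call so the
    # frontend can stream intermediate chatbot state to the browser.
    yield from handler(txt, llm_kwargs, plugin_kwargs, chatbot,
                       history, system_prompt, web_port)
```
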
@@ -348,17 +356,28 @@ def get_crazy_functions():
     try:
         from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
         function_plugins.update({
-            "[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": {
+            "Latex英文纠错+高亮修正位置 [需Latex]": {
                 "Color": "stop",
                 "AsButton": False,
-                # "AdvancedArgs": True,
-                # "ArgsReminder": "",
+                "AdvancedArgs": True,
+                "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。",
                 "Function": HotReload(Latex英文纠错加PDF对比)
             }
         })
         from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
         function_plugins.update({
-            "Arixv翻译(输入arxivID) [需Latex]": {
+            "Arixv翻译(输入arxivID)[需Latex]": {
+                "Color": "stop",
+                "AsButton": False,
+                "AdvancedArgs": True,
+                "ArgsReminder":
+                    "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
+                    "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+                "Function": HotReload(Latex翻译中文并重新编译PDF)
+            }
+        })
+        function_plugins.update({
+            "本地论文翻译(上传Latex压缩包)[需Latex]": {
                 "Color": "stop",
                 "AsButton": False,
                 "AdvancedArgs": True,
@@ -368,17 +387,6 @@ def get_crazy_functions():
                 "Function": HotReload(Latex翻译中文并重新编译PDF)
             }
         })
-        # function_plugins.update({
-        #     "本地论文翻译(上传Latex压缩包) [需Latex]": {
-        #         "Color": "stop",
-        #         "AsButton": False,
-        #         "AdvancedArgs": True,
-        #         "ArgsReminder":
-        #             "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
-        #             "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
-        #         "Function": HotReload(Latex翻译中文并重新编译PDF)
-        #     }
-        # })
     except:
         print('Load function plugin failed')

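
The `"AdvancedArgs": True` entries enabled above expose an extra text box in the UI; its content reaches the plugin as `plugin_kwargs["advanced_arg"]` and, in the hunks that follow, is bound into `switch_prompt` with `functools.partial`. A short sketch of that data flow, assembled from this commit (the sample argument value is illustrative):

```python
# Sketch of the AdvancedArgs data flow introduced by this commit.
# The key "advanced_arg" and the partial(...) binding are taken from the
# hunks below; the sample plugin_kwargs value is illustrative.
from functools import partial

def switch_prompt(pfg, mode, more_requirement):
    ...  # builds inputs_array / sys_prompt_array (see the next hunk)

plugin_kwargs = {"advanced_arg": 'If the term "agent" is used, translate it to "智能体".'}
more_req = plugin_kwargs.get("advanced_arg", "")
_switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
# Downstream code keeps the old two-argument call shape:
#   _switch_prompt_(pfg, mode='proofread_en')
```
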
@@ -19,9 +19,9 @@ def switch_prompt(pfg, mode, more_requirement):
     - sys_prompt_array: A list of strings containing prompts for system prompts.
     """
     n_split = len(pfg.sp_file_contents)
-    if mode == 'proofread':
+    if mode == 'proofread_en':
         inputs_array = [r"Below is a section from an academic paper, proofread this section." +
-                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
+                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
                         r"Answer me only with the revised text:" +
                         f"\n\n{frag}" for frag in pfg.sp_file_contents]
         sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
@@ -70,6 +70,12 @@ def move_project(project_folder, arxiv_id=None):
         shutil.rmtree(new_workfolder)
     except:
         pass
+
+    # align subfolder if there is a folder wrapper
+    items = glob.glob(pj(project_folder,'*'))
+    if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1:
+        if os.path.isdir(items[0]): project_folder = items[0]
+
     shutil.copytree(src=project_folder, dst=new_workfolder)
     return new_workfolder

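
The block added to `move_project` handles archives that unpack into a single wrapper directory: no `*.tex` at the top level, exactly one entry, and that entry is a folder. A worked illustration with a hypothetical path:

```python
# Worked illustration of the wrapper-folder case handled above.
# The path is hypothetical; the glob logic mirrors the added lines.
import glob, os
from os.path import join as pj

project_folder = "gpt_log/extract"              # e.g. contains only extract/paper/
items = glob.glob(pj(project_folder, '*'))      # -> ['gpt_log/extract/paper']
if len(glob.glob(pj(project_folder, '*.tex'))) == 0 and len(items) == 1:
    if os.path.isdir(items[0]):
        project_folder = items[0]               # descend into 'gpt_log/extract/paper'
```
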
@@ -141,7 +147,11 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
     chatbot.append([ "函数插件功能?",
         "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

+    # <-------------- more requirements ------------->
+    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+    more_req = plugin_kwargs.get("advanced_arg", "")
+    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

     # <-------------- check deps ------------->
     try:
@@ -180,13 +190,13 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo


     # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
-    if not os.path.exists(project_folder + '/merge_proofread.tex'):
+    if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
         yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
-                                chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
+                                chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)


     # <-------------- compile PDF ------------->
-    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread',
+    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en',
                             work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)


@@ -195,6 +205,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
     if success:
         chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
     else:
         chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
@@ -278,6 +289,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
     if success:
         chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
     else:
         chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
@@ -190,10 +190,10 @@ def test_Latex():
     # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
     # txt = r"https://arxiv.org/abs/2002.09253"
     # txt = r"https://arxiv.org/abs/2306.07831"
-    txt = r"https://arxiv.org/abs/2212.10156"
+    # txt = r"https://arxiv.org/abs/2212.10156"
     # txt = r"https://arxiv.org/abs/2211.11559"
     # txt = r"https://arxiv.org/abs/2303.08774"
-    # txt = r"https://arxiv.org/abs/2303.12712"
+    txt = r"https://arxiv.org/abs/2303.12712"
     # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"


@@ -430,7 +430,7 @@ class LatexPaperSplit():
     """
     def __init__(self) -> None:
         self.nodes = None
-        self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
+        self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
             "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
             "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
         # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
@@ -532,11 +532,11 @@ class LatexPaperFileGroup():
             f.write(res)
         return manifest

-def write_html(sp_file_contents, sp_file_result, chatbot):
+def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):

     # write html
     try:
-        import copy
+        import shutil
         from .crazy_utils import construct_html
         from toolbox import gen_time_str
         ch = construct_html()
@@ -554,6 +554,7 @@ def write_html(sp_file_contents, sp_file_result, chatbot):
             ch.add_row(a=orig, b=trans)
         create_report_file_name = f"{gen_time_str()}.trans.html"
         ch.save_file(create_report_file_name)
+        shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
         promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
     except:
         from toolbox import trimmed_format_exc
@@ -634,7 +635,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
         pfg.get_token_num = None
         objdump(pfg, file=pj(project_folder,'temp.pkl'))

-    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot)
+    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)

     # <-------- 写出文件 ---------->
     msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
@@ -741,13 +742,15 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
             results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
             yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面

+        if diff_pdf_success:
+            result_pdf = pj(work_folder_modified, f'merge_diff.pdf')    # get pdf path
+            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)    # promote file to web UI
         if modified_pdf_success:
             yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
-            os.chdir(current_dir)
-            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
+            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')    # get pdf path
             if os.path.exists(pj(work_folder, '..', 'translation')):
                 shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
-            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)
+            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)    # promote file to web UI
             return True # 成功啦
         else:
             if n_fix>=max_try: break
@@ -13,7 +13,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     # 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割)
     # 的长度必须小于 2500 个 Token
     file_content, page_one = read_and_clean_pdf_text(file_name) # (尝试)按照章节切割PDF
+    file_content = file_content.encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars
+    page_one = str(page_one).encode('utf-8', 'ignore').decode()  # avoid reading non-utf8 chars

     TOKEN_LIMIT_PER_FRAGMENT = 2500

     from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
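
The two lines added to `解析PDF` sanitize PDF extractions before tokenization; `errors='ignore'` silently drops anything that cannot survive a UTF-8 round trip. A quick self-contained demonstration:

```python
# Demonstration of the encode/decode round trip added above: a lone surrogate
# (a typical artifact of a bad PDF text extraction) is dropped by 'ignore'.
raw = "Résumé \udcff section"                    # '\udcff' cannot be UTF-8 encoded
clean = raw.encode('utf-8', 'ignore').decode()
print(clean)                                     # "Résumé  section" (surrogate gone)
```
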
crazy_functions/联网的ChatGPT_bing版.py (new file, 102 lines)
@@ -0,0 +1,102 @@
+from toolbox import CatchException, update_ui
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
+import requests
+from bs4 import BeautifulSoup
+from request_llm.bridge_all import model_info
+
+
+def bing_search(query, proxies=None):
+    query = query
+    url = f"https://cn.bing.com/search?q={query}"
+    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
+    response = requests.get(url, headers=headers, proxies=proxies)
+    soup = BeautifulSoup(response.content, 'html.parser')
+    results = []
+    for g in soup.find_all('li', class_='b_algo'):
+        anchors = g.find_all('a')
+        if anchors:
+            link = anchors[0]['href']
+            if not link.startswith('http'):
+                continue
+            title = g.find('h2').text
+            item = {'title': title, 'link': link}
+            results.append(item)
+
+    for r in results:
+        print(r['link'])
+    return results
+
+
+def scrape_text(url, proxies) -> str:
+    """Scrape text from a webpage
+
+    Args:
+        url (str): The URL to scrape text from
+
+    Returns:
+        str: The scraped text
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
+        'Content-Type': 'text/plain',
+    }
+    try:
+        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
+        if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
+    except:
+        return "无法连接到该网页"
+    soup = BeautifulSoup(response.text, "html.parser")
+    for script in soup(["script", "style"]):
+        script.extract()
+    text = soup.get_text()
+    lines = (line.strip() for line in text.splitlines())
+    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+    text = "\n".join(chunk for chunk in chunks if chunk)
+    return text
+
+
+@CatchException
+def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
+    llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
+    plugin_kwargs   插件模型的参数,暂时没有用武之地
+    chatbot         聊天显示框的句柄,用于显示给用户
+    history         聊天历史,前情提要
+    system_prompt   给gpt的静默提醒
+    web_port        当前软件运行的端口号
+    """
+    history = []    # 清空历史,以免输入溢出
+    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
+                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
+
+    # ------------- < 第1步:爬取搜索引擎的结果 > -------------
+    from toolbox import get_conf
+    proxies, = get_conf('proxies')
+    urls = bing_search(txt, proxies)
+    history = []
+
+    # ------------- < 第2步:依次访问网页 > -------------
+    max_search_result = 8   # 最多收纳多少个网页的结果
+    for index, url in enumerate(urls[:max_search_result]):
+        res = scrape_text(url['link'], proxies)
+        history.extend([f"第{index}份搜索结果:", res])
+        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
+
+    # ------------- < 第3步:ChatGPT综合 > -------------
+    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
+    i_say, history = input_clipping(    # 裁剪输入,从最长的条目开始裁剪,防止爆token
+        inputs=i_say,
+        history=history,
+        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
+    )
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=i_say, inputs_show_user=i_say,
+        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
+    )
+    chatbot[-1] = (i_say, gpt_say)
+    history.append(i_say);history.append(gpt_say)
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
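
The new module chains `bing_search`, `scrape_text`, `input_clipping`, and a single GPT request. A standalone sketch of the scraping half, runnable from the repository root so the package-relative imports resolve (the query and proxy settings are illustrative):

```python
# Standalone sketch of the new module's scraping pipeline (illustrative query;
# run from the repository root so `crazy_functions` imports as a package).
from crazy_functions.联网的ChatGPT_bing版 import bing_search, scrape_text

proxies = None                                    # or a requests-style proxies dict
results = bing_search("大语言模型 评测", proxies)   # -> [{'title': ..., 'link': ...}, ...]
corpus = []
for index, hit in enumerate(results[:8]):         # mirrors max_search_result = 8
    corpus.extend([f"第{index}份搜索结果:", scrape_text(hit['link'], proxies)])
# `corpus` plays the role of the `history` list that input_clipping trims
# before the final request_gpt_model_in_new_thread_with_ui_alive call.
```
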
@@ -1,6 +1,7 @@
 from toolbox import CatchException, update_ui
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-import datetime
+import datetime, re

 @CatchException
 def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
@@ -18,12 +19,34 @@ def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     for i in range(5):
         currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
         currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
-        i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
+        i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?用中文列举两条,然后分别给出描述事件的两个英文单词。' + '当你给出关键词时,使用以下json格式:{"KeyWords":[EnglishKeyWord1,EnglishKeyWord2]}。'
         gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
             inputs=i_say, inputs_show_user=i_say,
             llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
-            sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
+            sys_prompt='输出格式示例:1908年,美国消防救援事业发展的“美国消防协会”成立。关键词:{"KeyWords":["Fire","American"]}。'
         )
+        gpt_say = get_images(gpt_say)
         chatbot[-1] = (i_say, gpt_say)
         history.append(i_say);history.append(gpt_say)
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
+
+
+def get_images(gpt_say):
+    def get_image_by_keyword(keyword):
+        import requests
+        from bs4 import BeautifulSoup
+        response = requests.get(f'https://wallhaven.cc/search?q={keyword}', timeout=2)
+        for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"):
+            if "data-src" in image_element: break
+        return image_element["data-src"]
+
+    for keywords in re.findall('{"KeyWords":\[(.*?)\]}', gpt_say):
+        keywords = [n.strip('"') for n in keywords.split(',')]
+        try:
+            description = keywords[0]
+            url = get_image_by_keyword(keywords[0])
+            img_tag = f"\n\n![{description}]({url})"
+            gpt_say += img_tag
+        except:
+            continue
+    return gpt_say
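
A worked example of the keyword extraction in `get_images`, with a fabricated model reply that follows the prompted JSON format. (One caveat worth noting: for a BeautifulSoup `Tag`, `"data-src" in image_element` tests child membership rather than attributes; `image_element.has_attr("data-src")` would test the attribute itself.)

```python
# Worked example of the regex extraction used by get_images above.
# `gpt_say` is a fabricated reply that follows the prompted JSON format.
import re

gpt_say = '1908年,“美国消防协会”成立。关键词:{"KeyWords":["Fire","American"]}。'
for keywords in re.findall(r'{"KeyWords":\[(.*?)\]}', gpt_say):
    keywords = [n.strip('"') for n in keywords.split(',')]
    print(keywords)   # ['Fire', 'American']; keywords[0] seeds the image search
```
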
@@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot):
     else:
         report_files = find_recent_files('gpt_log')
     if len(report_files) == 0:
-        return None, chatbot
+        return cookies, None, chatbot
     # files.extend(report_files)
     file_links = ''
     for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
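
The arity change above matters because a Gradio callback must return one value per declared output component; once `cookies` is among the outputs, the early-return path has to match the normal three-value return. A reduced sketch (the component wiring in the comment is hypothetical; only the three-value return comes from the diff):

```python
# Reduced sketch of the return-arity rule behind the fix above.
def on_report_generated(cookies, files, chatbot):
    report_files = []                      # pretend find_recent_files found nothing
    if len(report_files) == 0:
        return cookies, None, chatbot      # one value per output: state, files, chatbot
    # ... otherwise build file_links and return the same three-value shape

# Hypothetical wiring: the outputs list must line up with the returned tuple:
#   btn.click(on_report_generated, [cookies, file_upload, chatbot],
#             [cookies, file_upload, chatbot])
```
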
version (4 lines changed)
@@ -1,5 +1,5 @@
 {
-    "version": 3.41,
+    "version": 3.42,
     "show_feature": true,
-    "new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
+    "new_feature": "完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
 }