镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 14:36:48 +00:00
Dev/aibot/bug fix (#2086)
* 添加为windows的环境打包以及一键启动脚本 (#2068) * 新增自动打包windows下的环境依赖 --------- Co-authored-by: binary-husky <qingxu.fu@outlook.com> * update requirements * update readme * idor-vuln-bug-fix * vuln-bug-fix: validate file size, default 500M * add tts test * remove welcome card when layout overflows --------- Co-authored-by: Menghuan <menghuan2003@outlook.com> Co-authored-by: binary-husky <qingxu.fu@outlook.com> Co-authored-by: aibot <hangyuntang@qq.com>
这个提交包含在:
@@ -172,7 +172,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
|
||||
user_request 当前用户的请求信息(IP地址等)
|
||||
"""
|
||||
from crazy_functions.crazy_utils import get_files_from_everything
|
||||
success, file_manifest, _ = get_files_from_everything(txt, type='.html')
|
||||
success, file_manifest, _ = get_files_from_everything(txt, type='.html',chatbot=chatbot)
|
||||
|
||||
if not success:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from shared_utils.fastapi_server import validate_path_safety
|
||||
from toolbox import update_ui, trimmed_format_exc, promote_file_to_downloadzone, get_log_folder
|
||||
from toolbox import CatchException, report_exception, write_history_to_file, zip_folder
|
||||
from loguru import logger
|
||||
@@ -155,6 +156,7 @@ def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
|
||||
import glob, os
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
else:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
|
||||
@@ -193,6 +195,7 @@ def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
|
||||
import glob, os
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
else:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
|
||||
@@ -229,6 +232,7 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
|
||||
import glob, os
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
else:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import glob, shutil, os, re
|
||||
from loguru import logger
|
||||
from shared_utils.fastapi_server import validate_path_safety
|
||||
from toolbox import update_ui, trimmed_format_exc, gen_time_str
|
||||
from toolbox import CatchException, report_exception, get_log_folder
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
||||
@@ -118,7 +119,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
|
||||
def get_files_from_everything(txt, preference=''):
|
||||
def get_files_from_everything(txt, preference='', chatbox=None):
|
||||
if txt == "": return False, None, None
|
||||
success = True
|
||||
if txt.startswith('http'):
|
||||
@@ -146,9 +147,11 @@ def get_files_from_everything(txt, preference=''):
|
||||
# 直接给定文件
|
||||
file_manifest = [txt]
|
||||
project_folder = os.path.dirname(txt)
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
elif os.path.exists(txt):
|
||||
# 本地路径,递归搜索
|
||||
project_folder = txt
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)]
|
||||
else:
|
||||
project_folder = None
|
||||
@@ -177,7 +180,7 @@ def Markdown英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
|
||||
return
|
||||
history = [] # 清空历史,以免输入溢出
|
||||
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt, preference="Github")
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt, preference="Github", chatbox=chatbot)
|
||||
|
||||
if not success:
|
||||
# 什么都没有
|
||||
|
||||
@@ -26,7 +26,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
|
||||
# 清空历史,以免输入溢出
|
||||
history = []
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf', chatbot=chatbot)
|
||||
|
||||
# 检测输入参数,如没有给定输入参数,直接退出
|
||||
if (not success) and txt == "": txt = '空空如也的输入栏。提示:请先上传文件(把PDF文件拖入对话)。'
|
||||
|
||||
@@ -2,6 +2,7 @@ import os
|
||||
import threading
|
||||
from loguru import logger
|
||||
from shared_utils.char_visual_effect import scolling_visual_effect
|
||||
from shared_utils.fastapi_server import validate_path_safety
|
||||
from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
|
||||
|
||||
def input_clipping(inputs, history, max_token_limit, return_clip_flags=False):
|
||||
@@ -539,7 +540,7 @@ def read_and_clean_pdf_text(fp):
|
||||
return meta_txt, page_one_meta
|
||||
|
||||
|
||||
def get_files_from_everything(txt, type): # type='.md'
|
||||
def get_files_from_everything(txt, type, chatbot=None): # type='.md'
|
||||
"""
|
||||
这个函数是用来获取指定目录下所有指定类型(如.md)的文件,并且对于网络上的文件,也可以获取它。
|
||||
下面是对每个参数和返回值的说明:
|
||||
@@ -551,6 +552,7 @@ def get_files_from_everything(txt, type): # type='.md'
|
||||
- file_manifest: 文件路径列表,里面包含以指定类型为后缀名的所有文件的绝对路径。
|
||||
- project_folder: 字符串,表示文件所在的文件夹路径。如果是网络上的文件,就是临时文件夹的路径。
|
||||
该函数详细注释已添加,请确认是否满足您的需要。
|
||||
- chatbot 带Cookies的Chatbot类,为实现更多强大的功能做基础
|
||||
"""
|
||||
import glob, os
|
||||
|
||||
@@ -573,9 +575,13 @@ def get_files_from_everything(txt, type): # type='.md'
|
||||
# 直接给定文件
|
||||
file_manifest = [txt]
|
||||
project_folder = os.path.dirname(txt)
|
||||
if chatbot is not None:
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
elif os.path.exists(txt):
|
||||
# 本地路径,递归搜索
|
||||
project_folder = txt
|
||||
if chatbot is not None:
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*'+type, recursive=True)]
|
||||
if len(file_manifest) == 0:
|
||||
success = False
|
||||
|
||||
@@ -242,9 +242,7 @@ def 解析PDF_DOC2X_单文件(
|
||||
extract_archive(file_path=this_file_path, dest_dir=ex_folder)
|
||||
|
||||
# edit markdown files
|
||||
success, file_manifest, project_folder = get_files_from_everything(
|
||||
ex_folder, type=".md"
|
||||
)
|
||||
success, file_manifest, project_folder = get_files_from_everything(ex_folder, type='.md', chatbot=chatbot)
|
||||
for generated_fp in file_manifest:
|
||||
# 修正一些公式问题
|
||||
with open(generated_fp, "r", encoding="utf8") as f:
|
||||
|
||||
@@ -27,10 +27,10 @@ def extract_text_from_files(txt, chatbot, history):
|
||||
return False, final_result, page_one, file_manifest, excption #如输入区内容不是文件则直接返回输入区内容
|
||||
|
||||
#查找输入区内容中的文件
|
||||
file_pdf,pdf_manifest,folder_pdf = get_files_from_everything(txt, '.pdf')
|
||||
file_md,md_manifest,folder_md = get_files_from_everything(txt, '.md')
|
||||
file_word,word_manifest,folder_word = get_files_from_everything(txt, '.docx')
|
||||
file_doc,doc_manifest,folder_doc = get_files_from_everything(txt, '.doc')
|
||||
file_pdf,pdf_manifest,folder_pdf = get_files_from_everything(txt, '.pdf', chatbot=chatbot)
|
||||
file_md,md_manifest,folder_md = get_files_from_everything(txt, '.md', chatbot=chatbot)
|
||||
file_word,word_manifest,folder_word = get_files_from_everything(txt, '.docx', chatbot=chatbot)
|
||||
file_doc,doc_manifest,folder_doc = get_files_from_everything(txt, '.doc', chatbot=chatbot)
|
||||
|
||||
if file_doc:
|
||||
excption = "word"
|
||||
|
||||
@@ -104,6 +104,8 @@ def 总结word文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr
|
||||
# 检测输入参数,如没有给定输入参数,直接退出
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
from shared_utils.fastapi_server import validate_path_safety
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
else:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
|
||||
|
||||
@@ -61,7 +61,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
history = []
|
||||
|
||||
from crazy_functions.crazy_utils import get_files_from_everything
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf', chatbot=chatbot)
|
||||
if len(file_manifest) > 0:
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
try:
|
||||
@@ -73,7 +73,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade nougat-ocr tiktoken```。")
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
success_mmd, file_manifest_mmd, _ = get_files_from_everything(txt, type='.mmd')
|
||||
success_mmd, file_manifest_mmd, _ = get_files_from_everything(txt, type='.mmd', chatbot=chatbot)
|
||||
success = success or success_mmd
|
||||
file_manifest += file_manifest_mmd
|
||||
chatbot.append(["文件列表:", ", ".join([e.split('/')[-1] for e in file_manifest])]);
|
||||
|
||||
@@ -87,6 +87,8 @@ def 理解PDF文档内容标准文件输入(txt, llm_kwargs, plugin_kwargs, chat
|
||||
# 检测输入参数,如没有给定输入参数,直接退出
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
from shared_utils.fastapi_server import validate_path_safety
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
else:
|
||||
if txt == "":
|
||||
txt = '空空如也的输入栏'
|
||||
|
||||
@@ -39,6 +39,8 @@ def 批量生成函数注释(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
|
||||
import glob, os
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
from shared_utils.fastapi_server import validate_path_safety
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
else:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
|
||||
|
||||
@@ -49,7 +49,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
file_manifest = []
|
||||
spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"]
|
||||
for sp in spl:
|
||||
_, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
|
||||
_, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}', chatbot=chatbot)
|
||||
file_manifest += file_manifest_tmp
|
||||
|
||||
if len(file_manifest) == 0:
|
||||
|
||||
@@ -126,6 +126,8 @@ def 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
|
||||
import os
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
from shared_utils.fastapi_server import validate_path_safety
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
else:
|
||||
if txt == "":
|
||||
txt = '空空如也的输入栏'
|
||||
|
||||
@@ -48,6 +48,8 @@ def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
|
||||
import glob, os
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
from shared_utils.fastapi_server import validate_path_safety
|
||||
validate_path_safety(project_folder, chatbot.get_user())
|
||||
else:
|
||||
if txt == "": txt = '空空如也的输入栏'
|
||||
report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
|
||||
|
||||
@@ -51,7 +51,7 @@ def validate_path_safety(path_or_url, user):
|
||||
from toolbox import get_conf, default_user_name
|
||||
from toolbox import FriendlyException
|
||||
PATH_PRIVATE_UPLOAD, PATH_LOGGING = get_conf('PATH_PRIVATE_UPLOAD', 'PATH_LOGGING')
|
||||
sensitive_path = None
|
||||
sensitive_path = None # 必须不能包含 '/',即不能是多级路径
|
||||
path_or_url = os.path.relpath(path_or_url)
|
||||
if path_or_url.startswith(PATH_LOGGING): # 日志文件(按用户划分)
|
||||
sensitive_path = PATH_LOGGING
|
||||
|
||||
17
toolbox.py
17
toolbox.py
@@ -499,6 +499,22 @@ def to_markdown_tabs(head: list, tabs: list, alignment=":---:", column=False, om
|
||||
|
||||
return tabs_list
|
||||
|
||||
def validate_file_size(files, max_size_mb=500):
    """
    Check that the combined size of the given files is within the allowed limit.

    :param files: iterable of files to measure; each item is either a
                  filesystem path (``str`` / ``bytes`` / ``os.PathLike``) or an
                  object whose ``name`` attribute holds the path on disk
    :param max_size_mb: maximum allowed combined size in MB (default 500)
    :return: True when the combined size does not exceed the limit
    :raises ValueError: when the combined size exceeds ``max_size_mb``
    """
    max_size_bytes = max_size_mb * 1024 * 1024
    total_size = 0
    for file in files:
        # The documented contract is "a list of full paths", but the previous
        # code always read ``file.name`` and crashed with AttributeError on a
        # plain path. Path-like items are checked first because
        # ``pathlib.Path.name`` is only the final component, not the full path.
        # NOTE(review): upload widgets presumably hand over temp-file wrappers
        # exposing ``.name`` - confirm against the caller.
        if isinstance(file, (str, bytes, os.PathLike)):
            path = file
        else:
            path = file.name
        total_size += os.path.getsize(path)
    # The limit applies to the sum of all files, not to each file individually.
    if total_size > max_size_bytes:
        raise ValueError(f"File size exceeds the allowed limit of {max_size_mb} MB. "
                         f"Current size: {total_size / (1024 * 1024):.2f} MB")
    return True
|
||||
|
||||
def on_file_uploaded(
|
||||
request: gradio.Request, files:List[str], chatbot:ChatBotWithCookies,
|
||||
@@ -510,6 +526,7 @@ def on_file_uploaded(
|
||||
if len(files) == 0:
|
||||
return chatbot, txt
|
||||
|
||||
validate_file_size(files, max_size_mb=500)
|
||||
# 创建工作路径
|
||||
user_name = default_user_name if not request.username else request.username
|
||||
time_tag = gen_time_str()
|
||||
|
||||
在新工单中引用
屏蔽一个用户