From 47cedde954f2c0a65446c4c110e83d909a1982ce Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 18 Jun 2024 10:18:33 +0000 Subject: [PATCH] fix security issue GHSA-3jrq-66fm-w7xr --- .gitignore | 4 +- crazy_functions/Conversation_To_File.py | 98 ++++++++++++++++-------- crazy_functions/Latex_Function.py | 6 ++ crazy_functions/解析项目源代码.py | 14 +++- shared_utils/advanced_markdown_format.py | 2 +- shared_utils/fastapi_server.py | 22 ++++++ toolbox.py | 21 ++++- 7 files changed, 132 insertions(+), 35 deletions(-) diff --git a/.gitignore b/.gitignore index 3a5e180c..4fb8a7df 100644 --- a/.gitignore +++ b/.gitignore @@ -153,4 +153,6 @@ media flagged request_llms/ChatGLM-6b-onnx-u8s8 .pre-commit-config.yaml -themes/common.js.min.*.js \ No newline at end of file +themes/common.js.min.*.js +test* +objdump* \ No newline at end of file diff --git a/crazy_functions/Conversation_To_File.py b/crazy_functions/Conversation_To_File.py index a077fcf4..972f4ac0 100644 --- a/crazy_functions/Conversation_To_File.py +++ b/crazy_functions/Conversation_To_File.py @@ -10,27 +10,61 @@ def write_chat_to_file(chatbot, history=None, file_name=None): """ import os import time + from themes.theme import advanced_css + if file_name is None: file_name = f_prefix + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html' fp = os.path.join(get_log_folder(get_user(chatbot), plugin_name='chat_history'), file_name) + with open(fp, 'w', encoding='utf8') as f: - from themes.theme import advanced_css - f.write(f'对话历史') + from textwrap import dedent + form = dedent(""" + 对话存档 + +
+
+
+ {CHAT_PREVIEW} +
+
+
对话(原始数据)
+ {HISTORY_PREVIEW} +
+
+
+ + """) + + qa_from = dedent(""" +
+
{QUESTION}
+
+
{ANSWER}
+
+ """) + + history_from = dedent(""" +
+
{ENTRY}
+
+ """) + CHAT_PREVIEW_BUF = "" for i, contents in enumerate(chatbot): - for j, content in enumerate(contents): - try: # 这个bug没找到触发条件,暂时先这样顶一下 - if type(content) != str: content = str(content) - except: - continue - f.write(content) - if j == 0: - f.write('
') - f.write('
\n\n') - f.write('
\n\n raw chat context:\n') - f.write('') + question, answer = contents[0], contents[1] + if question is None: question = "" + try: question = str(question) + except: question = "" + if answer is None: answer = "" + try: answer = str(answer) + except: answer = "" + CHAT_PREVIEW_BUF += qa_from.format(QUESTION=question, ANSWER=answer) + + HISTORY_PREVIEW_BUF = "" for h in history: - f.write("\n>>>" + h) - f.write('') + HISTORY_PREVIEW_BUF += history_from.format(ENTRY=h) + html_content = form.format(CHAT_PREVIEW=CHAT_PREVIEW_BUF, HISTORY_PREVIEW=HISTORY_PREVIEW_BUF, CSS=advanced_css) + f.write(html_content) + promote_file_to_downloadzone(fp, rename_file=file_name, chatbot=chatbot) return '对话历史写入:' + fp @@ -41,7 +75,7 @@ def gen_file_preview(file_name): # pattern to match the text between and pattern = re.compile(r'.*?', flags=re.DOTALL) file_content = re.sub(pattern, '', file_content) - html, history = file_content.split('
\n\n raw chat context:\n') + html, history = file_content.split('
\n\n 对话数据 (无渲染):\n') history = history.strip('') history = history.strip('') history = history.split("\n>>>") @@ -52,21 +86,25 @@ def gen_file_preview(file_name): def read_file_to_chat(chatbot, history, file_name): with open(file_name, 'r', encoding='utf8') as f: file_content = f.read() - # pattern to match the text between and - pattern = re.compile(r'.*?', flags=re.DOTALL) - file_content = re.sub(pattern, '', file_content) - html, history = file_content.split('
\n\n raw chat context:\n') - history = history.strip('') - history = history.strip('') - history = history.split("\n>>>") - history = list(filter(lambda x:x!="", history)) - html = html.split('
\n\n') - html = list(filter(lambda x:x!="", html)) + from bs4 import BeautifulSoup + soup = BeautifulSoup(file_content, 'lxml') + # 提取QaBox信息 chatbot.clear() - for i, h in enumerate(html): - i_say, gpt_say = h.split('
') - chatbot.append([i_say, gpt_say]) - chatbot.append([f"存档文件详情?", f"[Local Message] 载入对话{len(html)}条,上下文{len(history)}条。"]) + qa_box_list = [] + qa_boxes = soup.find_all("div", class_="QaBox") + for box in qa_boxes: + question = box.find("div", class_="Question").get_text(strip=False) + answer = box.find("div", class_="Answer").get_text(strip=False) + qa_box_list.append({"Question": question, "Answer": answer}) + chatbot.append([question, answer]) + # 提取historyBox信息 + history_box_list = [] + history_boxes = soup.find_all("div", class_="historyBox") + for box in history_boxes: + entry = box.find("div", class_="entry").get_text(strip=False) + history_box_list.append(entry) + history = history_box_list + chatbot.append([None, f"[Local Message] 载入对话{len(qa_box_list)}条,上下文{len(history)}条。"]) return chatbot, history @CatchException diff --git a/crazy_functions/Latex_Function.py b/crazy_functions/Latex_Function.py index bc018e34..f3c8ddb5 100644 --- a/crazy_functions/Latex_Function.py +++ b/crazy_functions/Latex_Function.py @@ -271,6 +271,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo project_folder = desend_to_extracted_folder_if_exist(project_folder) # <-------------- move latex project away from temp folder -------------> + from shared_utils.fastapi_server import validate_path_safety + validate_path_safety(project_folder, chatbot.get_user()) project_folder = move_project(project_folder, arxiv_id=None) # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> @@ -365,6 +367,8 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, project_folder = desend_to_extracted_folder_if_exist(project_folder) # <-------------- move latex project away from temp folder -------------> + from shared_utils.fastapi_server import validate_path_safety + validate_path_safety(project_folder, chatbot.get_user()) project_folder = move_project(project_folder, arxiv_id) # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> @@ -503,6 +507,8 @@ def PDF翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, h project_folder = desend_to_extracted_folder_if_exist(project_folder) # <-------------- move latex project away from temp folder -------------> + from shared_utils.fastapi_server import validate_path_safety + validate_path_safety(project_folder, chatbot.get_user()) project_folder = move_project(project_folder) # <-------------- set a hash tag for repeat-checking -------------> diff --git a/crazy_functions/解析项目源代码.py b/crazy_functions/解析项目源代码.py index e510a73b..65e78249 100644 --- a/crazy_functions/解析项目源代码.py +++ b/crazy_functions/解析项目源代码.py @@ -1,6 +1,7 @@ from toolbox import update_ui, promote_file_to_downloadzone, disable_auto_promotion from toolbox import CatchException, report_exception, write_history_to_file -from .crazy_utils import input_clipping +from shared_utils.fastapi_server import validate_path_safety +from crazy_functions.crazy_utils import input_clipping def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): import os, copy @@ -128,6 +129,7 @@ def 解析一个Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") @@ -146,6 +148,7 @@ def 解析一个Matlab项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a = f"解析Matlab项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") @@ -164,6 +167,7 @@ def 解析一个C项目的头文件(txt, llm_kwargs, plugin_kwargs, chatbot, his import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") @@ -184,6 +188,7 @@ def 解析一个C项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") @@ -206,6 +211,7 @@ def 解析一个Java项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}") @@ -228,6 +234,7 @@ def 解析一个前端项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}") @@ -257,6 +264,7 @@ def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}") @@ -278,6 +286,7 @@ def 解析一个Rust项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}") @@ -298,6 +307,7 @@ def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") @@ -320,6 +330,7 @@ def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s import glob, os if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") @@ -357,6 +368,7 @@ def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys import glob, os, re if os.path.exists(txt): project_folder = txt + validate_path_safety(project_folder, chatbot.get_user()) else: if txt == "": txt = '空空如也的输入栏' report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") diff --git a/shared_utils/advanced_markdown_format.py b/shared_utils/advanced_markdown_format.py index 5674e1da..68783246 100644 --- a/shared_utils/advanced_markdown_format.py +++ b/shared_utils/advanced_markdown_format.py @@ -213,7 +213,7 @@ def markdown_convertion_for_file(txt): """ from themes.theme import advanced_css pre = f""" - 对话历史 + PDF文档翻译
diff --git a/shared_utils/fastapi_server.py b/shared_utils/fastapi_server.py index b30a5aaa..758722f7 100644 --- a/shared_utils/fastapi_server.py +++ b/shared_utils/fastapi_server.py @@ -47,6 +47,28 @@ queue cocurrent effectiveness import os, requests, threading, time import uvicorn +def validate_path_safety(path_or_url, user): + from toolbox import get_conf, default_user_name + from toolbox import FriendlyException + PATH_PRIVATE_UPLOAD, PATH_LOGGING = get_conf('PATH_PRIVATE_UPLOAD', 'PATH_LOGGING') + sensitive_path = None + path_or_url = os.path.relpath(path_or_url) + if path_or_url.startswith(PATH_LOGGING): # 日志文件(按用户划分) + sensitive_path = PATH_LOGGING + elif path_or_url.startswith(PATH_PRIVATE_UPLOAD): # 用户的上传目录(按用户划分) + sensitive_path = PATH_PRIVATE_UPLOAD + elif path_or_url.startswith('tests'): # 一个常用的测试目录 + return True + else: + raise FriendlyException(f"输入文件的路径 ({path_or_url}) 存在,但位置非法。请将文件上传后再执行该任务。") # return False + if sensitive_path: + allowed_users = [user, 'autogen', default_user_name] # three user path that can be accessed + for user_allowed in allowed_users: + if f"{os.sep}".join(path_or_url.split(os.sep)[:2]) == os.path.join(sensitive_path, user_allowed): + return True + raise FriendlyException(f"输入文件的路径 ({path_or_url}) 存在,但属于其他用户。请将文件上传后再执行该任务。") # return False + return True + def _authorize_user(path_or_url, request, gradio_app): from toolbox import get_conf, default_user_name PATH_PRIVATE_UPLOAD, PATH_LOGGING = get_conf('PATH_PRIVATE_UPLOAD', 'PATH_LOGGING') diff --git a/toolbox.py b/toolbox.py index d42e6075..4a7091f3 100644 --- a/toolbox.py +++ b/toolbox.py @@ -10,6 +10,7 @@ import glob import logging import uuid from functools import wraps +from textwrap import dedent from shared_utils.config_loader import get_conf from shared_utils.config_loader import set_conf from shared_utils.config_loader import set_multi_conf @@ -193,9 +194,20 @@ def trimmed_format_exc(): replace_path = "." return str.replace(current_path, replace_path) + def trimmed_format_exc_markdown(): return '\n\n```\n' + trimmed_format_exc() + '```' + +class FriendlyException(Exception): + def generate_error_html(self): + return dedent(f""" +
+ {"
".join(self.args)} +
+ """) + + def CatchException(f): """ 装饰器函数,捕捉函数f中的异常并封装到一个生成器中返回,并显示到聊天当中。 @@ -206,13 +218,18 @@ def CatchException(f): chatbot_with_cookie:ChatBotWithCookies, history:list, *args, **kwargs): try: yield from f(main_input, llm_kwargs, plugin_kwargs, chatbot_with_cookie, history, *args, **kwargs) + except FriendlyException as e: + if len(chatbot_with_cookie) == 0: + chatbot_with_cookie.clear() + chatbot_with_cookie.append(["插件调度异常", None]) + chatbot_with_cookie[-1] = [chatbot_with_cookie[-1][0], e.generate_error_html()] + yield from update_ui(chatbot=chatbot_with_cookie, history=history, msg=f'异常') # 刷新界面 except Exception as e: - from toolbox import get_conf tb_str = '```\n' + trimmed_format_exc() + '```' if len(chatbot_with_cookie) == 0: chatbot_with_cookie.clear() chatbot_with_cookie.append(["插件调度异常", "异常原因"]) - chatbot_with_cookie[-1] = (chatbot_with_cookie[-1][0], f"[Local Message] 插件调用出错: \n\n{tb_str} \n") + chatbot_with_cookie[-1] = [chatbot_with_cookie[-1][0], f"[Local Message] 插件调用出错: \n\n{tb_str} \n"] yield from update_ui(chatbot=chatbot_with_cookie, history=history, msg=f'异常 {e}') # 刷新界面 return decorated