Compare Commits

...

6 commits

Author / SHA1 / Message / Commit Date
binary-husky
82e125d439 log user name during chat 2024-11-10 16:50:24 +00:00
binary-husky
197287fc30 Enhance archive extraction with error handling for tar and gzip formats 2024-11-09 10:10:46 +00:00
Bingchen Jiang
c37fcc9299 Adding support to new openai apikey format (#2030) 2024-11-09 13:41:19 +08:00
binary-husky
91f5e6b8f7 resolve pickle security issue 2024-11-04 13:49:49 +00:00
hcy2206
4f0851f703 Add support for glm-4-plus (#2014)
* Add support for the iFlytek Spark 4.0 (讯飞星火) large model

* Create github action sync.yml

* Add support for Zhipu glm-4-plus

* feat: change arxiv io param

* catch comment source code exception

* upgrade auto comment

* add security patch

---------

Co-authored-by: GH Action - Upstream Sync <action@github.com>
Co-authored-by: binary-husky <qingxu.fu@outlook.com>
2024-11-03 22:41:16 +08:00
binary-husky
2821f27756 add security patch 2024-11-03 14:34:17 +00:00
17 files changed: 82 insertions, 33 deletions

.gitignore vendored
View File

@@ -160,4 +160,6 @@ test.*
 temp.*
 objdump*
 *.min.*.js
 TODO
+experimental_mods
+search_results

View File

@@ -6,12 +6,16 @@ class SafeUnpickler(pickle.Unpickler):
     def get_safe_classes(self):
         from crazy_functions.latex_fns.latex_actions import LatexPaperFileGroup, LatexPaperSplit
         from crazy_functions.latex_fns.latex_toolbox import LinkedListNode
+        from numpy.core.multiarray import scalar
+        from numpy import dtype
         # Define the allowed safe classes
         safe_classes = {
             # Add other safe classes here
             'LatexPaperFileGroup': LatexPaperFileGroup,
             'LatexPaperSplit': LatexPaperSplit,
             'LinkedListNode': LinkedListNode,
+            'scalar': scalar,
+            'dtype': dtype,
         }
         return safe_classes
@@ -22,8 +26,6 @@ class SafeUnpickler(pickle.Unpickler):
         for class_name in self.safe_classes.keys():
             if (class_name in f'{module}.{name}'):
                 match_class_name = class_name
-        if module == 'numpy' or module.startswith('numpy.'):
-            return super().find_class(module, name)
         if match_class_name is not None:
             return self.safe_classes[match_class_name]
         # Raise an exception if an unauthorized class is about to be loaded

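Note on the hunk above: the patch removes a blanket pass-through for everything under numpy and instead allowlists exactly two classes (scalar and dtype). Below is a minimal, self-contained sketch of the same allowlist pattern, using a stdlib class instead of the project's own, and exact (module, name) matching, which is stricter than the substring test in the real find_class:

```python
import io
import os
import pickle
from collections import OrderedDict

class SafeUnpickler(pickle.Unpickler):
    # Only classes named here may be resolved during unpickling;
    # anything else raises instead of importing arbitrary code.
    SAFE_CLASSES = {("collections", "OrderedDict"): OrderedDict}

    def find_class(self, module, name):
        try:
            return self.SAFE_CLASSES[(module, name)]
        except KeyError:
            raise pickle.UnpicklingError(f"forbidden class: {module}.{name}")

payload = pickle.dumps(OrderedDict(a=1))
print(SafeUnpickler(io.BytesIO(payload)).load())  # OrderedDict([('a', 1)])

# A pickle referencing any other global fails instead of executing:
try:
    SafeUnpickler(io.BytesIO(pickle.dumps(os.system))).load()
except pickle.UnpicklingError as e:
    print(e)  # forbidden class: posix.system (module name is platform-dependent)
```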
View File

@@ -385,6 +385,14 @@ model_info = {
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
+    "glm-4-plus": {
+        "fn_with_ui": zhipu_ui,
+        "fn_without_ui": zhipu_noui,
+        "endpoint": None,
+        "max_token": 10124 * 8,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
     # api_2d (no need to add api2d entries here anymore; the code below adds them automatically)
     "api2d-gpt-4": {

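For context, model_info acts as a dispatch table keyed by model name, so adding a model is just adding an entry. A hedged sketch of how such a registry is typically consumed; the handler below is a placeholder, not the project's real zhipu_ui/zhipu_noui bridge functions:

```python
# Stand-in handler; the real entry points to the zhipu bridge.
def demo_handler(prompt: str) -> str:
    return f"(glm-4-plus would answer) {prompt}"

model_info = {
    "glm-4-plus": {
        "fn_without_ui": demo_handler,
        "endpoint": None,        # None here: resolved by the bridge itself
        "max_token": 10124 * 8,  # context budget used for truncation
    },
}

def predict_no_ui(model: str, prompt: str) -> str:
    entry = model_info[model]              # one dict lookup, no if/elif chains
    return entry["fn_without_ui"](prompt)  # dispatch to the model's handler

print(predict_no_ui("glm-4-plus", "hello"))
```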
View File

@@ -341,7 +341,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                 # The former is API2D's stop condition, the latter is OPENAI's
                 if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
                     # The data stream has ended and gpt_replying_buffer is fully written
-                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                     break
                 # Handle the main body of the data stream
                 status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
@@ -375,7 +375,7 @@ def handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history):
     try:
         chunkjson = json.loads(response.content.decode())
         gpt_replying_buffer = chunkjson['choices'][0]["message"]["content"]
-        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
         history[-1] = gpt_replying_buffer
         chatbot[-1] = (history[-2], history[-1])
         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

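The first hunk's condition combines the two termination signals named in its comment: API2D emits a literal "data: [DONE]" sentinel, while OpenAI-style streams send a final chunk whose delta is empty. A small sketch of that check in isolation, assuming one data line per chunk (the helper name is ours, not the project's):

```python
import json

def is_stream_end(chunk_decoded: str) -> bool:
    # API2D terminates with a literal sentinel line.
    if "data: [DONE]" in chunk_decoded:
        return True
    # OpenAI-style SSE terminates with a chunk whose delta is empty.
    payload = chunk_decoded.removeprefix("data: ").strip()
    chunkjson = json.loads(payload)
    return len(chunkjson["choices"][0]["delta"]) == 0

print(is_stream_end("data: [DONE]"))                                      # True
print(is_stream_end('data: {"choices": [{"delta": {}}]}'))                # True
print(is_stream_end('data: {"choices": [{"delta": {"content": "x"}}]}'))  # False
```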
View File

@@ -184,7 +184,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                     # The data stream has ended and gpt_replying_buffer is fully written
                     lastmsg = chatbot[-1][-1] + f"\n\n\n\n{llm_kwargs['llm_model']}调用结束,该模型不具备上下文对话能力,如需追问,请及时切换模型。」"
                     yield from update_ui_lastest_msg(lastmsg, chatbot, history, delay=1)
-                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                     break
                 # Handle the main body of the data stream
                 status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"

View File

@@ -216,7 +216,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                 if need_to_pass:
                     pass
                 elif is_last_chunk:
-                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                     # logger.info(f'[response] {gpt_replying_buffer}')
                     break
                 else:

View File

@@ -223,7 +223,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                     chatbot[-1] = (history[-2], history[-1])
                     yield from update_ui(chatbot=chatbot, history=history, msg="正常")  # refresh the UI
                 if chunkjson['event_type'] == 'stream-end':
-                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                     history[-1] = gpt_replying_buffer
                     chatbot[-1] = (history[-2], history[-1])
                     yield from update_ui(chatbot=chatbot, history=history, msg="正常")  # refresh the UI

View File

@@ -109,7 +109,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                     gpt_replying_buffer += paraphrase['text']  # processed with the json parsing library
                     chatbot[-1] = (inputs, gpt_replying_buffer)
                     history[-1] = gpt_replying_buffer
-                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                     yield from update_ui(chatbot=chatbot, history=history)
                 if error_match:
                     history = history[-2]  # exclude the erroneous turn from the conversation

View File

@@ -166,7 +166,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                 history = history[:-2]
                 yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
                 break
-            log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_bro_result)
+            log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_bro_result, user_name=chatbot.get_user())
 def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None,
                                   console_slience=False):

View File

@@ -337,7 +337,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                 # The former is API2D's stop condition, the latter is OPENAI's
                 if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
                     # The data stream has ended and gpt_replying_buffer is fully written
-                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
                     break
                 # Handle the main body of the data stream
                 status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
@@ -371,7 +371,7 @@ def handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history):
     try:
         chunkjson = json.loads(response.content.decode())
         gpt_replying_buffer = chunkjson['choices'][0]["message"]["content"]
-        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
+        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer, user_name=chatbot.get_user())
         history[-1] = gpt_replying_buffer
         chatbot[-1] = (history[-2], history[-1])
         yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

View File

@@ -59,7 +59,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         chatbot[-1] = (inputs, response)
         yield from update_ui(chatbot=chatbot, history=history)
-    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response)
+    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response, user_name=chatbot.get_user())
     # Summarize the output
     if response == f"[Local Message] 等待{model_name}响应中 ...":
         response = f"[Local Message] {model_name}响应异常 ..."

View File

@@ -68,5 +68,5 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
         chatbot[-1] = [inputs, response]
         yield from update_ui(chatbot=chatbot, history=history)
     history.extend([inputs, response])
-    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response)
+    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response, user_name=chatbot.get_user())
     yield from update_ui(chatbot=chatbot, history=history)

View File

@@ -97,5 +97,5 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
         chatbot[-1] = [inputs, response]
         yield from update_ui(chatbot=chatbot, history=history)
     history.extend([inputs, response])
-    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response)
+    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=response, user_name=chatbot.get_user())
     yield from update_ui(chatbot=chatbot, history=history)

View File

@@ -138,7 +138,9 @@ def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SS
     app_block.is_sagemaker = False
     gradio_app = App.create_app(app_block)
+    for route in list(gradio_app.router.routes):
+        if route.path == "/proxy={url_path:path}":
+            gradio_app.router.routes.remove(route)
     # --- --- replace gradio endpoint to forbid access to sensitive files --- ---
     if len(AUTHENTICATION) > 0:
         dependencies = []
@@ -154,9 +156,13 @@ def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SS
         @gradio_app.head("/file={path_or_url:path}", dependencies=dependencies)
         @gradio_app.get("/file={path_or_url:path}", dependencies=dependencies)
         async def file(path_or_url: str, request: fastapi.Request):
-            if len(AUTHENTICATION) > 0:
-                if not _authorize_user(path_or_url, request, gradio_app):
-                    return "越权访问!"
+            if not _authorize_user(path_or_url, request, gradio_app):
+                return "越权访问!"
+            stripped = path_or_url.lstrip().lower()
+            if stripped.startswith("https://") or stripped.startswith("http://"):
+                return "账户密码授权模式下, 禁止链接!"
+            if '../' in stripped:
+                return "非法路径!"
             return await endpoint(path_or_url, request)
         from fastapi import Request, status
@@ -167,6 +173,26 @@ def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SS
             response.delete_cookie('access-token')
             response.delete_cookie('access-token-unsecure')
             return response
+    else:
+        dependencies = []
+        endpoint = None
+        for route in list(gradio_app.router.routes):
+            if route.path == "/file/{path:path}":
+                gradio_app.router.routes.remove(route)
+            if route.path == "/file={path_or_url:path}":
+                dependencies = route.dependencies
+                endpoint = route.endpoint
+                gradio_app.router.routes.remove(route)
+        @gradio_app.get("/file/{path:path}", dependencies=dependencies)
+        @gradio_app.head("/file={path_or_url:path}", dependencies=dependencies)
+        @gradio_app.get("/file={path_or_url:path}", dependencies=dependencies)
+        async def file(path_or_url: str, request: fastapi.Request):
+            stripped = path_or_url.lstrip().lower()
+            if stripped.startswith("https://") or stripped.startswith("http://"):
+                return "账户密码授权模式下, 禁止链接!"
+            if '../' in stripped:
+                return "非法路径!"
+            return await endpoint(path_or_url, request)
     # --- --- enable TTS (text-to-speech) functionality --- ---
     TTS_TYPE = get_conf("TTS_TYPE")

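The technique in both branches of the patch is the same: pull gradio's original /file route out of the FastAPI router, keep a reference to its endpoint, and re-register a wrapper that validates the path before delegating. A runnable sketch of that route-swap pattern, with an illustrative endpoint body standing in for gradio's file handler:

```python
from fastapi import FastAPI, Request

app = FastAPI()

@app.get("/file={path_or_url:path}")
async def original_file(path_or_url: str, request: Request):
    return {"serving": path_or_url}  # stand-in for gradio's file endpoint

# Detach the original route but keep its endpoint callable.
endpoint = None
for route in list(app.router.routes):
    if getattr(route, "path", "") == "/file={path_or_url:path}":
        endpoint = route.endpoint
        app.router.routes.remove(route)

@app.get("/file={path_or_url:path}")
async def guarded_file(path_or_url: str, request: Request):
    stripped = path_or_url.lstrip().lower()
    if stripped.startswith(("https://", "http://")) or "../" in stripped:
        return "forbidden"  # reject remote URLs and traversal attempts
    return await endpoint(path_or_url, request)  # delegate to the original
```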
View File

@@ -104,17 +104,27 @@ def extract_archive(file_path, dest_dir):
             logger.info("Successfully extracted zip archive to {}".format(dest_dir))
     elif file_extension in [".tar", ".gz", ".bz2"]:
-        with tarfile.open(file_path, "r:*") as tarobj:
-            # Sanitize extraction paths, removing any unsafe elements
-            for member in tarobj.getmembers():
-                member_path = os.path.normpath(member.name)
-                full_path = os.path.join(dest_dir, member_path)
-                full_path = os.path.abspath(full_path)
-                if not full_path.startswith(os.path.abspath(dest_dir) + os.sep):
-                    raise Exception(f"Attempted Path Traversal in {member.name}")
-            tarobj.extractall(path=dest_dir)
-            logger.info("Successfully extracted tar archive to {}".format(dest_dir))
+        try:
+            with tarfile.open(file_path, "r:*") as tarobj:
+                # Sanitize extraction paths, removing any unsafe elements
+                for member in tarobj.getmembers():
+                    member_path = os.path.normpath(member.name)
+                    full_path = os.path.join(dest_dir, member_path)
+                    full_path = os.path.abspath(full_path)
+                    if not full_path.startswith(os.path.abspath(dest_dir) + os.sep):
+                        raise Exception(f"Attempted Path Traversal in {member.name}")
+                tarobj.extractall(path=dest_dir)
+                logger.info("Successfully extracted tar archive to {}".format(dest_dir))
+        except tarfile.ReadError as e:
+            if file_extension == ".gz":
+                # Some very odd projects ship a gz file that is not a tar archive and holds only a single tex file
+                import gzip
+                with gzip.open(file_path, 'rb') as f_in:
+                    with open(os.path.join(dest_dir, 'main.tex'), 'wb') as f_out:
+                        f_out.write(f_in.read())
+            else:
+                raise e

     # Third-party library: pip install rarfile is required beforehand
     # On Windows, WinRAR must also be installed and added to Path, e.g. "C:\Program Files\WinRAR"

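The two pieces of the hunk above are independently testable: a traversal check that resolves every member path against dest_dir before extractall, and a gzip fallback for .gz uploads that are a single compressed file rather than a tar. A self-contained sketch under those assumptions (function names are ours):

```python
import gzip
import os
import tarfile

def safe_extract_tar(file_path: str, dest_dir: str) -> None:
    with tarfile.open(file_path, "r:*") as tarobj:
        root = os.path.abspath(dest_dir)
        for member in tarobj.getmembers():
            # Resolve each member against dest_dir; refuse anything that escapes it.
            full = os.path.abspath(os.path.join(dest_dir, os.path.normpath(member.name)))
            if not full.startswith(root + os.sep):
                raise Exception(f"Attempted Path Traversal in {member.name}")
        tarobj.extractall(path=dest_dir)

def extract_bare_gz(file_path: str, dest_dir: str) -> None:
    # Fallback for a .gz holding one file (e.g. a lone main.tex) instead of a tar.
    with gzip.open(file_path, "rb") as f_in:
        with open(os.path.join(dest_dir, "main.tex"), "wb") as f_out:
            f_out.write(f_in.read())

def extract(file_path: str, dest_dir: str) -> None:
    try:
        safe_extract_tar(file_path, dest_dir)
    except tarfile.ReadError:
        if file_path.endswith(".gz"):
            extract_bare_gz(file_path, dest_dir)
        else:
            raise
```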
View File

@@ -14,6 +14,7 @@ openai_regex = re.compile(
     r"sk-[a-zA-Z0-9_-]{92}$|" +
     r"sk-proj-[a-zA-Z0-9_-]{48}$|"+
     r"sk-proj-[a-zA-Z0-9_-]{124}$|"+
+    r"sk-proj-[a-zA-Z0-9_-]{156}$|"+  # the new apikey format has a different length, so this pattern was added
     r"sess-[a-zA-Z0-9]{40}$"
 )
 def is_openai_api_key(key):

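To see why the extra alternative is needed: re.match tries each |-separated branch in turn, and every branch ends with $, so a 156-character sk-proj- key fails all the older length branches. A sketch using only the branches visible in this diff (the full project regex has more lines above the hunk):

```python
import re

# Lengths taken from the diff shown above.
openai_regex = re.compile(
    r"sk-[a-zA-Z0-9_-]{92}$|" +
    r"sk-proj-[a-zA-Z0-9_-]{48}$|" +
    r"sk-proj-[a-zA-Z0-9_-]{124}$|" +
    r"sk-proj-[a-zA-Z0-9_-]{156}$|" +  # the newly added branch
    r"sess-[a-zA-Z0-9]{40}$"
)

def is_openai_api_key(key: str) -> bool:
    return bool(openai_regex.match(key.strip()))

print(is_openai_api_key("sk-proj-" + "a" * 156))  # True only with the new branch
print(is_openai_api_key("sk-proj-" + "a" * 100))  # False: matches no branch
```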
View File

@@ -1029,7 +1029,7 @@ def check_repeat_upload(new_pdf_path, pdf_hash):
     # If the content of every page is identical, return True
     return False, None
-def log_chat(llm_model: str, input_str: str, output_str: str):
+def log_chat(llm_model: str, input_str: str, output_str: str, user_name: str=default_user_name):
     try:
         if output_str and input_str and llm_model:
             uid = str(uuid.uuid4().hex)
@@ -1038,8 +1038,8 @@ def log_chat(llm_model: str, input_str: str, output_str: str):
             logger.bind(chat_msg=True).info(dedent(
                 """
                 ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-                [UID]
-                {uid}
+                [UID/USER]
+                {uid}/{user_name}
                 [Model]
                 {llm_model}
                 [Query]
@@ -1047,6 +1047,6 @@ def log_chat(llm_model: str, input_str: str, output_str: str):
                 [Response]
                 {output_str}
                 ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
-                """).format(uid=uid, llm_model=llm_model, input_str=input_str, output_str=output_str))
+                """).format(uid=uid, user_name=user_name, llm_model=llm_model, input_str=input_str, output_str=output_str))
     except:
         logger.error(trimmed_format_exc())
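
The net effect of this series: log_chat gains an optional user_name parameter defaulting to default_user_name, so untouched call sites keep working, while every streaming bridge now passes chatbot.get_user(). A minimal sketch of that backward-compatible signature change, using the stdlib logger in place of the project's loguru setup:

```python
import logging
import uuid
from textwrap import dedent

logging.basicConfig(level=logging.INFO, format="%(message)s")
default_user_name = "default_user"  # assumed value; the project defines its own

def log_chat(llm_model: str, input_str: str, output_str: str,
             user_name: str = default_user_name) -> None:
    # Skip incomplete records, mirroring the guard in the diff above.
    if not (llm_model and input_str and output_str):
        return
    uid = uuid.uuid4().hex
    logging.info(dedent("""\
        [UID/USER] {uid}/{user_name}
        [Model] {llm_model}
        [Query] {input_str}
        [Response] {output_str}""").format(
        uid=uid, user_name=user_name, llm_model=llm_model,
        input_str=input_str, output_str=output_str))

log_chat("glm-4-plus", "hi", "hello")                     # legacy call site
log_chat("glm-4-plus", "hi", "hello", user_name="alice")  # updated call site
```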