From aa341fd268a6276b3a617d9511d4c986ba69fa9c Mon Sep 17 00:00:00 2001 From: spike <2411123479@qq.com> Date: Wed, 15 Nov 2023 10:09:42 +0800 Subject: [PATCH 01/88] =?UTF-8?q?=E9=80=82=E9=85=8D=E6=98=9F=E7=81=AB?= =?UTF-8?q?=E5=A4=A7=E6=A8=A1=E5=9E=8B=E5=9B=BE=E7=89=87=E7=90=86=E8=A7=A3?= =?UTF-8?q?=20=E5=A2=9E=E5=8A=A0=E4=B8=8A=E4=BC=A0=E5=9B=BE=E7=89=87view?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- request_llms/bridge_chatgpt_vision.py | 21 +------ request_llms/com_sparkapi.py | 33 +++++++--- toolbox.py | 89 +++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 29 deletions(-) diff --git a/request_llms/bridge_chatgpt_vision.py b/request_llms/bridge_chatgpt_vision.py index e84bc0b7..0384a051 100644 --- a/request_llms/bridge_chatgpt_vision.py +++ b/request_llms/bridge_chatgpt_vision.py @@ -16,28 +16,13 @@ import base64 import os import glob -from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, update_ui_lastest_msg, get_max_token +from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \ get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY') timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。' -def have_any_recent_upload_image_files(chatbot): - _5min = 5 * 60 - if chatbot is None: return False, None # chatbot is None - most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None) - if not most_recent_uploaded: return False, None # most_recent_uploaded is None - if time.time() - most_recent_uploaded["time"] < _5min: - most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None) - path = most_recent_uploaded['path'] - file_manifest = [f for f in glob.glob(f'{path}/**/*.jpg', recursive=True)] - file_manifest += [f for f in glob.glob(f'{path}/**/*.jpeg', recursive=True)] - file_manifest += [f for f in glob.glob(f'{path}/**/*.png', recursive=True)] - if len(file_manifest) == 0: return False, None - return True, file_manifest # most_recent_uploaded is new - else: - return False, None # most_recent_uploaded is too old def report_invalid_key(key): if get_conf("BLOCK_INVALID_APIKEY"): @@ -258,10 +243,6 @@ def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}") return chatbot, history -# Function to encode the image -def encode_image(image_path): - with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode('utf-8') def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths): """ diff --git a/request_llms/com_sparkapi.py b/request_llms/com_sparkapi.py index 5c1a3a44..1221de11 100644 --- a/request_llms/com_sparkapi.py +++ b/request_llms/com_sparkapi.py @@ -1,4 +1,4 @@ -from toolbox import get_conf +from toolbox import get_conf, get_pictures_list, encode_image import base64 import datetime import hashlib @@ -65,6 +65,7 @@ class SparkRequestInstance(): self.gpt_url = "ws://spark-api.xf-yun.com/v1.1/chat" self.gpt_url_v2 = "ws://spark-api.xf-yun.com/v2.1/chat" 
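        # 下面几个 chat 接口仅版本路径(v1.1/v2.1/v3.1)不同;本补丁新增的 gpt_url_img
        # 为图像理解接口,其域名(cn-huabei-1 华北节点)与协议(加密的 wss)均不同于普通 chat 接口。
        # (以上说明系依据本文件代码的推断,并非官方文档摘录。)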
self.gpt_url_v3 = "ws://spark-api.xf-yun.com/v3.1/chat" + self.gpt_url_img = "wss://spark-api.cn-huabei-1.xf-yun.com/v2.1/image" self.time_to_yield_event = threading.Event() self.time_to_exit_event = threading.Event() @@ -92,7 +93,11 @@ class SparkRequestInstance(): gpt_url = self.gpt_url_v3 else: gpt_url = self.gpt_url - + file_manifest = [] + if llm_kwargs.get('most_recent_uploaded'): + if llm_kwargs['most_recent_uploaded'].get('path'): + file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path']) + gpt_url = self.gpt_url_img wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url) websocket.enableTrace(False) wsUrl = wsParam.create_url() @@ -101,9 +106,8 @@ class SparkRequestInstance(): def on_open(ws): import _thread as thread thread.start_new_thread(run, (ws,)) - def run(ws, *args): - data = json.dumps(gen_params(ws.appid, *ws.all_args)) + data = json.dumps(gen_params(ws.appid, *ws.all_args, file_manifest)) ws.send(data) # 收到websocket消息的处理 @@ -142,9 +146,18 @@ class SparkRequestInstance(): ws.all_args = (inputs, llm_kwargs, history, system_prompt) ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE}) -def generate_message_payload(inputs, llm_kwargs, history, system_prompt): +def generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest): conversation_cnt = len(history) // 2 - messages = [{"role": "system", "content": system_prompt}] + messages = [] + if file_manifest: + base64_images = [] + for image_path in file_manifest: + base64_images.append(encode_image(image_path)) + for img_s in base64_images: + if img_s not in str(messages): + messages.append({"role": "user", "content": img_s, "content_type": "image"}) + else: + messages = [{"role": "system", "content": system_prompt}] if conversation_cnt: for index in range(0, 2*conversation_cnt, 2): what_i_have_asked = {} @@ -167,7 +180,7 @@ def generate_message_payload(inputs, llm_kwargs, history, system_prompt): return messages -def gen_params(appid, inputs, llm_kwargs, history, system_prompt): +def gen_params(appid, inputs, llm_kwargs, history, system_prompt, file_manifest): """ 通过appid和用户的提问来生成请参数 """ @@ -176,6 +189,8 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt): "sparkv2": "generalv2", "sparkv3": "generalv3", } + domains_select = domains[llm_kwargs['llm_model']] + if file_manifest: domains_select = 'image' data = { "header": { "app_id": appid, @@ -183,7 +198,7 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt): }, "parameter": { "chat": { - "domain": domains[llm_kwargs['llm_model']], + "domain": domains_select, "temperature": llm_kwargs["temperature"], "random_threshold": 0.5, "max_tokens": 4096, @@ -192,7 +207,7 @@ def gen_params(appid, inputs, llm_kwargs, history, system_prompt): }, "payload": { "message": { - "text": generate_message_payload(inputs, llm_kwargs, history, system_prompt) + "text": generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest) } } } diff --git a/toolbox.py b/toolbox.py index b7b762d7..2cfaf058 100644 --- a/toolbox.py +++ b/toolbox.py @@ -577,6 +577,64 @@ def del_outdated_uploads(outdate_time_seconds): except: pass return + +def html_local_file(file): + base_path = os.path.dirname(__file__) # 项目目录 + if os.path.exists(str(file)): + file = f'file={file.replace(base_path, ".")}' + return file + + +def html_local_img(__file, layout='left', max_width=None, max_height=None, md=True): + style = '' + if max_width is not None: + style += f"max-width: {max_width};" + if max_height is not None: + 
style += f"max-height: {max_height};" + __file = html_local_file(__file) + a = f'
' + if md: + a = f'![{__file}]({__file})' + return a + +def file_manifest_filter_type(file_list, filter_: list = None): + new_list = [] + if not filter_: filter_ = ['png', 'jpg', 'jpeg'] + for file in file_list: + if str(os.path.basename(file)).split('.')[-1] in filter_: + new_list.append(html_local_img(file, md=False)) + else: + new_list.append(file) + return new_list + +def to_markdown_tabs(head: list, tabs: list, alignment=':---:', column=False): + """ + Args: + head: 表头:[] + tabs: 表值:[[列1], [列2], [列3], [列4]] + alignment: :--- 左对齐, :---: 居中对齐, ---: 右对齐 + column: True to keep data in columns, False to keep data in rows (default). + Returns: + A string representation of the markdown table. + """ + if column: + transposed_tabs = list(map(list, zip(*tabs))) + else: + transposed_tabs = tabs + # Find the maximum length among the columns + max_len = max(len(column) for column in transposed_tabs) + + tab_format = "| %s " + tabs_list = "".join([tab_format % i for i in head]) + '|\n' + tabs_list += "".join([tab_format % alignment for i in head]) + '|\n' + + for i in range(max_len): + row_data = [tab[i] if i < len(tab) else '' for tab in transposed_tabs] + row_data = file_manifest_filter_type(row_data, filter_=None) + tabs_list += "".join([tab_format % i for i in row_data]) + '|\n' + + return tabs_list + def on_file_uploaded(request: gradio.Request, files, chatbot, txt, txt2, checkboxes, cookies): """ 当文件被上传时的回调函数 @@ -605,6 +663,7 @@ def on_file_uploaded(request: gradio.Request, files, chatbot, txt, txt2, checkbo # 整理文件集合 moved_files = [fp for fp in glob.glob(f'{target_path_base}/**/*', recursive=True)] + moved_files_str = to_markdown_tabs(head=['文件'], tabs=[moved_files]) if "浮动输入区" in checkboxes: txt, txt2 = "", target_path_base else: @@ -1151,6 +1210,36 @@ def get_chat_default_kwargs(): return default_chat_kwargs + +def get_pictures_list(path): + file_manifest = [f for f in glob.glob(f'{path}/**/*.jpg', recursive=True)] + file_manifest += [f for f in glob.glob(f'{path}/**/*.jpeg', recursive=True)] + file_manifest += [f for f in glob.glob(f'{path}/**/*.png', recursive=True)] + return file_manifest + + +import base64 +def have_any_recent_upload_image_files(chatbot): + _5min = 5 * 60 + if chatbot is None: return False, None # chatbot is None + most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None) + if not most_recent_uploaded: return False, None # most_recent_uploaded is None + if time.time() - most_recent_uploaded["time"] < _5min: + most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None) + path = most_recent_uploaded['path'] + file_manifest = get_pictures_list(path) + if len(file_manifest) == 0: return False, None + return True, file_manifest # most_recent_uploaded is new + else: + return False, None # most_recent_uploaded is too old + + +# Function to encode the image +def encode_image(image_path): + with open(image_path, "rb") as image_file: + return base64.b64encode(image_file.read()).decode('utf-8') + + def get_max_token(llm_kwargs): from request_llms.bridge_all import model_info return model_info[llm_kwargs['llm_model']]['max_token'] From ea4e03b1d892d462f71bab76ee0bec65d541f6b7 Mon Sep 17 00:00:00 2001 From: spike <2411123479@qq.com> Date: Wed, 15 Nov 2023 10:27:40 +0800 Subject: [PATCH 02/88] =?UTF-8?q?llm=5Fkwargs=20=E5=A2=9E=E5=8A=A0most=5Fr?= =?UTF-8?q?ecent=5Fuploaded?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- toolbox.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git 
a/toolbox.py b/toolbox.py index 2cfaf058..13a96633 100644 --- a/toolbox.py +++ b/toolbox.py @@ -74,6 +74,7 @@ def ArgsGeneralWrapper(f): 'max_length': max_length, 'temperature':temperature, 'client_ip': request.client.host, + 'most_recent_uploaded': cookies.get('most_recent_uploaded') } plugin_kwargs = { "advanced_arg": plugin_advanced_arg, @@ -660,17 +661,15 @@ def on_file_uploaded(request: gradio.Request, files, chatbot, txt, txt2, checkbo this_file_path = pj(target_path_base, file_origin_name) shutil.move(file.name, this_file_path) upload_msg += extract_archive(file_path=this_file_path, dest_dir=this_file_path+'.extract') - - # 整理文件集合 - moved_files = [fp for fp in glob.glob(f'{target_path_base}/**/*', recursive=True)] - moved_files_str = to_markdown_tabs(head=['文件'], tabs=[moved_files]) + if "浮动输入区" in checkboxes: txt, txt2 = "", target_path_base else: txt, txt2 = target_path_base, "" - # 输出消息 - moved_files_str = '\t\n\n'.join(moved_files) + # 整理文件集合 输出消息 + moved_files = [fp for fp in glob.glob(f'{target_path_base}/**/*', recursive=True)] + moved_files_str = to_markdown_tabs(head=['文件'], tabs=[moved_files]) chatbot.append(['我上传了文件,请查收', f'[Local Message] 收到以下文件: \n\n{moved_files_str}' + f'\n\n调用路径参数已自动修正到: \n\n{txt}' + From cdca36f5d221bdcf3d129b27932b3977b332ec26 Mon Sep 17 00:00:00 2001 From: Marroh <893017927@qq.com> Date: Sun, 19 Nov 2023 23:42:07 +0800 Subject: [PATCH 03/88] =?UTF-8?q?=E7=A7=BB=E5=8A=A8import?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- toolbox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolbox.py b/toolbox.py index 13a96633..854d303f 100644 --- a/toolbox.py +++ b/toolbox.py @@ -4,6 +4,7 @@ import time import inspect import re import os +import base64 import gradio import shutil import glob @@ -1217,7 +1218,6 @@ def get_pictures_list(path): return file_manifest -import base64 def have_any_recent_upload_image_files(chatbot): _5min = 5 * 60 if chatbot is None: return False, None # chatbot is None From b470af7c7b3867cc79371bf54d960a8a7b219eb2 Mon Sep 17 00:00:00 2001 From: Marroh <893017927@qq.com> Date: Wed, 22 Nov 2023 13:20:56 +0800 Subject: [PATCH 04/88] =?UTF-8?q?=E9=81=B5=E5=BE=AAPEP=20328=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E5=A4=AA=E9=95=BF=E7=9A=84import?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- request_llms/bridge_chatgpt_vision.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/request_llms/bridge_chatgpt_vision.py b/request_llms/bridge_chatgpt_vision.py index 0384a051..ebcf9689 100644 --- a/request_llms/bridge_chatgpt_vision.py +++ b/request_llms/bridge_chatgpt_vision.py @@ -15,8 +15,10 @@ import requests import base64 import os import glob +from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, \ + update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files + -from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \ get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY') From aee57364dd8377ee6c86dfde0944f48f48ccbaeb Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Sun, 26 Nov 2023 00:24:51 +0800 Subject: [PATCH 05/88] edit 
image --- crazy_functional.py | 12 +++++++++- crazy_functions/图片生成.py | 48 ++++++++++++++++++++++++++----------- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/crazy_functional.py b/crazy_functional.py index 3d4df718..b3dc5baf 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -354,7 +354,7 @@ def get_crazy_functions(): print('Load function plugin failed') try: - from crazy_functions.图片生成 import 图片生成_DALLE2, 图片生成_DALLE3 + from crazy_functions.图片生成 import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2 function_plugins.update({ "图片生成_DALLE2 (先切换模型到openai或api2d)": { "Group": "对话", @@ -377,6 +377,16 @@ def get_crazy_functions(): "Function": HotReload(图片生成_DALLE3) }, }) + # function_plugins.update({ + # "图片修改_DALLE2 (先切换模型到openai或api2d)": { + # "Group": "对话", + # "Color": "stop", + # "AsButton": True, + # "AdvancedArgs": False, # 调用时,唤起高级参数输入区(默认False) + # # "Info": "使用DALLE2修改图片 | 输入参数字符串,提供图像的内容", + # "Function": HotReload(图片修改_DALLE2) + # }, + # }) except: print(trimmed_format_exc()) print('Load function plugin failed') diff --git a/crazy_functions/图片生成.py b/crazy_functions/图片生成.py index 642a9e22..5c56b79d 100644 --- a/crazy_functions/图片生成.py +++ b/crazy_functions/图片生成.py @@ -54,19 +54,23 @@ def edit_image(llm_kwargs, prompt, image_path, resolution="1024x1024", model="da img_endpoint = chat_endpoint.replace('chat/completions','images/edits') # # Generate the image url = img_endpoint + n = 1 headers = { 'Authorization': f"Bearer {api_key}", - 'Content-Type': 'application/json' } - data = { - 'image': open(image_path, 'rb'), - 'prompt': prompt, - 'n': 1, - 'size': resolution, - 'model': model, - 'response_format': 'url' - } - response = requests.post(url, headers=headers, json=data, proxies=proxies) + make_transparent(image_path, image_path+'.transparent.png') + image_path = image_path+'.transparent.png' + with open(image_path, 'rb') as f: + file_content = f.read() + files = { + 'image': (os.path.basename(image_path), file_content), + # 'mask': ('mask.png', open('mask.png', 'rb')) + 'prompt': (None, prompt), + "n": (None, str(n)), + 'size': (None, resolution), + } + + response = requests.post(url, headers=headers, files=files, proxies=proxies) print(response.content) try: image_url = json.loads(response.content.decode('utf8'))['data'][0]['url'] @@ -187,14 +191,14 @@ def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys yield from update_ui(chatbot=chatbot, history=history) return - image_path = state.req[0] - resolution = state.req[1] - prompt = state.req[2] + image_path = state.req[0]['value'] + resolution = state.req[1]['value'] + prompt = state.req[2]['value'] chatbot.append(["图片修改, 执行中", f"图片:`{image_path}`
<br/>分辨率:`{resolution}`<br/>
修改需求:`{prompt}`"]) yield from update_ui(chatbot=chatbot, history=history) image_url, image_path = edit_image(llm_kwargs, prompt, image_path, resolution) - chatbot.append([state.prompt, + chatbot.append([prompt, f'图像中转网址:
<br/>`{image_url}`<br/>
'+ f'中转网址预览: <br/><div align="center"><img src="{image_url}"></div>
' f'本地文件地址:
<br/>`{image_path}`<br/>
'+ @@ -202,3 +206,19 @@ def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys ]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新 +def make_transparent(input_image_path, output_image_path): + from PIL import Image + image = Image.open(input_image_path) + image = image.convert("RGBA") + + data = image.getdata() + + new_data = [] + for item in data: + if item[0] == 255 and item[1] == 255 and item[2] == 255: + new_data.append((255, 255, 255, 0)) + else: + new_data.append(item) + + image.putdata(new_data) + image.save(output_image_path, "PNG") From 2d2e02040d7d91d2f2a4c34f4d0bf677873b5f4d Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Sun, 26 Nov 2023 01:08:34 +0800 Subject: [PATCH 06/88] =?UTF-8?q?DALLE2=E4=BF=AE=E6=94=B9=E5=9B=BE?= =?UTF-8?q?=E5=83=8F=E6=8F=92=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functional.py | 20 +++--- .../multi_stage/multi_stage_utils.py | 6 +- crazy_functions/图片生成.py | 64 +++++++++++++++---- 3 files changed, 61 insertions(+), 29 deletions(-) diff --git a/crazy_functional.py b/crazy_functional.py index b3dc5baf..d2e75750 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -377,16 +377,16 @@ def get_crazy_functions(): "Function": HotReload(图片生成_DALLE3) }, }) - # function_plugins.update({ - # "图片修改_DALLE2 (先切换模型到openai或api2d)": { - # "Group": "对话", - # "Color": "stop", - # "AsButton": True, - # "AdvancedArgs": False, # 调用时,唤起高级参数输入区(默认False) - # # "Info": "使用DALLE2修改图片 | 输入参数字符串,提供图像的内容", - # "Function": HotReload(图片修改_DALLE2) - # }, - # }) + function_plugins.update({ + "图片修改_DALLE2 (先切换模型到openai或api2d)": { + "Group": "对话", + "Color": "stop", + "AsButton": False, + "AdvancedArgs": False, # 调用时,唤起高级参数输入区(默认False) + # "Info": "使用DALLE2修改图片 | 输入参数字符串,提供图像的内容", + "Function": HotReload(图片修改_DALLE2) + }, + }) except: print(trimmed_format_exc()) print('Load function plugin failed') diff --git a/crazy_functions/multi_stage/multi_stage_utils.py b/crazy_functions/multi_stage/multi_stage_utils.py index 60f07783..f85d35aa 100644 --- a/crazy_functions/multi_stage/multi_stage_utils.py +++ b/crazy_functions/multi_stage/multi_stage_utils.py @@ -21,11 +21,7 @@ class GptAcademicState(): def reset(self): pass - def lock_plugin(self, chatbot): - chatbot._cookies['plugin_state'] = pickle.dumps(self) - - def unlock_plugin(self, chatbot): - self.reset() + def dump_state(self, chatbot): chatbot._cookies['plugin_state'] = pickle.dumps(self) def set_state(self, chatbot, key, value): diff --git a/crazy_functions/图片生成.py b/crazy_functions/图片生成.py index 5c56b79d..134eb07a 100644 --- a/crazy_functions/图片生成.py +++ b/crazy_functions/图片生成.py @@ -58,8 +58,10 @@ def edit_image(llm_kwargs, prompt, image_path, resolution="1024x1024", model="da headers = { 'Authorization': f"Bearer {api_key}", } - make_transparent(image_path, image_path+'.transparent.png') - image_path = image_path+'.transparent.png' + make_transparent(image_path, image_path+'.tsp.png') + make_square_image(image_path+'.tsp.png', image_path+'.tspsq.png') + resize_image(image_path+'.tspsq.png', image_path+'.ready.png', max_size=1024) + image_path = image_path+'.ready.png' with open(image_path, 'rb') as f: file_content = f.read() files = { @@ -134,6 +136,7 @@ def 图片生成_DALLE3(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys ]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新 + class ImageEditState(GptAcademicState): # 尚未完成 def get_image_file(self, x): @@ -146,18 +149,27 @@ class 
ImageEditState(GptAcademicState): file = None if not confirm else file_manifest[0] return confirm, file + def lock_plugin(self, chatbot): + chatbot._cookies['lock_plugin'] = 'crazy_functions.图片生成->图片修改_DALLE2' + self.dump_state(chatbot) + + def unlock_plugin(self, chatbot): + self.reset() + chatbot._cookies['lock_plugin'] = None + self.dump_state(chatbot) + def get_resolution(self, x): return (x in ['256x256', '512x512', '1024x1024']), x - + def get_prompt(self, x): confirm = (len(x)>=5) and (not self.get_resolution(x)[0]) and (not self.get_image_file(x)[0]) return confirm, x - + def reset(self): self.req = [ - {'value':None, 'description': '请先上传图像(必须是.png格式), 然后再次点击本插件', 'verify_fn': self.get_image_file}, - {'value':None, 'description': '请输入分辨率,可选:256x256, 512x512 或 1024x1024', 'verify_fn': self.get_resolution}, - {'value':None, 'description': '请输入修改需求,建议您使用英文提示词', 'verify_fn': self.get_prompt}, + {'value':None, 'description': '请先上传图像(必须是.png格式), 然后再次点击本插件', 'verify_fn': self.get_image_file}, + {'value':None, 'description': '请输入分辨率,可选:256x256, 512x512 或 1024x1024, 然后再次点击本插件', 'verify_fn': self.get_resolution}, + {'value':None, 'description': '请输入修改需求,建议您使用英文提示词, 然后再次点击本插件', 'verify_fn': self.get_prompt}, ] self.info = "" @@ -167,7 +179,7 @@ class ImageEditState(GptAcademicState): confirm, res = r['verify_fn'](prompt) if confirm: r['value'] = res - self.set_state(chatbot, 'dummy_key', 'dummy_value') + self.dump_state(chatbot) break return self @@ -186,8 +198,9 @@ def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys history = [] # 清空历史 state = ImageEditState.get_state(chatbot, ImageEditState) state = state.feed(prompt, chatbot) + state.lock_plugin(chatbot) if not state.already_obtained_all_materials(): - chatbot.append(["图片修改(先上传图片,再输入修改需求,最后输入分辨率)", state.next_req()]) + chatbot.append(["图片修改\n\n1. 上传图片(图片中需要修改的位置用橡皮擦擦除为纯白色,即RGB=255,255,255)\n2. 输入分辨率 \n3. 输入修改需求", state.next_req()]) yield from update_ui(chatbot=chatbot, history=history) return @@ -196,29 +209,52 @@ def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys prompt = state.req[2]['value'] chatbot.append(["图片修改, 执行中", f"图片:`{image_path}`
<br/>分辨率:`{resolution}`<br/>
修改需求:`{prompt}`"]) yield from update_ui(chatbot=chatbot, history=history) - image_url, image_path = edit_image(llm_kwargs, prompt, image_path, resolution) - chatbot.append([prompt, + chatbot.append([prompt, f'图像中转网址:
<br/>`{image_url}`<br/>
'+ f'中转网址预览: <br/><div align="center"><img src="{image_url}"></div>
' f'本地文件地址:
<br/>`{image_path}`<br/>
'+ f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>
' ]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新 + state.unlock_plugin(chatbot) def make_transparent(input_image_path, output_image_path): from PIL import Image image = Image.open(input_image_path) image = image.convert("RGBA") - data = image.getdata() - new_data = [] for item in data: if item[0] == 255 and item[1] == 255 and item[2] == 255: new_data.append((255, 255, 255, 0)) else: new_data.append(item) - image.putdata(new_data) image.save(output_image_path, "PNG") + +def resize_image(input_path, output_path, max_size=1024): + from PIL import Image + with Image.open(input_path) as img: + width, height = img.size + if width > max_size or height > max_size: + if width >= height: + new_width = max_size + new_height = int((max_size / width) * height) + else: + new_height = max_size + new_width = int((max_size / height) * width) + + resized_img = img.resize(size=(new_width, new_height)) + resized_img.save(output_path) + else: + img.save(output_path) + +def make_square_image(input_path, output_path): + from PIL import Image + with Image.open(input_path) as img: + width, height = img.size + size = max(width, height) + new_img = Image.new("RGBA", (size, size), color="black") + new_img.paste(img, ((size - width) // 2, (size - height) // 2)) + new_img.save(output_path) \ No newline at end of file From 688df6aa24d9f18b46635547334444d8f2224767 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:28:37 +0800 Subject: [PATCH 07/88] Update README.md --- README.md | 48 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index b1e5568e..05fb0ba5 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,65 @@ -> **Note** +> **Caution** > > 2023.11.12: 某些依赖包尚不兼容python 3.12,推荐python 3.11。 > > 2023.11.7: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目开源免费,近期发现有人蔑视开源协议并利用本项目违规圈钱,请提高警惕,谨防上当受骗。 +
+

+ GPT 学术优化 (GPT Academic) +

-# GPT 学术优化 (GPT Academic)
+[![Github][Github-image]][Github-url] +[![Releases][Releases-image]][Releases-url] +[![Installation][Installation-image]][Installation-url] +[![Wiki][Wiki-image]][Wiki-url] +[![PR][PRs-image]][PRs-url] + +[Github-image]: https://img.shields.io/badge/github-12100E.svg?&logo=github&logoColor=white +[Releases-image]: https://img.shields.io/badge/Releases-v3.6.0-blue +[Installation-image]: https://img.shields.io/badge/Installation-v3.6.1-blue +[Wiki-image]: https://img.shields.io/badge/wiki-000000.svg?logo=wikipedia +[PRs-image]: https://img.shields.io/badge/PRs-welcome-pink + +[Github-url]: https://github.com/binary-husky/gpt_academic +[Releases-url]: https://github.com/binary-husky/gpt_academic/releases +[Installation-url]: https://github.com/binary-husky/gpt_academic#installation +[Wiki-url]: https://github.com/binary-husky/gpt_academic/wiki +[PRs-url]: https://github.com/binary-husky/gpt_academic/pulls + + +
**如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或插件,欢迎发pull requests!** -If you like this project, please give it a Star. We also have a README in [English|](docs/README.English.md)[日本語|](docs/README.Japanese.md)[한국어|](docs/README.Korean.md)[Русский|](docs/README.Russian.md)[Français](docs/README.French.md) translated by this project itself. +If you like this project, please give it a Star. + + +Read this in [English | ](docs/README.English.md)[日本語 | ](docs/README.Japanese.md)[한국어 | ](docs/README.Korean.md)[Русский | ](docs/README.Russian.md)[Français](docs/README.French.md). All translations have been provided by the project itself. + To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental). +

> **Note** > > 1.请注意只有 **高亮** 标识的插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR。 > -> 2.本项目中每个文件的功能都在[自译解报告`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题[`wiki`](https://github.com/binary-husky/gpt_academic/wiki)。[常规安装方法](#installation) | [一键安装脚本](https://github.com/binary-husky/gpt_academic/releases) | [配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。 +> 2.本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。 +> +> [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)]([https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki)) > > 3.本项目兼容并鼓励尝试国产大语言模型ChatGLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 +
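
上述多个 api-key 共存的配置,可参考下面这个最小示意(key 均为占位符,具体写法以 `config.py` 内的注释为准):

```python
# config_private.py —— 若存在则覆盖 config.py 中的同名配置(变量名与 config.py 保持一致)
API_KEY = "openai-key1,openai-key2,azure-key3,api2d-key4"  # 多个 key 以英文逗号分隔
LLM_MODEL = "gpt-3.5-turbo"  # 默认对话模型(示意值)
```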

- +# Features Overview
功能(⭐= 近期新增功能) | 描述 --- | --- -⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B)! | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, 通义千问[Qwen](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf),[智谱API](https://open.bigmodel.cn/),DALLE3, [DeepseekCoder](https://coder.deepseek.com/) +⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B) | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, 通义千问[Qwen](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf),[智谱API](https://open.bigmodel.cn/),DALLE3, [DeepseekCoder](https://coder.deepseek.com/) 润色、翻译、代码解释 | 一键润色、翻译、查找论文语法错误、解释代码 [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键 模块化设计 | 支持自定义强大的[插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) @@ -57,9 +88,10 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼
+ - 新界面(修改`config.py`中的LAYOUT选项即可实现“左右布局”和“上下布局”的切换)
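
上面提到的布局切换只需改动一个配置项(以下为示意,取值以 `config.py` 中该选项的注释为准):

```python
# config.py
LAYOUT = "LEFT-RIGHT"  # “左右布局”;改为 "TOP-DOWN" 则为“上下布局”
```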
- +
@@ -100,7 +132,7 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 2. 配置API_KEY - 在`config.py`中,配置API KEY等设置,[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。 + 在`config.py`中,配置API KEY等设置,[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。 [Wiki-项目配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解该读取逻辑,我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件,并把`config.py`中的配置转移(复制)到`config_private.py`中(仅复制您修改过的配置条目即可)。 」 From d39945c415e616f621c394da15a0bc379b471caf Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:38:59 +0800 Subject: [PATCH 08/88] Update README.md --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 05fb0ba5..b6af2cfd 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ To translate this project to arbitrary language with GPT, read and run [`multi_l 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [插件] 一键解读latex/pdf论文全文并生成摘要 Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [插件] 一键翻译或润色latex论文 批量注释生成 | [插件] 一键批量生成函数注释 -Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗? +Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗?就是出自他的手笔 chat分析报告生成 | [插件] 运行后自动生成总结汇报 [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [插件] PDF论文提取题目&摘要+翻译全文(多线程) [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [插件] 输入arxiv文章url即可一键翻译摘要+下载PDF @@ -79,7 +79,7 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 ⭐AutoGen多智能体插件 | [插件] 借助微软AutoGen,探索多Agent的智能涌现可能! 启动暗色[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 -[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM2](https://github.com/THUDM/ChatGLM2-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? +[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM2](https://github.com/THUDM/ChatGLM2-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)伺候的感觉一定会很不错吧? ⭐ChatGLM2微调模型 | 支持加载ChatGLM2微调模型,提供ChatGLM2微调辅助插件 更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)和[盘古α](https://openi.org.cn/pangu/) ⭐[void-terminal](https://github.com/binary-husky/void-terminal) pip包 | 脱离GUI,在Python中直接调用本项目的所有函数插件(开发中) @@ -95,7 +95,7 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 -- 所有按钮都通过读取functional.py动态生成,可随意加自定义功能,解放粘贴板 +- 所有按钮都通过读取functional.py动态生成,可随意加自定义功能,解放剪贴板
@@ -105,7 +105,7 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 -- 如果输出包含公式,会同时以tex形式和渲染形式显示,方便复制和阅读 +- 如果输出包含公式,会以tex形式和渲染形式同时显示,方便复制和阅读
@@ -134,9 +134,9 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 在`config.py`中,配置API KEY等设置,[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。 [Wiki-项目配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。 - 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解该读取逻辑,我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件,并把`config.py`中的配置转移(复制)到`config_private.py`中(仅复制您修改过的配置条目即可)。 」 + 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解该读取逻辑,我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件,并把`config.py`中的配置转移(复制)到`config_private.py`中(仅复制您修改过的配置条目即可) 」。 - 「 支持通过`环境变量`配置项目,环境变量的书写格式参考`docker-compose.yml`文件或者我们的[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。配置读取优先级: `环境变量` > `config_private.py` > `config.py`。 」 + 「 支持通过`环境变量`配置项目,环境变量的书写格式参考`docker-compose.yml`文件或者我们的[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。配置读取优先级: `环境变量` > `config_private.py` > `config.py` 」。 3. 安装依赖 @@ -203,7 +203,7 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt- P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用方案4或者方案0获取Latex功能。 -2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时) +2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行) [![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml) ``` sh @@ -215,7 +215,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 ### 安装方法III:其他部署姿势 1. **Windows一键运行脚本**。 完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。 -脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。 +脚本的贡献来源:[oobabooga](https://github.com/oobabooga/one-click-installers)。 2. 使用第三方API、Azure等、文心一言、星火等,见[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) @@ -231,7 +231,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 # Advanced Usage ### I:自定义新的便捷按钮(学术快捷键) -任意文本编辑器打开`core_functional.py`,添加条目如下,然后重启程序。(如按钮已存在,那么前缀、后缀都支持热修改,无需重启程序即可生效。) +任意文本编辑器打开`core_functional.py`,添加如下条目,然后重启程序。(如果按钮已存在,那么可以直接修改(前缀、后缀都已支持热修改),无需重启程序即可生效。) 例如 ```python @@ -352,7 +352,7 @@ GPT Academic开发者QQ群:`610599535` - 已知问题 - 某些浏览器翻译插件干扰此软件前端的运行 - - 官方Gradio目前有很多兼容性Bug,请务必使用`requirement.txt`安装Gradio + - 官方Gradio目前有很多兼容性Bug,请**务必使用`requirement.txt`安装Gradio** ### III:主题 可以通过修改`THEME`选项(config.py)变更主题 From d57bb8afbe5947cf673c2ee6051eda8dcaa76147 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:41:05 +0800 Subject: [PATCH 09/88] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b6af2cfd..6f939817 100644 --- a/README.md +++ b/README.md @@ -214,8 +214,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 ### 安装方法III:其他部署姿势 1. 
**Windows一键运行脚本**。 -完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。 -脚本的贡献来源:[oobabooga](https://github.com/oobabooga/one-click-installers)。 +完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。脚本贡献来源:[oobabooga](https://github.com/oobabooga/one-click-installers)。 2. 使用第三方API、Azure等、文心一言、星火等,见[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) @@ -250,6 +249,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 ### II:自定义函数插件 编写强大的函数插件来执行任何你想得到的和想不到的任务。 + 本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。 详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 From 8780fe29f1083f92fca446a4162c28228bbc6121 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:07:27 +0800 Subject: [PATCH 10/88] Update README.md --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 6f939817..b38a1c38 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ > > 2023.11.7: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目开源免费,近期发现有人蔑视开源协议并利用本项目违规圈钱,请提高警惕,谨防上当受骗。 +

@@ -63,7 +64,7 @@ To translate this project to arbitrary language with GPT, read and run [`multi_l 润色、翻译、代码解释 | 一键润色、翻译、查找论文语法错误、解释代码 [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键 模块化设计 | 支持自定义强大的[插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) -[程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [插件] 一键可以剖析Python/C/C++/Java/Lua/...项目树 或 [自我剖析](https://www.bilibili.com/video/BV1cj411A7VW) +[程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [插件] 一键剖析Python/C/C++/Java/Lua/...项目树 或 [自我剖析](https://www.bilibili.com/video/BV1cj411A7VW) 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [插件] 一键解读latex/pdf论文全文并生成摘要 Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [插件] 一键翻译或润色latex论文 批量注释生成 | [插件] 一键批量生成函数注释 @@ -83,7 +84,7 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 ⭐ChatGLM2微调模型 | 支持加载ChatGLM2微调模型,提供ChatGLM2微调辅助插件 更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)和[盘古α](https://openi.org.cn/pangu/) ⭐[void-terminal](https://github.com/binary-husky/void-terminal) pip包 | 脱离GUI,在Python中直接调用本项目的所有函数插件(开发中) -⭐虚空终端插件 | [插件] 用自然语言,直接调度本项目其他插件 +⭐虚空终端插件 | [插件] 能够使用自然语言直接调度本项目其他插件 更多新功能展示 (图像生成等) …… | 见本文档结尾处 ……

@@ -110,7 +111,7 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 -- 懒得看项目代码?整个工程直接给chatgpt炫嘴里 +- 懒得看项目代码?直接把整个工程炫ChatGPT嘴里
@@ -134,7 +135,7 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 在`config.py`中,配置API KEY等设置,[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。 [Wiki-项目配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。 - 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解该读取逻辑,我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件,并把`config.py`中的配置转移(复制)到`config_private.py`中(仅复制您修改过的配置条目即可) 」。 + 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解以上读取逻辑,我们强烈建议您在`config.py`同路径下创建一个名为`config_private.py`的新配置文件,并使用`config_private.py`配置项目,以确保更新或其他用户无法轻易查看您的私有配置 」。 「 支持通过`环境变量`配置项目,环境变量的书写格式参考`docker-compose.yml`文件或者我们的[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。配置读取优先级: `环境变量` > `config_private.py` > `config.py` 」。 @@ -183,7 +184,7 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt- ### 安装方法II:使用Docker -0. 部署项目的全部能力(这个是包含cuda和latex的大型镜像。但如果您网速慢、硬盘小,则不推荐使用这个) +0. 部署项目的全部能力(这个是包含cuda和latex的大型镜像。但如果您网速慢、硬盘小,则不推荐该方法全部部署项目) [![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml) ``` sh @@ -212,7 +213,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 ``` -### 安装方法III:其他部署姿势 +### 安装方法III:其他部署方法 1. **Windows一键运行脚本**。 完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。脚本贡献来源:[oobabooga](https://github.com/oobabooga/one-click-installers)。 @@ -221,7 +222,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 3. 云服务器远程部署避坑指南。 请访问[云服务器远程部署wiki](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) -4. 一些新型的部署平台或方法 +4. 
在其他平台部署&二级网址部署 - 使用Sealos[一键部署](https://github.com/binary-husky/gpt_academic/issues/993)。 - 使用WSL2(Windows Subsystem for Linux 子系统)。请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) - 如何在二级网址(如`http://localhost/subpath`)下运行。请访问[FastAPI运行说明](docs/WithFastapi.md) From 17f361d63bcea24a2ec2196120b0b664144c53cd Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:11:29 +0800 Subject: [PATCH 11/88] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b38a1c38..ffcf2663 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,11 @@ [![Wiki][Wiki-image]][Wiki-url] [![PR][PRs-image]][PRs-url] -[Github-image]: https://img.shields.io/badge/github-12100E.svg?&logo=github&logoColor=white -[Releases-image]: https://img.shields.io/badge/Releases-v3.6.0-blue -[Installation-image]: https://img.shields.io/badge/Installation-v3.6.1-blue -[Wiki-image]: https://img.shields.io/badge/wiki-000000.svg?logo=wikipedia -[PRs-image]: https://img.shields.io/badge/PRs-welcome-pink +[Github-image]: https://img.shields.io/badge/github-12100E.svg?&logo=github&logoColor=white?style=flat-square +[Releases-image]: https://img.shields.io/badge/Releases-v3.6.0-blue?style=flat-square +[Installation-image]: https://img.shields.io/badge/Installation-v3.6.1-blue?style=flat-square +[Wiki-image]: https://img.shields.io/badge/wiki-项目文档-black?style=flat-square +[PRs-image]: https://img.shields.io/badge/PRs-welcome-pink?style=flat-square [Github-url]: https://github.com/binary-husky/gpt_academic [Releases-url]: https://github.com/binary-husky/gpt_academic/releases From c141e767c65bdfdaa66f3e773672e5e1a6d9ac8b Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:37:20 +0800 Subject: [PATCH 12/88] Update README.md --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ffcf2663..06abfac6 100644 --- a/README.md +++ b/README.md @@ -12,17 +12,21 @@ [![Github][Github-image]][Github-url] +[![License][License-image]][License-url] + [![Releases][Releases-image]][Releases-url] [![Installation][Installation-image]][Installation-url] [![Wiki][Wiki-image]][Wiki-url] [![PR][PRs-image]][PRs-url] -[Github-image]: https://img.shields.io/badge/github-12100E.svg?&logo=github&logoColor=white?style=flat-square +[License-image]: https://img.shields.io/badge/LICENSE-GPL3.0-black?&style=for-the-badge +[Github-image]: https://img.shields.io/badge/github-12100E.svg?&style=for-the-badge&logo=github&logoColor=white [Releases-image]: https://img.shields.io/badge/Releases-v3.6.0-blue?style=flat-square [Installation-image]: https://img.shields.io/badge/Installation-v3.6.1-blue?style=flat-square -[Wiki-image]: https://img.shields.io/badge/wiki-项目文档-black?style=flat-square +[Wiki-image]: https://img.shields.io/badge/wiki-项目文档-yellow?style=flat-square [PRs-image]: https://img.shields.io/badge/PRs-welcome-pink?style=flat-square +[License-url]: https://github.com/binary-husky/gpt_academic/blob/master/LICENSE [Github-url]: https://github.com/binary-husky/gpt_academic [Releases-url]: https://github.com/binary-husky/gpt_academic/releases [Installation-url]: https://github.com/binary-husky/gpt_academic#installation From f9e9b6f4ec3252b6028601da7c3c8ff614fe49fd Mon Sep 17 00:00:00 2001 From: 
Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:38:08 +0800 Subject: [PATCH 13/88] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 06abfac6..79dd9892 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ If you like this project, please give it a Star. -Read this in [English | ](docs/README.English.md)[日本語 | ](docs/README.Japanese.md)[한국어 | ](docs/README.Korean.md)[Русский | ](docs/README.Russian.md)[Français](docs/README.French.md). All translations have been provided by the project itself. +Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental).

From e4c057f5a304b5a5a4a4940f7e8282f2b112126d Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:39:33 +0800 Subject: [PATCH 14/88] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 79dd9892..f747be90 100644 --- a/README.md +++ b/README.md @@ -19,11 +19,11 @@ [![Wiki][Wiki-image]][Wiki-url] [![PR][PRs-image]][PRs-url] -[License-image]: https://img.shields.io/badge/LICENSE-GPL3.0-black?&style=for-the-badge +[License-image]: https://img.shields.io/badge/LICENSE-GPL3.0-orange?&style=for-the-badge [Github-image]: https://img.shields.io/badge/github-12100E.svg?&style=for-the-badge&logo=github&logoColor=white [Releases-image]: https://img.shields.io/badge/Releases-v3.6.0-blue?style=flat-square [Installation-image]: https://img.shields.io/badge/Installation-v3.6.1-blue?style=flat-square -[Wiki-image]: https://img.shields.io/badge/wiki-项目文档-yellow?style=flat-square +[Wiki-image]: https://img.shields.io/badge/wiki-项目文档-black?style=flat-square [PRs-image]: https://img.shields.io/badge/PRs-welcome-pink?style=flat-square [License-url]: https://github.com/binary-husky/gpt_academic/blob/master/LICENSE From f417c1ce6df0873b7c481cea6edf7a8e9d73ca4e Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:46:00 +0800 Subject: [PATCH 15/88] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f747be90..5716b66c 100644 --- a/README.md +++ b/README.md @@ -135,9 +135,9 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 cd gpt_academic ``` -2. 配置API_KEY +2. 配置API_KEY等变量 - 在`config.py`中,配置API KEY等设置,[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。 [Wiki-项目配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。 + 在`config.py`中,配置API KEY等变量。[特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1)、[Wiki-项目配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解以上读取逻辑,我们强烈建议您在`config.py`同路径下创建一个名为`config_private.py`的新配置文件,并使用`config_private.py`配置项目,以确保更新或其他用户无法轻易查看您的私有配置 」。 From 6417ca9ddeb92046e85eecb101defbad690e9989 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:46:43 +0800 Subject: [PATCH 16/88] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5716b66c..a9c64588 100644 --- a/README.md +++ b/README.md @@ -48,13 +48,13 @@ To translate this project to arbitrary language with GPT, read and run [`multi_l > **Note** > -> 1.请注意只有 **高亮** 标识的插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR。 +> 1. 请注意只有 **高亮** 标识的插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR。 > -> 2.本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。 +> 2. 
本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。 > > [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)]([https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki)) > -> 3.本项目兼容并鼓励尝试国产大语言模型ChatGLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 +> 3. 本项目兼容并鼓励尝试国产大语言模型ChatGLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。

From 29775dedd8400b83a9051ce2120dd655d63291a8 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:49:38 +0800 Subject: [PATCH 17/88] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a9c64588..04b70fed 100644 --- a/README.md +++ b/README.md @@ -188,7 +188,7 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt- ### 安装方法II:使用Docker -0. 部署项目的全部能力(这个是包含cuda和latex的大型镜像。但如果您网速慢、硬盘小,则不推荐该方法全部部署项目) +0. 部署项目的全部能力(这个是包含cuda和latex的大型镜像。但如果您网速慢、硬盘小,则不推荐该方法部署完整项目) [![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml) ``` sh From f44642d9d23182a4fc43851ebe2e00b2e3593c7e Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:51:44 +0800 Subject: [PATCH 18/88] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 04b70fed..6ff14e4a 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ To translate this project to arbitrary language with GPT, read and run [`multi_l > > 2. 本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。 > -> [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)]([https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki)) +> [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)]([https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki)) > > 3. 
本项目兼容并鼓励尝试国产大语言模型ChatGLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 From 5720ac127c7cef04b4bef6b80e82e65e4a674bac Mon Sep 17 00:00:00 2001 From: spike <2411123479@qq.com> Date: Wed, 29 Nov 2023 20:04:15 +0800 Subject: [PATCH 19/88] =?UTF-8?q?=E7=9B=91=E5=90=AC=E8=BE=93=E5=85=A5?= =?UTF-8?q?=E6=A1=86=EF=BC=8C=E6=94=AF=E6=8C=81=E7=B2=98=E8=B4=B4=E4=B8=8A?= =?UTF-8?q?=E4=BC=A0=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 7 ++-- requirements.txt | 2 +- themes/common.js | 104 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 107 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index b29c94fc..53fb6889 100644 --- a/main.py +++ b/main.py @@ -85,7 +85,7 @@ def main(): with gr_L2(scale=1, elem_id="gpt-panel"): with gr.Accordion("输入区", open=True, elem_id="input-panel") as area_input_primary: with gr.Row(): - txt = gr.Textbox(show_label=False, placeholder="Input question here.").style(container=False) + txt = gr.Textbox(show_label=False, placeholder="Input question here.", elem_id='user_input_main').style(container=False) with gr.Row(): submitBtn = gr.Button("提交", elem_id="elem_submit", variant="primary") with gr.Row(): @@ -146,7 +146,7 @@ def main(): with gr.Row(): with gr.Tab("上传文件", elem_id="interact-panel"): gr.Markdown("请上传本地文件/压缩包供“函数插件区”功能调用。请注意: 上传文件后会自动把输入区修改为相应路径。") - file_upload_2 = gr.Files(label="任何文件, 推荐上传压缩文件(zip, tar)", file_count="multiple") + file_upload_2 = gr.Files(label="任何文件, 推荐上传压缩文件(zip, tar)", file_count="multiple", elem_id="elem_upload_float") with gr.Tab("更换模型 & Prompt", elem_id="interact-panel"): md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False) @@ -178,7 +178,8 @@ def main(): with gr.Row() as row: row.style(equal_height=True) with gr.Column(scale=10): - txt2 = gr.Textbox(show_label=False, placeholder="Input question here.", lines=8, label="输入区2").style(container=False) + txt2 = gr.Textbox(show_label=False, placeholder="Input question here.", + elem_id='user_input_float', lines=8, label="输入区2").style(container=False) with gr.Column(scale=1, min_width=40): submitBtn2 = gr.Button("提交", variant="primary"); submitBtn2.style(size="sm") resetBtn2 = gr.Button("重置", variant="secondary"); resetBtn2.style(size="sm") diff --git a/requirements.txt b/requirements.txt index a5782f77..94fac531 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ tiktoken>=0.3.3 requests[socks] pydantic==1.10.11 transformers>=4.27.1 -scipdf_parser>=0.52 +scipdf_parser python-markdown-math websocket-client beautifulsoup4 diff --git a/themes/common.js b/themes/common.js index 849cb9a5..534bebc1 100644 --- a/themes/common.js +++ b/themes/common.js @@ -122,7 +122,7 @@ function chatbotAutoHeight(){ chatbot.style.maxHeight = pixelString; chatbot.style.height = pixelString; } } - + monitoring_input_box() update_height(); setInterval(function() { update_height_slow() @@ -160,4 +160,104 @@ function get_elements(consider_state_panel=false) { var chatbot_height = chatbot.style.height; var chatbot_height = parseInt(chatbot_height); return { panel_height_target, chatbot_height, chatbot }; -} \ No newline at end of file +} + + +function add_func_paste(input) { + let paste_files = []; + if (input) { + input.addEventListener("paste", async function (e) { + const clipboardData = e.clipboardData || window.clipboardData; + const items = clipboardData.items; + if 
(items) { + for (i = 0; i < items.length; i++) { + if (items[i].kind === "file") { // 确保是文件类型 + const file = items[i].getAsFile(); + // 将每一个粘贴的文件添加到files数组中 + paste_files.push(file); + e.preventDefault(); // 避免粘贴文件名到输入框 + } + } + if (paste_files.length > 0) { + // 按照文件列表执行批量上传逻辑 + await paste_upload_files(paste_files); + paste_files = [] + + } + } + }); + } +} + + +async function paste_upload_files(files) { + const uploadInputElement = elem_upload_float.querySelector("input[type=file]"); + let totalSizeMb = 0 + if (files && files.length > 0) { + // 执行具体的上传逻辑 + if (uploadInputElement) { + for (let i = 0; i < files.length; i++) { + // 将从文件数组中获取的文件大小(单位为字节)转换为MB, + totalSizeMb += files[i].size / 1024 / 1024; + } + // 检查文件总大小是否超过20MB + if (totalSizeMb > 20) { + toast_push('⚠️文件夹大于20MB 🚀上传文件中', 2000) + // return; // 如果超过了指定大小, 可以不进行后续上传操作 + } + // 监听change事件, 原生Gradio可以实现 + // uploadInputElement.addEventListener('change', function(){replace_input_string()}); + let event = new Event("change"); + Object.defineProperty(event, "target", {value: uploadInputElement, enumerable: true}); + Object.defineProperty(event, "currentTarget", {value: uploadInputElement, enumerable: true}); + Object.defineProperty(uploadInputElement, "files", {value: files, enumerable: true}); + uploadInputElement.dispatchEvent(event); + // toast_push('🎉上传文件成功', 2000) + } + } +} +//提示信息 封装 +function toast_push(msg, duration) { + duration = isNaN(duration) ? 3000 : duration; + const m = document.createElement('div'); + m.innerHTML = msg; + m.style.cssText = "font-size: var(--text-md) !important; color: rgb(255, 255, 255);background-color: rgba(0, 0, 0, 0.6);padding: 10px 15px;margin: 0 0 0 -60px;border-radius: 4px;position: fixed; top: 50%;left: 50%;width: 130px;text-align: center;"; + document.body.appendChild(m); + setTimeout(function () { + var d = 0.5; + m.style.opacity = '0'; + setTimeout(function () { + document.body.removeChild(m) + }, d * 1000); + }, duration); +} + +var elem_upload = null; +var elem_upload_float = null; +var elem_input_main = null; +var elem_input_float = null; + + +function monitoring_input_box() { + elem_upload = document.getElementById('elem_upload') + elem_upload_float = document.getElementById('elem_upload_float') + elem_input_main = document.getElementById('user_input_main') + elem_input_float = document.getElementById('user_input_float') + if (elem_input_main) { + if (elem_input_main.querySelector("textarea")) { + add_func_paste(elem_input_main.querySelector("textarea")) + } + } + if (elem_input_float) { + if (elem_input_float.querySelector("textarea")){ + add_func_paste(elem_input_float.querySelector("textarea")) + } + } +} + + +// 监视页面变化 +window.addEventListener("DOMContentLoaded", function () { + // const ga = document.getElementsByTagName("gradio-app"); + gradioApp().addEventListener("render", monitoring_input_box); +}); From 88802b0f7214318ebc373b2da325fd1b8e4f75a8 Mon Sep 17 00:00:00 2001 From: spike <2411123479@qq.com> Date: Wed, 29 Nov 2023 20:15:40 +0800 Subject: [PATCH 20/88] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=97=A0=E6=B3=95?= =?UTF-8?q?=E7=B2=98=E8=B4=B4=E7=9A=84toast?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- themes/common.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/themes/common.js b/themes/common.js index 534bebc1..a164a070 100644 --- a/themes/common.js +++ b/themes/common.js @@ -213,6 +213,8 @@ async function paste_upload_files(files) { Object.defineProperty(uploadInputElement, "files", {value: files, 
             uploadInputElement.dispatchEvent(event);
             // toast_push('🎉上传文件成功', 2000)
+        } else {
+            toast_push('⚠️请先删除上传区中的历史文件,再尝试粘贴。', 2000)
         }
     }
 }
@@ -221,7 +223,7 @@ function toast_push(msg, duration) {
     duration = isNaN(duration) ? 3000 : duration;
     const m = document.createElement('div');
     m.innerHTML = msg;
-    m.style.cssText = "font-size: var(--text-md) !important; color: rgb(255, 255, 255);background-color: rgba(0, 0, 0, 0.6);padding: 10px 15px;margin: 0 0 0 -60px;border-radius: 4px;position: fixed; top: 50%;left: 50%;width: 130px;text-align: center;";
+    m.style.cssText = "font-size: var(--text-md) !important; color: rgb(255, 255, 255);background-color: rgba(0, 0, 0, 0.6);padding: 10px 15px;margin: 0 0 0 -60px;border-radius: 4px;position: fixed; top: 50%;left: 50%;width: auto; text-align: center;";
     document.body.appendChild(m);
     setTimeout(function () {
         var d = 0.5;

From 9a0ed248ca87c50172fb2950449917020eeaf7b9 Mon Sep 17 00:00:00 2001
From: 505030475
Date: Thu, 30 Nov 2023 00:15:09 +0800
Subject: [PATCH 21/88] =?UTF-8?q?=E8=B0=81=E6=98=AF=E5=8D=A7=E5=BA=95?=
 =?UTF-8?q?=E5=B0=8F=E6=B8=B8=E6=88=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/互动小游戏.py | 159 ++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 crazy_functions/互动小游戏.py

diff --git a/crazy_functions/互动小游戏.py b/crazy_functions/互动小游戏.py
new file mode 100644
index 00000000..e00ef32b
--- /dev/null
+++ b/crazy_functions/互动小游戏.py
@@ -0,0 +1,159 @@
+from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
+from crazy_functions.multi_stage.multi_stage_utils import GptAcademicState
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+import random
+
+class 小游戏(GptAcademicState):
+    def __init__(self):
+        self.need_game_reset = True
+        self.llm_kwargs = None
+        super().__init__()
+
+    def lock_plugin(self, chatbot):
+        chatbot._cookies['lock_plugin'] = 'crazy_functions.互动小游戏->谁是卧底'
+        self.dump_state(chatbot)
+
+    def unlock_plugin(self, chatbot):
+        self.reset()
+        chatbot._cookies['lock_plugin'] = None
+        self.dump_state(chatbot)
+
+    def set_state(self, chatbot, key, value):
+        return super().set_state(chatbot, key, value)
+
+    def init_game(self, chatbot):
+        chatbot.get_cookies()['lock_plugin'] = ''
+
+    def clean_up_game(self, chatbot):
+        chatbot.get_cookies()['lock_plugin'] = None
+
+    def init_player(self):
+        pass
+
+    def step(self, prompt, chatbot):
+        pass
+
+    def continue_game(self, prompt, chatbot):
+        # init_game不是生成器,直接调用即可;step负责推进一轮游戏
+        if self.need_game_reset:
+            self.need_game_reset = False
+            self.init_game(chatbot)
+        yield from self.step(prompt, chatbot)
+        self.dump_state(chatbot)
+        yield from update_ui(chatbot=chatbot, history=[])
+
+class 小游戏_谁是卧底_玩家():
+    def __init__(self, game_handle, card, llm_model, name) -> None:
+        self.game_handle = game_handle
+        self.card = card
+        self.name = name
+        self.is_out = False
+        self.is_undercover = False
+        self.vote_who = None
+        self.llm_model = llm_model
+        self.is_human = llm_model == 'human'
+        self.what_player_has_spoken = []
+
+    def speak(self, content=None):
+        # content为None时表示AI玩家,调用LLM生成发言;否则记录人类玩家的输入
+        if content is None:
+            assert not self.is_human
+            content = yield from self.agi_speak()
+        self.what_player_has_spoken.append(content)
+
+    def agi_speak(self):
+        inputs = f'please say something about {self.card}'
+        res = yield from request_gpt_model_in_new_thread_with_ui_alive(
+            inputs=inputs,
+            inputs_show_user=inputs,
+            llm_kwargs=self.game_handle.llm_kwargs,
+            chatbot=self.game_handle.chatbot,
+            history=[],
+            sys_prompt='你正在玩“谁是卧底”游戏,请用一句话描述你拿到的词语,但不要直接说出这个词。'
+        )
+        return res
+
+    def vote(self, content=None):
+        # content为None时表示AI玩家投票;人类玩家输入被怀疑者的编号
+        if content is None:
+            assert not self.is_human
+            self.vote_who = yield from self.agi_vote()
+        else:
+            try:
+                self.vote_who = int(content)
+            except:
+                self.vote_who = None
+
+    def agi_vote(self):
+        # 简化版投票策略:随机投给一名未出局的其他玩家(基于LLM的投票逻辑尚未完成)
+        if False: yield  # 保持生成器接口,与vote()中的yield from兼容
+        candidates = [int(p.name) for p in self.game_handle.players if (not p.is_out) and (p is not self)]
+        return random.choice(candidates)
+
+class 小游戏_谁是卧底(小游戏):
+    def __init__(self):
+        self.game_phase = '发言' # 或 '投票'
+        super().__init__()
+
+    def init_game(self, chatbot):
+        self.chatbot = chatbot  # 供AI玩家调用LLM时刷新界面
+        self.n_players = 3
+        self.n_ai_players = self.n_players - 1
+        card = "橙子"
+        undercover_card = "橘子"
+        llm_model = self.llm_kwargs['llm_model']
+        self.players = [
+            小游戏_谁是卧底_玩家(self, card, llm_model, str(i)) for i in range(self.n_players)
+        ]
+
+        undercover = random.randint(0, self.n_players-1)
+        human = 0
+
+        self.players[undercover].card = undercover_card
+        self.players[undercover].is_undercover = True
+        self.players[human].llm_model = 'human'
+        self.players[human].is_human = True
+        super().init_game(chatbot)
+
+    def who_is_out(self):
+        # 统计未出局玩家的投票,得票最多者(可能并列)出局
+        votes = {}
+        for player in self.players:
+            if player.is_out: continue
+            if player.vote_who is None: continue
+            if player.vote_who not in votes: votes[player.vote_who] = 0
+            votes[player.vote_who] += 1
+        if not votes: return []
+        max_votes = max(votes.values())
+        voted_out = [idx for idx, vote_count in votes.items() if vote_count == max_votes]
+        for idx in voted_out:
+            print('淘汰了', self.players[idx].name)
+            self.players[idx].is_out = True
+        return voted_out
+
+    def step(self, prompt, chatbot):
+
+        if self.game_phase == '发言':
+            for player in self.players:
+                if player.is_out: continue
+                if player.is_human:
+                    yield from player.speak(prompt)
+                else:
+                    yield from player.speak()
+            self.game_phase = '投票'
+
+        elif self.game_phase == '投票':
+            for player in self.players:
+                if player.is_out: continue
+                if player.is_human:
+                    yield from player.vote(prompt)
+                else:
+                    yield from player.vote()
+            self.who_is_out()
+            if len([player for player in self.players if not player.is_out]) <= 2:
+                # 只剩两名(或更少)玩家时结算:卧底仍未被淘汰则卧底获胜
+                if any(player.is_undercover for player in self.players if not player.is_out):
+                    print('卧底获胜')
+                else:
+                    print('平民获胜')
+                self.need_game_reset = True
+            self.game_phase = '发言'
+
+        else:
+            raise RuntimeError
+
+
+@CatchException
+def 谁是卧底(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    # 尚未完成
+    history = []    # 清空历史
+    state = 小游戏_谁是卧底.get_state(chatbot, 小游戏_谁是卧底)
+    state.llm_kwargs = llm_kwargs
+    yield from state.continue_game(prompt, chatbot)

From 55d807c116112366bd86200f9765d2e9c0d5b48f Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 30 Nov 2023 22:19:05 +0800
Subject: [PATCH 22/88] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=86=85=E5=AD=98?=
 =?UTF-8?q?=E6=B3=84=E9=9C=B2=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_fns/latex_actions.py |  1 +
 crazy_functions/latex_fns/latex_toolbox.py | 33 ++++++++++++++++++++--
 toolbox.py                                 |  3 +-
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py
index 74e8757e..be3d52e7 100644
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -418,6 +418,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                 merge_pdfs(origin_pdf, result_pdf, concat_pdf)
                 promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
             except Exception as e:
+                print(e)
                 pass
             return True # 成功啦
         else:
diff --git a/crazy_functions/latex_fns/latex_toolbox.py b/crazy_functions/latex_fns/latex_toolbox.py
index 4555ff18..0a6a873b 100644
--- a/crazy_functions/latex_fns/latex_toolbox.py
+++ b/crazy_functions/latex_fns/latex_toolbox.py
@@ -493,11 +493,38 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
         return False
     return True

+def run_in_subprocess_wrapper_func(func, args, kwargs, return_dict, exception_dict):
+    import traceback
+    try:
+        result = func(*args, **kwargs)
+        return_dict['result'] = result
+    except Exception as e:
+        # traceback对象无法pickle,不能直接放入Manager字典,这里保存异常对象与格式化后的堆栈文本
+        exception_dict['exception'] = (e, traceback.format_exc())
+
+def run_in_subprocess(func):
+    import multiprocessing
+    def wrapper(*args, **kwargs):
+        return_dict = multiprocessing.Manager().dict()
+        exception_dict = multiprocessing.Manager().dict()
+        process = multiprocessing.Process(target=run_in_subprocess_wrapper_func,
+                                          args=(func, args, kwargs, return_dict, exception_dict))
+        process.start()
+        process.join()
+        process.close()
+        if 'exception' in exception_dict:
+            # ooops, the subprocess ran into an exception
+            exc, tb_text = exception_dict['exception']
+            print(tb_text)
+            raise exc
+        if 'result' in return_dict.keys():
+            # If the subprocess ran successfully, return the result
+            return return_dict['result']
+    return wrapper

-def merge_pdfs(pdf1_path, pdf2_path, output_path):
-    import PyPDF2
+def _merge_pdfs(pdf1_path, pdf2_path, output_path):
+    import PyPDF2 # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放
     Percent = 0.95
+    # raise RuntimeError('PyPDF2 has a serious memory leak problem, please use other tools to merge PDF files.')
     # Open the first PDF file
     with open(pdf1_path, 'rb') as pdf1_file:
         pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
@@ -531,3 +558,5 @@ def merge_pdfs(pdf1_path, pdf2_path, output_path):
     # Save the merged PDF file
     with open(output_path, 'wb') as output_file:
         output_writer.write(output_file)
+
+merge_pdfs = run_in_subprocess(_merge_pdfs) # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放
diff --git a/toolbox.py b/toolbox.py
index 8747afdb..21c56014 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -561,7 +561,8 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
         user_name = get_user(chatbot)
     else:
         user_name = default_user_name
-
+    if not os.path.exists(file):
+        raise FileNotFoundError(f'文件{file}不存在')
     user_path = get_log_folder(user_name, plugin_name=None)
     if file_already_in_downloadzone(file, user_path):
         new_path = file

From 900fad69cf9f63878d1ddf6636ee6e7566261424 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 30 Nov 2023 22:21:44 +0800
Subject: [PATCH 23/88] produce comparison pdf cache

---
 crazy_functions/Latex输出PDF结果.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index f3919edc..18a8d1ba 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -88,6 +88,9 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
         target_file = pj(translation_dir, 'translate_zh.pdf')
         if os.path.exists(target_file):
             promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
+            target_file_compare = pj(translation_dir, 'comparison.pdf')
+            if os.path.exists(target_file_compare):
+                promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
             return target_file
     return False
 def is_float(s):

From ecaf2bdf45ca9bb5d89ee60bd4b26cf7789b3348 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Thu, 30 Nov 2023 22:36:16 +0800
Subject: [PATCH 24/88] add comparison pdf file save and load

---
 crazy_functions/latex_fns/latex_actions.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py
index be3d52e7..113a2785 100644
--- 
a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -416,6 +416,8 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f from .latex_toolbox import merge_pdfs concat_pdf = pj(work_folder_modified, f'comparison.pdf') merge_pdfs(origin_pdf, result_pdf, concat_pdf) + if os.path.exists(pj(work_folder, '..', 'translation')): + shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf')) promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI except Exception as e: print(e) From 6723eb77b2768d0463992ecf292dfda66f08f5ff Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 30 Nov 2023 23:08:33 +0800 Subject: [PATCH 25/88] version3.62 --- README.md | 20 ++++++-------------- version | 4 ++-- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 6ff14e4a..8102da8d 100644 --- a/README.md +++ b/README.md @@ -38,23 +38,17 @@ **如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或插件,欢迎发pull requests!** -If you like this project, please give it a Star. - - -Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. - -To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental). +If you like this project, please give it a Star. Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental).

> **Note** > -> 1. 请注意只有 **高亮** 标识的插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR。 -> -> 2. 本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。 +> 1.请注意只有 **高亮** 标识的插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR。 > +> 2.本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。 > [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)]([https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki)) > -> 3. 本项目兼容并鼓励尝试国产大语言模型ChatGLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 +> 3.本项目兼容并鼓励尝试国产大语言模型ChatGLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。

@@ -93,7 +87,6 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 - - 新界面(修改`config.py`中的LAYOUT选项即可实现“左右布局”和“上下布局”的切换)
@@ -208,7 +201,7 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt- P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用方案4或者方案0获取Latex功能。 -2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行) +2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时) [![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml) ``` sh @@ -254,7 +247,6 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 ### II:自定义函数插件 编写强大的函数插件来执行任何你想得到的和想不到的任务。 - 本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。 详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 @@ -357,7 +349,7 @@ GPT Academic开发者QQ群:`610599535` - 已知问题 - 某些浏览器翻译插件干扰此软件前端的运行 - - 官方Gradio目前有很多兼容性Bug,请**务必使用`requirement.txt`安装Gradio** + - 官方Gradio目前有很多兼容性问题,请**务必使用`requirement.txt`安装Gradio** ### III:主题 可以通过修改`THEME`选项(config.py)变更主题 diff --git a/version b/version index 5f6de09c..cb4df5ae 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.61, + "version": 3.62, "show_feature": true, - "new_feature": "修复潜在的多用户冲突问题 <-> 接入Deepseek Coder <-> AutoGen多智能体插件测试版 <-> 修复本地模型在Windows下的加载BUG <-> 支持文心一言v4和星火v3 <-> 支持GLM3和智谱的API <-> 解决本地模型并发BUG <-> 支持动态追加基础功能按钮" + "new_feature": "修复若干隐蔽的内存BUG <-> 修复多用户冲突问题 <-> 接入Deepseek Coder <-> AutoGen多智能体插件测试版 <-> 修复本地模型在Windows下的加载BUG <-> 支持文心一言v4和星火v3 <-> 支持GLM3和智谱的API <-> 解决本地模型并发BUG <-> 支持动态追加基础功能按钮" } From a64d5500450d0bad901f26e4493320d397fb9915 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 30 Nov 2023 23:23:54 +0800 Subject: [PATCH 26/88] =?UTF-8?q?=E4=BF=AE=E6=94=B9README=E4=B8=AD?= =?UTF-8?q?=E7=9A=84=E4=B8=80=E4=BA=9B=E6=8D=A2=E8=A1=8C=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 8102da8d..54bf7c1f 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ > > 2023.11.7: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目开源免费,近期发现有人蔑视开源协议并利用本项目违规圈钱,请提高警惕,谨防上当受骗。 -

+

@@ -13,14 +13,13 @@ [![Github][Github-image]][Github-url] [![License][License-image]][License-url] - [![Releases][Releases-image]][Releases-url] [![Installation][Installation-image]][Installation-url] [![Wiki][Wiki-image]][Wiki-url] [![PR][PRs-image]][PRs-url] -[License-image]: https://img.shields.io/badge/LICENSE-GPL3.0-orange?&style=for-the-badge -[Github-image]: https://img.shields.io/badge/github-12100E.svg?&style=for-the-badge&logo=github&logoColor=white +[License-image]: https://img.shields.io/badge/LICENSE-GPL3.0-orange?&style=flat-square +[Github-image]: https://img.shields.io/badge/github-12100E.svg?&style=flat-square [Releases-image]: https://img.shields.io/badge/Releases-v3.6.0-blue?style=flat-square [Installation-image]: https://img.shields.io/badge/Installation-v3.6.1-blue?style=flat-square [Wiki-image]: https://img.shields.io/badge/wiki-项目文档-black?style=flat-square @@ -35,14 +34,14 @@

+
**如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或插件,欢迎发pull requests!** If you like this project, please give it a Star. Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental). -

+
+ -> **Note** -> > 1.请注意只有 **高亮** 标识的插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR。 > > 2.本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。 @@ -52,8 +51,6 @@ If you like this project, please give it a Star. Read this in [English](docs/REA

-# Features Overview -
功能(⭐= 近期新增功能) | 描述 @@ -118,6 +115,8 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼
+

+ # Installation ### 安装方法I:直接运行 (Windows, Linux or MacOS) @@ -224,6 +223,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 - 使用WSL2(Windows Subsystem for Linux 子系统)。请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) - 如何在二级网址(如`http://localhost/subpath`)下运行。请访问[FastAPI运行说明](docs/WithFastapi.md) +

# Advanced Usage ### I:自定义新的便捷按钮(学术快捷键) @@ -250,6 +250,7 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以 本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。 详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 +

# Updates ### I:动态 From d8958da8cd0153a717a6585b3faf1d72bd6803ad Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Fri, 1 Dec 2023 09:28:22 +0800 Subject: [PATCH 27/88] =?UTF-8?q?=E4=BF=AE=E6=94=B9Typo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functional.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crazy_functional.py b/crazy_functional.py index 3d4df718..3b8b9453 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -489,7 +489,7 @@ def get_crazy_functions(): }) from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF function_plugins.update({ - "Arixv论文精细翻译(输入arxivID)[需Latex]": { + "Arxiv论文精细翻译(输入arxivID)[需Latex]": { "Group": "学术", "Color": "stop", "AsButton": False, From 3d6ee5c755a1506489c2fb031aa9cdc275d2a737 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Fri, 1 Dec 2023 09:29:45 +0800 Subject: [PATCH 28/88] =?UTF-8?q?=E8=BD=AC=E5=8C=96README=E5=BE=BD?= =?UTF-8?q?=E7=AB=A0=E4=B8=BA=E5=8A=A8=E6=80=81=E5=BE=BD=E7=AB=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将license、version、realease徽章都转化为动态徽章,减少README维护成本 --- README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 54bf7c1f..c0e0a836 100644 --- a/README.md +++ b/README.md @@ -18,15 +18,15 @@ [![Wiki][Wiki-image]][Wiki-url] [![PR][PRs-image]][PRs-url] -[License-image]: https://img.shields.io/badge/LICENSE-GPL3.0-orange?&style=flat-square -[Github-image]: https://img.shields.io/badge/github-12100E.svg?&style=flat-square -[Releases-image]: https://img.shields.io/badge/Releases-v3.6.0-blue?style=flat-square -[Installation-image]: https://img.shields.io/badge/Installation-v3.6.1-blue?style=flat-square +[Github-image]: https://img.shields.io/badge/github-12100E.svg?style=flat-square +[License-image]: https://img.shields.io/github/license/binary-husky/gpt_academic?label=License&style=flat-square&color=orange +[Releases-image]: https://img.shields.io/github/release/binary-husky/gpt_academic?label=Release&style=flat-square&color=blue +[Installation-image]: https://img.shields.io/badge/dynamic/json?color=blue&url=https://raw.githubusercontent.com/binary-husky/gpt_academic/master/version&query=$.version&label=Installation&style=flat-square [Wiki-image]: https://img.shields.io/badge/wiki-项目文档-black?style=flat-square [PRs-image]: https://img.shields.io/badge/PRs-welcome-pink?style=flat-square -[License-url]: https://github.com/binary-husky/gpt_academic/blob/master/LICENSE [Github-url]: https://github.com/binary-husky/gpt_academic +[License-url]: https://github.com/binary-husky/gpt_academic/blob/master/LICENSE [Releases-url]: https://github.com/binary-husky/gpt_academic/releases [Installation-url]: https://github.com/binary-husky/gpt_academic#installation [Wiki-url]: https://github.com/binary-husky/gpt_academic/wiki @@ -38,7 +38,9 @@ **如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或插件,欢迎发pull requests!** -If you like this project, please give it a Star. Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental). +If you like this project, please give it a Star. 
+ +Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental).
@@ -47,7 +49,7 @@ If you like this project, please give it a Star. Read this in [English](docs/REA > 2.本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。 > [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)]([https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki)) > -> 3.本项目兼容并鼓励尝试国产大语言模型ChatGLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 +> 3.本项目兼容并鼓励尝试国产大语言模型ChatGLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交即可生效。

From e7f4c804eb5bf6a08a0b91eda74c4896cc8f0ab9 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Fri, 1 Dec 2023 10:27:25 +0800 Subject: [PATCH 29/88] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=8F=92=E4=BB=B6?= =?UTF-8?q?=E5=88=86=E7=B1=BB=E5=90=8D=E7=A7=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将原有分类 “对话” 更名为 “对话&作图” --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index f170a2bb..f1d27289 100644 --- a/config.py +++ b/config.py @@ -82,7 +82,7 @@ MAX_RETRY = 2 # 插件分类默认选项 -DEFAULT_FN_GROUPS = ['对话', '编程', '学术', '智能体'] +DEFAULT_FN_GROUPS = ['对话&作图', '编程', '学术', '智能体'] # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 ) From e8dd3c02f2f22d72cadce87b86c9cbab73e8f488 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Fri, 1 Dec 2023 10:30:25 +0800 Subject: [PATCH 30/88] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=8F=92=E4=BB=B6?= =?UTF-8?q?=E5=AF=B9=E5=BA=94=E7=9A=84=E5=88=86=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functional.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/crazy_functional.py b/crazy_functional.py index 3b8b9453..8e786e6d 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -40,7 +40,7 @@ def get_crazy_functions(): function_plugins = { "虚空终端": { - "Group": "对话|编程|学术|智能体", + "Group": "对话&作图|编程|学术|智能体", "Color": "stop", "AsButton": True, "Function": HotReload(虚空终端) @@ -53,20 +53,20 @@ def get_crazy_functions(): "Function": HotReload(解析一个Python项目) }, "载入对话历史存档(先上传存档或输入路径)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, "Info": "载入对话历史存档 | 输入参数为路径", "Function": HotReload(载入对话历史存档) }, "删除所有本地对话历史记录(谨慎操作)": { - "Group": "对话", + "Group": "对话&作图", "AsButton": False, "Info": "删除所有本地对话历史记录,谨慎操作 | 不需要输入参数", "Function": HotReload(删除所有本地对话历史记录) }, "清除所有缓存文件(谨慎操作)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, # 加入下拉菜单中 "Info": "清除所有缓存文件,谨慎操作 | 不需要输入参数", @@ -180,19 +180,19 @@ def get_crazy_functions(): "Function": HotReload(批量生成函数注释) }, "保存当前的对话": { - "Group": "对话", + "Group": "对话&作图", "AsButton": True, "Info": "保存当前的对话 | 不需要输入参数", "Function": HotReload(对话历史存档) }, "[多线程Demo]解析此项目本身(源码自译解)": { - "Group": "对话|编程", + "Group": "对话&作图|编程", "AsButton": False, # 加入下拉菜单中 "Info": "多线程解析并翻译此项目的源码 | 不需要输入参数", "Function": HotReload(解析项目本身) }, "历史上的今天": { - "Group": "对话", + "Group": "对话&作图", "AsButton": True, "Info": "查看历史上的今天事件 (这是一个面向开发者的插件Demo) | 不需要输入参数", "Function": HotReload(高阶功能模板函数) @@ -205,7 +205,7 @@ def get_crazy_functions(): "Function": HotReload(批量翻译PDF文档) }, "询问多个GPT模型": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": True, "Function": HotReload(同时问询) @@ -300,7 +300,7 @@ def get_crazy_functions(): from crazy_functions.联网的ChatGPT import 连接网络回答问题 function_plugins.update({ "连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, # 加入下拉菜单中 # "Info": "连接网络回答问题(需要访问谷歌)| 输入参数是一个问题", @@ -310,7 +310,7 @@ def get_crazy_functions(): from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题 function_plugins.update({ "连接网络回答问题(中文Bing版,输入问题后点击该插件)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, # 加入下拉菜单中 "Info": "连接网络回答问题(需要访问中文Bing)| 输入参数是一个问题", @@ -341,7 +341,7 @@ def get_crazy_functions(): from crazy_functions.询问多个大语言模型 import 
同时问询_指定模型 function_plugins.update({ "询问多个GPT模型(手动指定询问哪些模型)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) @@ -357,7 +357,7 @@ def get_crazy_functions(): from crazy_functions.图片生成 import 图片生成_DALLE2, 图片生成_DALLE3 function_plugins.update({ "图片生成_DALLE2 (先切换模型到openai或api2d)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) @@ -368,7 +368,7 @@ def get_crazy_functions(): }) function_plugins.update({ "图片生成_DALLE3 (先切换模型到openai或api2d)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) @@ -385,7 +385,7 @@ def get_crazy_functions(): from crazy_functions.总结音视频 import 总结音视频 function_plugins.update({ "批量总结音视频(输入路径或上传压缩包)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, "AdvancedArgs": True, @@ -402,7 +402,7 @@ def get_crazy_functions(): from crazy_functions.数学动画生成manim import 动画生成 function_plugins.update({ "数学动画生成(Manim)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, "Info": "按照自然语言描述生成一个动画 | 输入参数是一段话", @@ -433,7 +433,7 @@ def get_crazy_functions(): from crazy_functions.Langchain知识库 import 知识库问答 function_plugins.update({ "构建知识库(先上传文件素材,再运行此插件)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, "AdvancedArgs": True, @@ -449,7 +449,7 @@ def get_crazy_functions(): from crazy_functions.Langchain知识库 import 读取知识库作答 function_plugins.update({ "知识库问答(构建知识库后,再运行此插件)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, "AdvancedArgs": True, @@ -465,7 +465,7 @@ def get_crazy_functions(): from crazy_functions.交互功能函数模板 import 交互功能模板函数 function_plugins.update({ "交互功能模板Demo函数(查找wallhaven.cc的壁纸)": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": False, "Function": HotReload(交互功能模板函数) @@ -527,7 +527,7 @@ def get_crazy_functions(): from crazy_functions.语音助手 import 语音助手 function_plugins.update({ "实时语音对话": { - "Group": "对话", + "Group": "对话&作图", "Color": "stop", "AsButton": True, "Info": "这是一个时刻聆听着的语音对话助手 | 没有输入参数", From ef12d4f754bd955431063d14963f25709c174f20 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Fri, 1 Dec 2023 10:31:50 +0800 Subject: [PATCH 31/88] =?UTF-8?q?=E4=BF=AE=E6=94=B9dalle3=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E8=BE=93=E5=85=A5=E5=8C=BA=E6=8F=90=E7=A4=BA=E8=AF=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functional.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crazy_functional.py b/crazy_functional.py index 8e786e6d..dcf7f6b8 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -372,7 +372,7 @@ def get_crazy_functions(): "Color": "stop", "AsButton": False, "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) - "ArgsReminder": "在这里输入分辨率, 如1024x1024(默认),支持 1024x1024, 1792x1024, 1024x1792。如需生成高清图像,请输入 1024x1024-HD, 1792x1024-HD, 1024x1792-HD。", # 高级参数输入区的显示提示 + "ArgsReminder": "在这里输入自定义参数“分辨率-质量(可选)-风格(可选)”, 参数示例“1024x1024-hd-vivid” || 分辨率支持 1024x1024(默认)//1792x1024//1024x1792 || 质量支持 -standard(默认)//-hd || 风格支持 -vivid(默认)//-natural", # 高级参数输入区的显示提示 "Info": "使用DALLE3生成图片 | 输入参数字符串,提供图像的内容", "Function": HotReload(图片生成_DALLE3) }, From 6126024f2c94e6e56accd013736d4c0427e9596e Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Fri, 1 Dec 2023 10:36:59 +0800 Subject: [PATCH 32/88] 
=?UTF-8?q?dall-e-3=E6=B7=BB=E5=8A=A0=20'style'=20?= =?UTF-8?q?=E9=A3=8E=E6=A0=BC=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dall-e-3添加 'style' 风格参数(参考 platform.openai.com/doc/api-reference),修改dall-e-3作图时的参数判断逻辑 --- crazy_functions/图片生成.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/crazy_functions/图片生成.py b/crazy_functions/图片生成.py index 642a9e22..104d4034 100644 --- a/crazy_functions/图片生成.py +++ b/crazy_functions/图片生成.py @@ -2,7 +2,7 @@ from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log from crazy_functions.multi_stage.multi_stage_utils import GptAcademicState -def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", quality=None): +def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", quality=None, style=None): import requests, json, time, os from request_llms.bridge_all import model_info @@ -25,7 +25,10 @@ def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", qual 'model': model, 'response_format': 'url' } - if quality is not None: data.update({'quality': quality}) + if quality is not None: + data['quality'] = quality + if style is not None: + data['style'] = style response = requests.post(url, headers=headers, json=data, proxies=proxies) print(response.content) try: @@ -115,13 +118,18 @@ def 图片生成_DALLE3(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*或者api2d-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 .....")) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 由于请求gpt需要一段时间,我们先及时地做一次界面更新 if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") - resolution = plugin_kwargs.get("advanced_arg", '1024x1024').lower() - if resolution.endswith('-hd'): - resolution = resolution.replace('-hd', '') - quality = 'hd' - else: - quality = 'standard' - image_url, image_path = gen_image(llm_kwargs, prompt, resolution, model="dall-e-3", quality=quality) + resolution_arg = plugin_kwargs.get("advanced_arg", '1024x1024-standard-vivid').lower() + parts = resolution_arg.split('-') + resolution = parts[0] # 解析分辨率 + quality = 'standard' # 质量与风格默认值 + style = 'vivid' + # 遍历检查是否有额外参数 + for part in parts[1:]: + if part in ['hd', 'standard']: + quality = part + elif part in ['vivid', 'natural']: + style = part + image_url, image_path = gen_image(llm_kwargs, prompt, resolution, model="dall-e-3", quality=quality, style=style) chatbot.append([prompt, f'图像中转网址:
`{image_url}`
'+ f'中转网址预览:
' @@ -201,4 +209,3 @@ def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys f'本地文件预览:
' ]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新 - From 1134723c80cb9a68bd60a84e28b666f8cbe8be3e Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Fri, 1 Dec 2023 10:40:11 +0800 Subject: [PATCH 33/88] =?UTF-8?q?=E4=BF=AE=E6=94=B9docs=E4=B8=AD=E6=8F=92?= =?UTF-8?q?=E4=BB=B6=E5=88=86=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/translate_english.json | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/translate_english.json b/docs/translate_english.json index 955dcaf9..400ec972 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -2183,9 +2183,8 @@ "找不到合适插件执行该任务": "Cannot find a suitable plugin to perform this task", "接驳VoidTerminal": "Connect to VoidTerminal", "**很好": "**Very good", - "对话|编程": "Conversation|Programming", - "对话|编程|学术": "Conversation|Programming|Academic", - "4. 建议使用 GPT3.5 或更强的模型": "4. It is recommended to use GPT3.5 or a stronger model", + "对话&作图|编程": "Conversation&ImageGenerating|Programming", + "对话&作图|编程|学术": "Conversation&ImageGenerating|Programming|Academic", "4. 建议使用 GPT3.5 或更强的模型": "4. It is recommended to use GPT3.5 or a stronger model", "「请调用插件翻译PDF论文": "Please call the plugin to translate the PDF paper", "3. 如果您使用「调用插件xxx」、「修改配置xxx」、「请问」等关键词": "3. If you use keywords such as 'call plugin xxx', 'modify configuration xxx', 'please', etc.", "以下是一篇学术论文的基本信息": "The following is the basic information of an academic paper", @@ -2630,7 +2629,7 @@ "已经被记忆": "Already memorized", "默认用英文的": "Default to English", "错误追踪": "Error tracking", - "对话|编程|学术|智能体": "Dialogue|Programming|Academic|Intelligent agent", + "对话&编程|编程|学术|智能体": "Conversation&ImageGenerating|Programming|Academic|Intelligent agent", "请检查": "Please check", "检测到被滞留的缓存文档": "Detected cached documents being left behind", "还有哪些场合允许使用代理": "What other occasions allow the use of proxies", @@ -2904,4 +2903,4 @@ "请配置ZHIPUAI_API_KEY": "Please configure ZHIPUAI_API_KEY", "单个azure模型": "Single Azure model", "预留参数 context 未实现": "Reserved parameter 'context' not implemented" -} \ No newline at end of file +} From 2aab6cb708c6f82de3bd181bab0232da7eb3ed9c Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Fri, 1 Dec 2023 10:50:20 +0800 Subject: [PATCH 34/88] =?UTF-8?q?=E4=BC=98=E5=8C=96=E9=83=A8=E5=88=86?= =?UTF-8?q?=E7=BF=BB=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/translate_english.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/translate_english.json b/docs/translate_english.json index 400ec972..bf09f66c 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -923,7 +923,7 @@ "的第": "The", "个片段": "fragment", "总结文章": "Summarize the article", - "根据以上的对话": "According to the above dialogue", + "根据以上的对话": "According to the conversation above", "的主要内容": "The main content of", "所有文件都总结完成了吗": "Are all files summarized?", "如果是.doc文件": "If it is a .doc file", @@ -1501,7 +1501,7 @@ "发送请求到OpenAI后": "After sending the request to OpenAI", "上下布局": "Vertical Layout", "左右布局": "Horizontal Layout", - "对话窗的高度": "Height of the Dialogue Window", + "对话窗的高度": "Height of the Conversation Window", "重试的次数限制": "Retry Limit", "gpt4现在只对申请成功的人开放": "GPT-4 is now only open to those who have successfully applied", "提高限制请查询": "Please check for higher limits", From d99b443b4cae5d7599b7060ae488ce14ab6d0a11 Mon Sep 17 00:00:00 
2001
From: Skyzayre <120616113+Skyzayre@users.noreply.github.com>
Date: Fri, 1 Dec 2023 10:51:04 +0800
Subject: [PATCH 35/88] =?UTF-8?q?=E4=BC=98=E5=8C=96=E9=83=A8=E5=88=86?=
 =?UTF-8?q?=E7=BF=BB=E8=AF=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/translate_traditionalchinese.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/translate_traditionalchinese.json b/docs/translate_traditionalchinese.json
index 9ca7cbaa..4edc65de 100644
--- a/docs/translate_traditionalchinese.json
+++ b/docs/translate_traditionalchinese.json
@@ -1043,9 +1043,9 @@
     "jittorllms响应异常": "jittorllms response exception",
     "在项目根目录运行这两个指令": "Run these two commands in the project root directory",
     "获取tokenizer": "Get tokenizer",
-    "chatbot 为WebUI中显示的对话列表": "chatbot is the list of dialogues displayed in WebUI",
+    "chatbot 为WebUI中显示的对话列表": "chatbot is the list of conversations displayed in WebUI",
     "test_解析一个Cpp项目": "test_parse a Cpp project",
-    "将对话记录history以Markdown格式写入文件中": "Write the dialogue record history to a file in Markdown format",
+    "将对话记录history以Markdown格式写入文件中": "Write the conversation history to a file in Markdown format",
     "装饰器函数": "Decorator function",
     "玫瑰色": "Rose color",
     "将单空行": "刪除單行空白",
@@ -2270,4 +2270,4 @@
     "标注节点的行数范围": "標註節點的行數範圍",
     "默认 True": "默認 True",
     "将两个PDF拼接": "將兩個PDF拼接"
-}
\ No newline at end of file
+}

From 498598624398ec75e231804e35c576583f12cd70 Mon Sep 17 00:00:00 2001
From: jlw463195935 <463195395@qq.com>
Date: Fri, 1 Dec 2023 16:11:44 +0800
Subject: [PATCH 36/88] =?UTF-8?q?=E5=8A=A0=E5=85=A5=E4=BA=86int4=20int8?=
 =?UTF-8?q?=E9=87=8F=E5=8C=96=EF=BC=8C=E5=8A=A0=E5=85=A5=E9=BB=98=E8=AE=A4?=
 =?UTF-8?q?fp16=E5=8A=A0=E8=BD=BD=EF=BC=88in4=E5=92=8Cint8=E9=9C=80?=
 =?UTF-8?q?=E8=A6=81=E5=AE=89=E8=A3=85=E9=A2=9D=E5=A4=96=E7=9A=84=E5=BA=93?=
 =?UTF-8?q?=EF=BC=89=20=E8=A7=A3=E5=86=B3=E8=BF=9E=E7=BB=AD=E5=AF=B9?=
 =?UTF-8?q?=E8=AF=9Dtoken=E6=97=A0=E9=99=90=E5=A2=9E=E9=95=BF=E7=88=86?=
 =?UTF-8?q?=E6=98=BE=E5=AD=98=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                            |  8 +++++
 config.py                            |  4 ++-
 request_llms/bridge_deepseekcoder.py | 48 ++++++++++++++++++++++++++--
 3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 54bf7c1f..e8893d67 100644
--- a/README.md
+++ b/README.md
@@ -166,6 +166,14 @@ git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss #
 # 【可选步骤IV】确保config.py配置文件的AVAIL_LLM_MODELS包含了期望的模型,目前支持的全部模型如下(jittorllms系列目前仅支持docker方案):
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
+
+# 【可选步骤V】支持本地模型INT8,INT4量化(模型本身不是量化版本,目前deepseek-coder支持,后面测试后会加入更多模型量化选择)
+pip install bitsandbytes
+# windows用户安装bitsandbytes需要使用下面bitsandbytes-windows-webui
+python -m pip install bitsandbytes --prefer-binary --extra-index-url=https://jllllll.github.io/bitsandbytes-windows-webui
+pip install -U git+https://github.com/huggingface/transformers.git
+pip install -U git+https://github.com/huggingface/accelerate.git
+pip install peft
 ```
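为便于理解上述量化选项,下面给出一个最小化的INT4加载示意。其中模型名 deepseek-ai/deepseek-coder-6.7b-instruct 仅作举例;BitsAndBytesConfig 的各项参数与本补丁后文对 request_llms/bridge_deepseekcoder.py 的改动保持一致,此代码段本身不属于补丁内容:

```python
# 最小INT4量化加载示意(假设已按上文安装bitsandbytes等依赖)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"  # 示例模型名
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # 以4bit量化方式加载权重
    bnb_4bit_use_double_quant=True,         # 双重量化,进一步降低显存占用
    bnb_4bit_quant_type="nf4",              # NormalFloat4量化类型
    bnb_4bit_compute_dtype=torch.bfloat16,  # 计算时使用bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",
)
```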

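另外,本补丁避免连续对话显存无限增长的关键,是在输入进入模型前只保留最近 MAX_INPUT_TOKEN_LENGTH 个token。其截断逻辑可用下面这个独立的小示意概括(纯Python模拟,与后文 bridge_deepseekcoder.py 中基于张量切片的实现等价):

```python
MAX_INPUT_TOKEN_LENGTH = 2048  # 与下方config.py新增配置项同名、同默认值

def clip_input_ids(input_ids):
    # input_ids 模拟形如 [1, seq_len] 的token序列;超长时丢弃最早的token,保留末尾
    if len(input_ids[0]) > MAX_INPUT_TOKEN_LENGTH:
        input_ids = [input_ids[0][-MAX_INPUT_TOKEN_LENGTH:]]
    return input_ids

# 用法示意
ids = [list(range(5000))]
assert len(clip_input_ids(ids)[0]) == MAX_INPUT_TOKEN_LENGTH
```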
diff --git a/config.py b/config.py index f170a2bb..fcad051e 100644 --- a/config.py +++ b/config.py @@ -91,7 +91,7 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview","gpt-4-vision-prev "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k', "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4", - "chatglm3", "moss", "claude-2"] + "chatglm3", "moss", "claude-2", "deepseekcoder"] # P.S. 其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random" # "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"] @@ -114,6 +114,8 @@ CHATGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda" LOCAL_MODEL_QUANT = "FP16" # 默认 "FP16" "INT4" 启用量化INT4版本 "INT8" 启用量化INT8版本 +# 设置deepseekcoder运行时输入的最大token数(超过4096没有意义),对话过程爆显存可以适当调小 +MAX_INPUT_TOKEN_LENGTH = 2048 # 设置gradio的并行线程数(不需要修改) CONCURRENT_COUNT = 100 diff --git a/request_llms/bridge_deepseekcoder.py b/request_llms/bridge_deepseekcoder.py index 2242eec7..09bd0b38 100644 --- a/request_llms/bridge_deepseekcoder.py +++ b/request_llms/bridge_deepseekcoder.py @@ -6,7 +6,9 @@ from toolbox import ProxyNetworkActivate from toolbox import get_conf from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns from threading import Thread +import torch +MAX_INPUT_TOKEN_LENGTH = get_conf("MAX_INPUT_TOKEN_LENGTH") def download_huggingface_model(model_name, max_retry, local_dir): from huggingface_hub import snapshot_download for i in range(1, max_retry): @@ -36,9 +38,46 @@ class GetCoderLMHandle(LocalLLMHandle): # tokenizer = download_huggingface_model(model_name, max_retry=128, local_dir=local_dir) tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) self._streamer = TextIteratorStreamer(tokenizer) - model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) + device_map = { + "transformer.word_embeddings": 0, + "transformer.word_embeddings_layernorm": 0, + "lm_head": 0, + "transformer.h": 0, + "transformer.ln_f": 0, + "model.embed_tokens": 0, + "model.layers": 0, + "model.norm": 0, + } + + # 检查量化配置 + quantization_type = get_conf('LOCAL_MODEL_QUANT') + if get_conf('LOCAL_MODEL_DEVICE') != 'cpu': - model = model.cuda() + if quantization_type == "INT8": + from transformers import BitsAndBytesConfig + # 使用 INT8 量化 + model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, load_in_8bit=True, + device_map=device_map) + elif quantization_type == "INT4": + from transformers import BitsAndBytesConfig + # 使用 INT4 量化 + bnb_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16 + ) + model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, + quantization_config=bnb_config, device_map=device_map) + else: + # 使用默认的 FP16 + model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, + torch_dtype=torch.bfloat16, device_map=device_map) + else: + # CPU 模式 + model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, + torch_dtype=torch.bfloat16) + return model, tokenizer def llm_stream_generator(self, **kwargs): @@ -54,7 +93,10 @@ class GetCoderLMHandle(LocalLLMHandle): query, max_length, top_p, temperature, history = adaptor(kwargs) history.append({ 'role': 'user', 'content': 
query}) messages = history - inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt").to(self._model.device) + inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt") + if inputs.shape[1] > MAX_INPUT_TOKEN_LENGTH: + inputs = inputs[:, -MAX_INPUT_TOKEN_LENGTH:] + inputs = inputs.to(self._model.device) generation_kwargs = dict( inputs=inputs, max_new_tokens=max_length, From 552219fd5a7a30e924d042b78f29547ced8c333c Mon Sep 17 00:00:00 2001 From: jlw463195935 <463195395@qq.com> Date: Fri, 1 Dec 2023 16:17:30 +0800 Subject: [PATCH 37/88] =?UTF-8?q?=E5=8A=A0=E5=85=A5=E4=BA=86int4=20int8?= =?UTF-8?q?=E9=87=8F=E5=8C=96=EF=BC=8C=E5=8A=A0=E5=85=A5=E9=BB=98=E8=AE=A4?= =?UTF-8?q?fp16=E5=8A=A0=E8=BD=BD=EF=BC=88in4=E5=92=8Cint8=E9=9C=80?= =?UTF-8?q?=E8=A6=81=E5=AE=89=E8=A3=85=E9=A2=9D=E5=A4=96=E7=9A=84=E5=BA=93?= =?UTF-8?q?=EF=BC=8C=E7=9B=AE=E5=89=8D=E5=8F=AA=E6=B5=8B=E8=AF=95=E5=8A=A0?= =?UTF-8?q?=E5=85=A5deepseek-coder=E6=A8=A1=E5=9E=8B=EF=BC=8C=E5=90=8E?= =?UTF-8?q?=E7=BB=AD=E6=B5=8B=E8=AF=95=E4=BC=9A=E5=8A=A0=E5=85=A5=E6=9B=B4?= =?UTF-8?q?=E5=A4=9A=EF=BC=89=20=E8=A7=A3=E5=86=B3deepseek-coder=E8=BF=9E?= =?UTF-8?q?=E7=BB=AD=E5=AF=B9=E8=AF=9Dtoken=E6=97=A0=E9=99=90=E5=A2=9E?= =?UTF-8?q?=E9=95=BF=E7=88=86=E6=98=BE=E5=AD=98=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e8893d67..fd0ec5c2 100644 --- a/README.md +++ b/README.md @@ -167,7 +167,7 @@ git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss # # 【可选步骤IV】确保config.py配置文件的AVAIL_LLM_MODELS包含了期望的模型,目前支持的全部模型如下(jittorllms系列目前仅支持docker方案): AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] -# 【可选步骤V】支持本地模型INT8,INT4量化(模型本身不是量化版本,目前deepseek-coder支持,后面测试后会加入更多模型量化选择) +# 【可选步骤V】支持本地模型INT8,INT4量化(这里所指的模型本身不是量化版本,目前deepseek-coder支持,后面测试后会加入更多模型量化选择) pip install bitsandbyte # windows用户安装bitsandbytes需要使用下面bitsandbytes-windows-webui python -m pip install bitsandbytes --prefer-binary --extra-index-url=https://jllllll.github.io/bitsandbytes-windows-webui From da376068e1ae93ef09ea7c0f6a79657a4b52e5fc Mon Sep 17 00:00:00 2001 From: Alpha <1526147838@qq.com> Date: Sat, 2 Dec 2023 21:31:59 +0800 Subject: [PATCH 38/88] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=BA=86qwen=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E6=9C=AC=E5=9C=B0=E6=A8=A1=E5=9E=8B=E6=97=B6=E5=80=99?= =?UTF-8?q?=E7=9A=84=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 4 ++-- request_llms/bridge_qwen.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config.py b/config.py index f170a2bb..4284cb85 100644 --- a/config.py +++ b/config.py @@ -91,10 +91,10 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview","gpt-4-vision-prev "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k', "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4", - "chatglm3", "moss", "claude-2"] + "chatglm3", "moss", "claude-2","qwen"] # P.S. 
其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random"
 #                   "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"]
-
+# 如果你需要使用Qwen的本地模型(比如qwen1.8b),还需要在request_llms/bridge_qwen.py中设置模型的路径!
 # 定义界面上“询问多个GPT模型”插件应该使用哪些模型,请从AVAIL_LLM_MODELS中选择,并在不同模型之间用`&`间隔,例如"gpt-3.5-turbo&chatglm3&azure-gpt-4"
 MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3"

diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py
index 85a4d80c..d8408d8f 100644
--- a/request_llms/bridge_qwen.py
+++ b/request_llms/bridge_qwen.py
@@ -30,7 +30,7 @@ class GetQwenLMHandle(LocalLLMHandle):
         from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
         with ProxyNetworkActivate('Download_LLM'):
-            model_id = 'qwen/Qwen-7B-Chat'
+            model_id = 'qwen/Qwen-7B-Chat'  # 如果已将模型下载到本地,在这里改为本地路径;注意下一行tokenizer的路径也要同步修改
             self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True)
             # use fp16
             model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
@@ -51,7 +51,7 @@ class GetQwenLMHandle(LocalLLMHandle):

         query, max_length, top_p, temperature, history = adaptor(kwargs)

-        for response in self._model.chat(self._tokenizer, query, history=history, stream=True):
+        for response in self._model.chat_stream(self._tokenizer, query, history=history):
             yield response

 def try_to_import_special_deps(self, **kwargs):

From 94ab41d3c0f9ed7addc37f9a436ddd21e473ec2b Mon Sep 17 00:00:00 2001
From: Alpha <1526147838@qq.com>
Date: Sat, 2 Dec 2023 23:12:25 +0800
Subject: [PATCH 39/88] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86qwen1.8b?=
 =?UTF-8?q?=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                  | 261 +++++++++---------
 config.py                                  |   9 +-
 request_llms/bridge_all.py                 |  26 +-
 request_llms/bridge_qwen_1_8B.py           |  67 +++++
 .../{bridge_qwen.py => bridge_qwen_7B.py}  |   2 +-
 tests/test_llms.py                         |   5 +-
 6 files changed, 226 insertions(+), 144 deletions(-)
 create mode 100644 request_llms/bridge_qwen_1_8B.py
 rename request_llms/{bridge_qwen.py => bridge_qwen_7B.py} (99%)

diff --git a/README.md b/README.md
index 54bf7c1f..c7fb9c7c 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 > **Caution**
->
-> 2023.11.12: 某些依赖包尚不兼容python 3.12,推荐python 3.11。
->
+>
+> 2023.11.12: 某些依赖包尚不兼容 python 3.12,推荐 python 3.11。
+>
 > 2023.11.7: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目开源免费,近期发现有人蔑视开源协议并利用本项目违规圈钱,请提高警惕,谨防上当受骗。
@@ -24,7 +24,6 @@ [Installation-image]: https://img.shields.io/badge/Installation-v3.6.1-blue?style=flat-square [Wiki-image]: https://img.shields.io/badge/wiki-项目文档-black?style=flat-square [PRs-image]: https://img.shields.io/badge/PRs-welcome-pink?style=flat-square - [License-url]: https://github.com/binary-husky/gpt_academic/blob/master/LICENSE [Github-url]: https://github.com/binary-husky/gpt_academic [Releases-url]: https://github.com/binary-husky/gpt_academic/releases @@ -32,65 +31,62 @@ [Wiki-url]: https://github.com/binary-husky/gpt_academic/wiki [PRs-url]: https://github.com/binary-husky/gpt_academic/pulls -

-**如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或插件,欢迎发pull requests!** +**如果喜欢这个项目,请给它一个 Star;如果您发明了好用的快捷键或插件,欢迎发 pull requests!** If you like this project, please give it a Star. Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental).
- -> 1.请注意只有 **高亮** 标识的插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR。 +> 1.请注意只有 **高亮** 标识的插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的 PR。 > -> 2.本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题请查阅wiki。 -> [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)]([https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki)) -> -> 3.本项目兼容并鼓励尝试国产大语言模型ChatGLM等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 +> 2.本项目中每个文件的功能都在[自译解报告](https://github.com/binary-husky/gpt_academic/wiki/GPT‐Academic项目自译解报告)`self_analysis.md`详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用 GPT 重新生成项目的自我解析报告。常见问题请查阅 wiki。 +> [![常规安装方法](https://img.shields.io/static/v1?label=&message=常规安装方法&color=gray)](#installation) [![一键安装脚本](https://img.shields.io/static/v1?label=&message=一键安装脚本&color=gray)](https://github.com/binary-husky/gpt_academic/releases) [![配置说明](https://img.shields.io/static/v1?label=&message=配置说明&color=gray)](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) [![wiki](https://img.shields.io/static/v1?label=&message=wiki&color=gray)](<[https://github.com/binary-husky/gpt_academic/wiki/项目配置说明](https://github.com/binary-husky/gpt_academic/wiki)>) +> +> 3.本项目兼容并鼓励尝试国产大语言模型 ChatGLM 等。支持多个 api-key 共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。

-功能(⭐= 近期新增功能) | 描述 ---- | --- -⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B) | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, 通义千问[Qwen](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),上海AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf),[智谱API](https://open.bigmodel.cn/),DALLE3, [DeepseekCoder](https://coder.deepseek.com/) -润色、翻译、代码解释 | 一键润色、翻译、查找论文语法错误、解释代码 -[自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键 -模块化设计 | 支持自定义强大的[插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) -[程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [插件] 一键剖析Python/C/C++/Java/Lua/...项目树 或 [自我剖析](https://www.bilibili.com/video/BV1cj411A7VW) -读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [插件] 一键解读latex/pdf论文全文并生成摘要 -Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [插件] 一键翻译或润色latex论文 -批量注释生成 | [插件] 一键批量生成函数注释 -Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗?就是出自他的手笔 -chat分析报告生成 | [插件] 运行后自动生成总结汇报 -[PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [插件] PDF论文提取题目&摘要+翻译全文(多线程) -[Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [插件] 输入arxiv文章url即可一键翻译摘要+下载PDF -Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼写纠错+输出对照PDF -[谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/) -互联网信息聚合+GPT | [插件] 一键[让GPT从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)回答问题,让信息永不过时 -⭐Arxiv论文精细翻译 ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具 -⭐[实时语音对话输入](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [插件] 异步[监听音频](https://www.bilibili.com/video/BV1AV4y187Uy/),自动断句,自动寻找回答时机 -公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 -⭐AutoGen多智能体插件 | [插件] 借助微软AutoGen,探索多Agent的智能涌现可能! -启动暗色[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 -[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM2](https://github.com/THUDM/ChatGLM2-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)伺候的感觉一定会很不错吧? 
-⭐ChatGLM2微调模型 | 支持加载ChatGLM2微调模型,提供ChatGLM2微调辅助插件 -更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)和[盘古α](https://openi.org.cn/pangu/) -⭐[void-terminal](https://github.com/binary-husky/void-terminal) pip包 | 脱离GUI,在Python中直接调用本项目的所有函数插件(开发中) -⭐虚空终端插件 | [插件] 能够使用自然语言直接调度本项目其他插件 -更多新功能展示 (图像生成等) …… | 见本文档结尾处 …… +| 功能(⭐= 近期新增功能) | 描述 | +| ------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ⭐[接入新模型](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E5%88%87%E6%8D%A2%E6%A8%A1%E5%9E%8B) | 百度[千帆](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu)与文心一言, 通义千问[Qwen-7B](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary),通义千问[Qwen-1_8B](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary),上海 AI-Lab[书生](https://github.com/InternLM/InternLM),讯飞[星火](https://xinghuo.xfyun.cn/),[LLaMa2](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf),[智谱 API](https://open.bigmodel.cn/),DALLE3, [DeepseekCoder](https://coder.deepseek.com/) | +| 润色、翻译、代码解释 | 一键润色、翻译、查找论文语法错误、解释代码 | +| [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键 | +| 模块化设计 | 支持自定义强大的[插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) | +| [程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [插件] 一键剖析 Python/C/C++/Java/Lua/...项目树 或 [自我剖析](https://www.bilibili.com/video/BV1cj411A7VW) | +| 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [插件] 一键解读 latex/pdf 论文全文并生成摘要 | +| Latex 全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [插件] 一键翻译或润色 latex 论文 | +| 批量注释生成 | [插件] 一键批量生成函数注释 | +| Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [插件] 看到上面 5 种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗?就是出自他的手笔 | +| chat 分析报告生成 | [插件] 运行后自动生成总结汇报 | +| [PDF 论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [插件] PDF 论文提取题目&摘要+翻译全文(多线程) | +| [Arxiv 小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [插件] 输入 arxiv 文章 url 即可一键翻译摘要+下载 PDF | +| Latex 论文一键校对 | [插件] 仿 Grammarly 对 Latex 文章进行语法、拼写纠错+输出对照 PDF | +| [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [插件] 给定任意谷歌学术搜索页面 URL,让 gpt 帮你[写 relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/) | +| 互联网信息聚合+GPT | [插件] 一键[让 GPT 从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)回答问题,让信息永不过时 | +| ⭐Arxiv 论文精细翻译 ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [插件] 一键[以超高质量翻译 arxiv 论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具 | +| ⭐[实时语音对话输入](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [插件] 异步[监听音频](https://www.bilibili.com/video/BV1AV4y187Uy/),自动断句,自动寻找回答时机 | +| 公式/图片/表格显示 | 可以同时显示公式的[tex 
形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 | +| ⭐AutoGen 多智能体插件 | [插件] 借助微软 AutoGen,探索多 Agent 的智能涌现可能! | +| 启动暗色[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器 url 后面添加`/?__theme=dark`可以切换 dark 主题 | +| [多 LLM 模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被 GPT3.5、GPT4、[清华 ChatGLM2](https://github.com/THUDM/ChatGLM2-6B)、[复旦 MOSS](https://github.com/OpenLMLab/MOSS)伺候的感觉一定会很不错吧? | +| ⭐ChatGLM2 微调模型 | 支持加载 ChatGLM2 微调模型,提供 ChatGLM2 微调辅助插件 | +| 更多 LLM 模型接入,支持[huggingface 部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入 Newbing 接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)和[盘古 α](https://openi.org.cn/pangu/) | +| ⭐[void-terminal](https://github.com/binary-husky/void-terminal) pip 包 | 脱离 GUI,在 Python 中直接调用本项目的所有函数插件(开发中) | +| ⭐ 虚空终端插件 | [插件] 能够使用自然语言直接调度本项目其他插件 | +| 更多新功能展示 (图像生成等) …… | 见本文档结尾处 …… | +
- -- 新界面(修改`config.py`中的LAYOUT选项即可实现“左右布局”和“上下布局”的切换) +- 新界面(修改`config.py`中的 LAYOUT 选项即可实现“左右布局”和“上下布局”的切换)
- -- 所有按钮都通过读取functional.py动态生成,可随意加自定义功能,解放剪贴板 +- 所有按钮都通过读取 functional.py 动态生成,可随意加自定义功能,解放剪贴板
@@ -100,12 +96,12 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼 -- 如果输出包含公式,会以tex形式和渲染形式同时显示,方便复制和阅读 +- 如果输出包含公式,会以 tex 形式和渲染形式同时显示,方便复制和阅读
-- 懒得看项目代码?直接把整个工程炫ChatGPT嘴里 +- 懒得看项目代码?直接把整个工程炫 ChatGPT 嘴里
@@ -118,44 +114,44 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼

# Installation -### 安装方法I:直接运行 (Windows, Linux or MacOS) + +### 安装方法 I:直接运行 (Windows, Linux or MacOS) 1. 下载项目 - ```sh - git clone --depth=1 https://github.com/binary-husky/gpt_academic.git - cd gpt_academic - ``` + ```sh + git clone --depth=1 https://github.com/binary-husky/gpt_academic.git + cd gpt_academic + ``` -2. 配置API_KEY等变量 +2. 配置 API_KEY 等变量 - 在`config.py`中,配置API KEY等变量。[特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1)、[Wiki-项目配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。 + 在`config.py`中,配置 API KEY 等变量。[特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1)、[Wiki-项目配置说明](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。 - 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解以上读取逻辑,我们强烈建议您在`config.py`同路径下创建一个名为`config_private.py`的新配置文件,并使用`config_private.py`配置项目,以确保更新或其他用户无法轻易查看您的私有配置 」。 - - 「 支持通过`环境变量`配置项目,环境变量的书写格式参考`docker-compose.yml`文件或者我们的[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。配置读取优先级: `环境变量` > `config_private.py` > `config.py` 」。 + 「 程序会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。如您能理解以上读取逻辑,我们强烈建议您在`config.py`同路径下创建一个名为`config_private.py`的新配置文件,并使用`config_private.py`配置项目,以确保更新或其他用户无法轻易查看您的私有配置 」。 + 「 支持通过`环境变量`配置项目,环境变量的书写格式参考`docker-compose.yml`文件或者我们的[Wiki 页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明)。配置读取优先级: `环境变量` > `config_private.py` > `config.py` 」。 3. 安装依赖 - ```sh - # (选择I: 如熟悉python, python推荐版本 3.9 ~ 3.11)备注:使用官方pip源或者阿里pip源, 临时换源方法:python -m pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ - python -m pip install -r requirements.txt - # (选择II: 使用Anaconda)步骤也是类似的 (https://www.bilibili.com/video/BV1rc411W7Dr): - conda create -n gptac_venv python=3.11 # 创建anaconda环境 - conda activate gptac_venv # 激活anaconda环境 - python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤 - ``` + ```sh + # (选择I: 如熟悉python, python推荐版本 3.9 ~ 3.11)备注:使用官方pip源或者阿里pip源, 临时换源方法:python -m pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ + python -m pip install -r requirements.txt + # (选择II: 使用Anaconda)步骤也是类似的 (https://www.bilibili.com/video/BV1rc411W7Dr): + conda create -n gptac_venv python=3.11 # 创建anaconda环境 + conda activate gptac_venv # 激活anaconda环境 + python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤 + ```
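Before moving on: to make the override mechanism from step 2 concrete, a `config_private.py` can hold just the handful of keys you actually change. A minimal sketch — the values below are placeholders, and the authoritative key names are whatever `config.py` declares:

```python
# config_private.py — minimal override sketch (placeholder values).
# Only keys that also exist in config.py are meaningful here; everything else
# continues to be read from config.py (or from environment variables, which
# take precedence over both files).
API_KEY = "sk-xxxxxxxxxxxxxxxx"   # replace with your real key
USE_PROXY = False                 # set True and define `proxies` if needed
LLM_MODEL = "gpt-3.5-turbo"       # default model selection
```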
如果需要支持清华ChatGLM2/复旦MOSS/RWKV作为后端,请点击展开此处

-【可选步骤】如果需要支持清华ChatGLM2/复旦MOSS作为后端,需要额外安装更多依赖(前提条件:熟悉Python + 用过Pytorch + 电脑配置够强): +【可选步骤】如果需要支持清华 ChatGLM2/复旦 MOSS 作为后端,需要额外安装更多依赖(前提条件:熟悉 Python + 用过 Pytorch + 电脑配置够强): ```sh # 【可选步骤I】支持清华ChatGLM2。清华ChatGLM备注:如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误,参考如下: 1:以上默认安装的为torch+cpu版,使用cuda需要卸载torch重新安装torch+cuda; 2:如因本机配置不够无法加载模型,可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) -python -m pip install -r request_llms/requirements_chatglm.txt +python -m pip install -r request_llms/requirements_chatglm.txt # 【可选步骤II】支持复旦MOSS python -m pip install -r request_llms/requirements_moss.txt @@ -171,61 +167,60 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-

- - 4. 运行 - ```sh - python main.py - ``` + ```sh + python main.py + ``` -### 安装方法II:使用Docker +### 安装方法 II:使用 Docker -0. 部署项目的全部能力(这个是包含cuda和latex的大型镜像。但如果您网速慢、硬盘小,则不推荐该方法部署完整项目) -[![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml) +0. 部署项目的全部能力(这个是包含 cuda 和 latex 的大型镜像。但如果您网速慢、硬盘小,则不推荐该方法部署完整项目) + [![fullcapacity](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-all-capacity.yml) - ``` sh - # 修改docker-compose.yml,保留方案0并删除其他方案。然后运行: - docker-compose up - ``` + ``` sh + # 修改docker-compose.yml,保留方案0并删除其他方案。然后运行: + docker-compose up + ``` -1. 仅ChatGPT+文心一言+spark等在线模型(推荐大多数人选择) -[![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml) -[![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml) -[![basicaudio](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml) +1. 仅 ChatGPT+文心一言+spark 等在线模型(推荐大多数人选择) + [![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml) + [![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml) + [![basicaudio](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml) - ``` sh - # 修改docker-compose.yml,保留方案1并删除其他方案。然后运行: - docker-compose up - ``` + ``` sh + # 修改docker-compose.yml,保留方案1并删除其他方案。然后运行: + docker-compose up + ``` -P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用方案4或者方案0获取Latex功能。 +P.S. 如果需要依赖 Latex 的插件功能,请见 Wiki。另外,您也可以直接使用方案 4 或者方案 0 获取 Latex 功能。 -2. ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时) -[![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml) +2. 
ChatGPT + ChatGLM2 + MOSS + LLAMA2 + 通义千问(需要熟悉[Nvidia Docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#installing-on-ubuntu-and-debian)运行时) + [![chatglm](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-chatglm.yml) - ``` sh - # 修改docker-compose.yml,保留方案2并删除其他方案。然后运行: - docker-compose up - ``` + ``` sh + # 修改docker-compose.yml,保留方案2并删除其他方案。然后运行: + docker-compose up + ``` +### 安装方法 III:其他部署方法 -### 安装方法III:其他部署方法 -1. **Windows一键运行脚本**。 -完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。脚本贡献来源:[oobabooga](https://github.com/oobabooga/one-click-installers)。 +1. **Windows 一键运行脚本**。 + 完全不熟悉 python 环境的 Windows 用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。脚本贡献来源:[oobabooga](https://github.com/oobabooga/one-click-installers)。 -2. 使用第三方API、Azure等、文心一言、星火等,见[Wiki页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) +2. 使用第三方 API、Azure 等、文心一言、星火等,见[Wiki 页面](https://github.com/binary-husky/gpt_academic/wiki/项目配置说明) 3. 云服务器远程部署避坑指南。 -请访问[云服务器远程部署wiki](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) + 请访问[云服务器远程部署 wiki](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) 4. 在其他平台部署&二级网址部署 - - 使用Sealos[一键部署](https://github.com/binary-husky/gpt_academic/issues/993)。 - - 使用WSL2(Windows Subsystem for Linux 子系统)。请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) - - 如何在二级网址(如`http://localhost/subpath`)下运行。请访问[FastAPI运行说明](docs/WithFastapi.md) + - 使用 Sealos[一键部署](https://github.com/binary-husky/gpt_academic/issues/993)。 + - 使用 WSL2(Windows Subsystem for Linux 子系统)。请访问[部署 wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) + - 如何在二级网址(如`http://localhost/subpath`)下运行。请访问[FastAPI 运行说明](docs/WithFastapi.md)

# Advanced Usage
+
### I:自定义新的便捷按钮(学术快捷键)

任意文本编辑器打开`core_functional.py`,添加如下条目,然后重启程序。(如果按钮已存在,那么可以直接修改(前缀、后缀都已支持热修改),无需重启程序即可生效。)
@@ -234,8 +229,8 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以
```python
"超级英译中": {
    # 前缀,会被加在你的输入之前。例如,用来描述你的要求,例如翻译、解释代码、润色等等
-    "Prefix": "请把下面一段内容翻译成中文,然后用一个markdown表格逐一解释文中出现的专有名词:\n\n",
-
+    "Prefix": "请把下面一段内容翻译成中文,然后用一个markdown表格逐一解释文中出现的专有名词:\n\n", 
+
    # 后缀,会被加在你的输入之后。例如,配合前缀可以把你的输入内容用引号圈起来。
    "Suffix": "",
},
@@ -246,23 +241,25 @@ P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以

### II:自定义函数插件
+
编写强大的函数插件来执行任何你想得到的和想不到的任务。
-本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。
+本项目的插件编写、调试难度很低,只要您具备一定的 python 基础知识,就可以仿照我们提供的模板实现自己的插件功能。
详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。
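Stepping back to the custom-button entry in section I above: as far as the README describes them, the Prefix and Suffix fields are simply concatenated around whatever sits in the input box. A minimal sketch of that assumed behavior — the function name is illustrative, and the real wiring lives in `core_functional.py` and the request path:

```python
def apply_shortcut(user_input: str, entry: dict) -> str:
    # Prefix goes before the raw input, Suffix after — nothing else is added.
    # Hot edits take effect because the entry is re-read at call time
    # (per the README note above; an assumption about the implementation).
    return entry.get("Prefix", "") + user_input + entry.get("Suffix", "")

# usage with the "超级英译中" entry shown above
entry = {
    "Prefix": "请把下面一段内容翻译成中文,然后用一个markdown表格逐一解释文中出现的专有名词:\n\n",
    "Suffix": "",
}
final_prompt = apply_shortcut("Transformers process tokens in parallel.", entry)
```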

# Updates + ### I:动态 -1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件, +1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的 html 文件, 另外在函数插件区(下拉菜单)调用 `载入对话历史存档` ,即可还原之前的会话。 -Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存。 +Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史 html 存档缓存。
-2. ⭐Latex/Arxiv论文翻译功能⭐ +2. ⭐Latex/Arxiv 论文翻译功能 ⭐
===> @@ -270,7 +267,7 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 3. 虚空终端(从自然语言输入中,理解用户意图+自动调用其他插件) -- 步骤一:输入 “ 请调用插件翻译PDF论文,地址为https://openreview.net/pdf?id=rJl0r3R9KX ” +- 步骤一:输入 “ 请调用插件翻译 PDF 论文,地址为https://openreview.net/pdf?id=rJl0r3R9KX ” - 步骤二:点击“虚空终端”
@@ -294,17 +291,17 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h
-7. OpenAI图像生成 +7. OpenAI 图像生成
-8. OpenAI音频解析与总结 +8. OpenAI 音频解析与总结
-9. Latex全文校对纠错 +9. Latex 全文校对纠错
===> @@ -315,47 +312,46 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h
- - ### II:版本: -- version 3.70(todo): 优化AutoGen插件主题并设计一系列衍生插件 -- version 3.60: 引入AutoGen作为新一代插件的基石 -- version 3.57: 支持GLM3,星火v3,文心一言v4,修复本地模型的并发BUG -- version 3.56: 支持动态追加基础功能按钮,新汇报PDF汇总页面 +- version 3.70(todo): 优化 AutoGen 插件主题并设计一系列衍生插件 +- version 3.60: 引入 AutoGen 作为新一代插件的基石 +- version 3.57: 支持 GLM3,星火 v3,文心一言 v4,修复本地模型的并发 BUG +- version 3.56: 支持动态追加基础功能按钮,新汇报 PDF 汇总页面 - version 3.55: 重构前端界面,引入悬浮窗口与菜单栏 - version 3.54: 新增动态代码解释器(Code Interpreter)(待完善) - version 3.53: 支持动态选择不同界面主题,提高稳定性&解决多用户冲突问题 -- version 3.50: 使用自然语言调用本项目的所有函数插件(虚空终端),支持插件分类,改进UI,设计新主题 +- version 3.50: 使用自然语言调用本项目的所有函数插件(虚空终端),支持插件分类,改进 UI,设计新主题 - version 3.49: 支持百度千帆平台和文心一言 -- version 3.48: 支持阿里达摩院通义千问,上海AI-Lab书生,讯飞星火 +- version 3.48: 支持阿里达摩院通义千问,上海 AI-Lab 书生,讯飞星火 - version 3.46: 支持完全脱手操作的实时语音对话 -- version 3.45: 支持自定义ChatGLM2微调模型 -- version 3.44: 正式支持Azure,优化界面易用性 -- version 3.4: +arxiv论文翻译、latex论文批改功能 +- version 3.45: 支持自定义 ChatGLM2 微调模型 +- version 3.44: 正式支持 Azure,优化界面易用性 +- version 3.4: +arxiv 论文翻译、latex 论文批改功能 - version 3.3: +互联网信息综合功能 -- version 3.2: 函数插件支持更多参数接口 (保存对话功能, 解读任意语言代码+同时询问任意的LLM组合) -- version 3.1: 支持同时问询多个gpt模型!支持api2d,支持多个apikey负载均衡 -- version 3.0: 对chatglm和其他小型llm的支持 +- version 3.2: 函数插件支持更多参数接口 (保存对话功能, 解读任意语言代码+同时询问任意的 LLM 组合) +- version 3.1: 支持同时问询多个 gpt 模型!支持 api2d,支持多个 apikey 负载均衡 +- version 3.0: 对 chatglm 和其他小型 llm 的支持 - version 2.6: 重构了插件结构,提高了交互性,加入更多插件 -- version 2.5: 自更新,解决总结大工程源代码时文本过长、token溢出的问题 -- version 2.4: 新增PDF全文翻译功能; 新增输入区切换位置的功能 +- version 2.5: 自更新,解决总结大工程源代码时文本过长、token 溢出的问题 +- version 2.4: 新增 PDF 全文翻译功能; 新增输入区切换位置的功能 - version 2.3: 增强多线程交互性 - version 2.2: 函数插件支持热重载 - version 2.1: 可折叠式布局 - version 2.0: 引入模块化函数插件 - version 1.0: 基础功能 -GPT Academic开发者QQ群:`610599535` +GPT Academic 开发者 QQ 群:`610599535` - 已知问题 - - 某些浏览器翻译插件干扰此软件前端的运行 - - 官方Gradio目前有很多兼容性问题,请**务必使用`requirement.txt`安装Gradio** + - 某些浏览器翻译插件干扰此软件前端的运行 + - 官方 Gradio 目前有很多兼容性问题,请**务必使用`requirement.txt`安装 Gradio** ### III:主题 -可以通过修改`THEME`选项(config.py)变更主题 -1. `Chuanhu-Small-and-Beautiful` [网址](https://github.com/GaiZhenbiao/ChuanhuChatGPT/) +可以通过修改`THEME`选项(config.py)变更主题 + +1. `Chuanhu-Small-and-Beautiful` [网址](https://github.com/GaiZhenbiao/ChuanhuChatGPT/) ### IV:本项目的开发分支 @@ -363,7 +359,6 @@ GPT Academic开发者QQ群:`610599535` 2. `frontier` 分支: 开发分支,测试版 3. 如何接入其他大模型:[接入其他大模型](request_llms/README.md) - ### V:参考与学习 ``` diff --git a/config.py b/config.py index 4284cb85..45365f5e 100644 --- a/config.py +++ b/config.py @@ -91,10 +91,10 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview","gpt-4-vision-prev "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k', "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4", - "chatglm3", "moss", "claude-2","qwen"] -# P.S. 其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random" + "chatglm3", "moss", "claude-2","qwen-1_8B","qwen-7B"] +# P.S. 其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random" # "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"] -# 如果你需要使用Qwen的本地模型,比如qwen1.8b,那么还需要在request_llms\bridge_qwen.py设置一下模型的路径! +# 如果你需要使用Qwen的本地模型,比如qwen1.8b,那么还需要在request_llms下找到对应的文件,设置一下模型的路径! 
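An editorial note on the config hunk above: splitting `"qwen"` into `"qwen-1_8B"` and `"qwen-7B"` in `AVAIL_LLM_MODELS` only works because each name also receives a matching registration in `request_llms/bridge_all.py`, which the hunks below perform. Inside `bridge_all.py` — where `model_info`, `tokenizer_gpt35`, `get_token_num_gpt35`, and `trimmed_format_exc` are already defined — a registration follows this fixed shape (the model and module names here are placeholders, not part of the patch):

```python
# placeholder sketch of the bridge_all.py registration pattern
if "my-new-model" in AVAIL_LLM_MODELS:  # name must match the config entry
    try:
        from .bridge_my_new_model import predict_no_ui_long_connection as my_noui
        from .bridge_my_new_model import predict as my_ui
        model_info.update({
            "my-new-model": {
                "fn_with_ui": my_ui,           # streaming entry point for the web UI
                "fn_without_ui": my_noui,      # blocking entry point used by plugins
                "endpoint": None,              # None for locally hosted models
                "max_token": 4096,             # budget used when clipping history
                "tokenizer": tokenizer_gpt35,  # borrowed tokenizer for token counting
                "token_cnt": get_token_num_gpt35,
            }
        })
    except:
        print(trimmed_format_exc())  # registration failure must not kill startup
```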
# 定义界面上“询问多个GPT模型”插件应该使用哪些模型,请从AVAIL_LLM_MODELS中选择,并在不同模型之间用`&`间隔,例如"gpt-3.5-turbo&chatglm3&azure-gpt-4" MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3" @@ -291,7 +291,8 @@ NUM_CUSTOM_BASIC_BTN = 4 ├── "jittorllms_pangualpha" ├── "jittorllms_llama" ├── "deepseekcoder" -├── "qwen" +├── "qwen-1_8B" +├── "qwen-7B" ├── RWKV的支持见Wiki └── "llama2" diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 8dece548..f20ca651 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -431,12 +431,12 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS: }) except: print(trimmed_format_exc()) -if "qwen" in AVAIL_LLM_MODELS: +if "qwen-1_8B" in AVAIL_LLM_MODELS: # qwen-1.8B try: - from .bridge_qwen import predict_no_ui_long_connection as qwen_noui - from .bridge_qwen import predict as qwen_ui + from .bridge_qwen_1_8B import predict_no_ui_long_connection as qwen_noui + from .bridge_qwen_1_8B import predict as qwen_ui model_info.update({ - "qwen": { + "qwen-1_8B": { "fn_with_ui": qwen_ui, "fn_without_ui": qwen_noui, "endpoint": None, @@ -447,6 +447,24 @@ if "qwen" in AVAIL_LLM_MODELS: }) except: print(trimmed_format_exc()) + +if "qwen-7B" in AVAIL_LLM_MODELS: # qwen-7B + try: + from .bridge_qwen_7B import predict_no_ui_long_connection as qwen_noui + from .bridge_qwen_7B import predict as qwen_ui + model_info.update({ + "qwen-7B": { + "fn_with_ui": qwen_ui, + "fn_without_ui": qwen_noui, + "endpoint": None, + "max_token": 4096, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + } + }) + except: + print(trimmed_format_exc()) + if "chatgpt_website" in AVAIL_LLM_MODELS: # 接入一些逆向工程https://github.com/acheong08/ChatGPT-to-API/ try: from .bridge_chatgpt_website import predict_no_ui_long_connection as chatgpt_website_noui diff --git a/request_llms/bridge_qwen_1_8B.py b/request_llms/bridge_qwen_1_8B.py new file mode 100644 index 00000000..06288305 --- /dev/null +++ b/request_llms/bridge_qwen_1_8B.py @@ -0,0 +1,67 @@ +model_name = "Qwen1_8B" +cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`" + + +from transformers import AutoModel, AutoTokenizer +import time +import threading +import importlib +from toolbox import update_ui, get_conf, ProxyNetworkActivate +from multiprocessing import Process, Pipe +from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns + + + +# ------------------------------------------------------------------------------------------------------------------------ +# 🔌💻 Local Model +# ------------------------------------------------------------------------------------------------------------------------ +class GetQwenLMHandle(LocalLLMHandle): + + def load_model_info(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + self.model_name = model_name + self.cmd_to_install = cmd_to_install + + def load_model_and_tokenizer(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + import os, glob + import os + import platform + from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig + + with ProxyNetworkActivate('Download_LLM'): + model_id = 'Qwen/Qwen-1_8B-Chat' + self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-1_8B-Chat', trust_remote_code=True, resume_download=True) + # use fp16 + model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval() + model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参 + self._model = model + + return self._model, self._tokenizer + + def llm_stream_generator(self, **kwargs): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + 
def adaptor(kwargs): + query = kwargs['query'] + max_length = kwargs['max_length'] + top_p = kwargs['top_p'] + temperature = kwargs['temperature'] + history = kwargs['history'] + return query, max_length, top_p, temperature, history + + query, max_length, top_p, temperature, history = adaptor(kwargs) + + for response in self._model.chat_stream(self._tokenizer, query, history=history): + yield response + + def try_to_import_special_deps(self, **kwargs): + # import something that will raise error if the user does not install requirement_*.txt + # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行 + import importlib + importlib.import_module('modelscope') + + +# ------------------------------------------------------------------------------------------------------------------------ +# 🔌💻 GPT-Academic Interface +# ------------------------------------------------------------------------------------------------------------------------ +predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name) \ No newline at end of file diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen_7B.py similarity index 99% rename from request_llms/bridge_qwen.py rename to request_llms/bridge_qwen_7B.py index d8408d8f..dfe1fc44 100644 --- a/request_llms/bridge_qwen.py +++ b/request_llms/bridge_qwen_7B.py @@ -1,4 +1,4 @@ -model_name = "Qwen" +model_name = "Qwen-7B" cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`" diff --git a/tests/test_llms.py b/tests/test_llms.py index 8b685972..2426cc3d 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -16,8 +16,9 @@ if __name__ == "__main__": # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection # from request_llms.bridge_claude import predict_no_ui_long_connection # from request_llms.bridge_internlm import predict_no_ui_long_connection - from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection - # from request_llms.bridge_qwen import predict_no_ui_long_connection + # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection + # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection + from request_llms.bridge_qwen_1_8B import predict_no_ui_long_connection # from request_llms.bridge_spark import predict_no_ui_long_connection # from request_llms.bridge_zhipu import predict_no_ui_long_connection # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection From 0cd3274d04830aacd1f0e1e683f23665432013f7 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 4 Dec 2023 10:30:02 +0800 Subject: [PATCH 40/88] combine qwen model family --- config.py | 8 +++++-- .../{bridge_qwen_7B.py => bridge_qwen.py} | 24 +++++++------------ request_llms/requirements_qwen.txt | 4 +++- 3 files changed, 17 insertions(+), 19 deletions(-) rename request_llms/{bridge_qwen_7B.py => bridge_qwen.py} (77%) diff --git a/config.py b/config.py index f170a2bb..44e9f079 100644 --- a/config.py +++ b/config.py @@ -15,13 +15,13 @@ API_KEY = "此处填API密钥" # 可同时填写多个API-KEY,用英文逗 USE_PROXY = False if USE_PROXY: """ + 代理网络的地址,打开你的代理软件查看代理协议(socks5h / http)、地址(localhost)和端口(11284) 填写格式是 [协议]:// [地址] :[端口],填写之前不要忘记把USE_PROXY改成True,如果直接在海外服务器部署,此处不修改 <配置教程&视频教程> https://github.com/binary-husky/gpt_academic/issues/1> [协议] 常见协议无非socks5h/http; 例如 v2**y 和 ss* 的默认本地协议是socks5h; 而cl**h 的默认本地协议是http - [地址] 懂的都懂,不懂就填localhost或者127.0.0.1肯定错不了(localhost意思是代理软件安装在本机上) + [地址] 填localhost或者127.0.0.1(localhost意思是代理软件安装在本机上) [端口] 在代理软件的设置里找。虽然不同的代理软件界面不一样,但端口号都应该在最显眼的位置上 """ - # 代理网络的地址,打开你的*学*网软件查看代理的协议(socks5h / 
http)、地址(localhost)和端口(11284) proxies = { # [协议]:// [地址] :[端口] "http": "socks5h://localhost:11284", # 再例如 "http": "http://127.0.0.1:7890", @@ -100,6 +100,10 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview","gpt-4-vision-prev MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3" +# 选择本地模型变体(只有当AVAIL_LLM_MODELS包含了对应本地模型时,才会起作用) +QWEN_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8" + + # 百度千帆(LLM_MODEL="qianfan") BAIDU_CLOUD_API_KEY = '' BAIDU_CLOUD_SECRET_KEY = '' diff --git a/request_llms/bridge_qwen_7B.py b/request_llms/bridge_qwen.py similarity index 77% rename from request_llms/bridge_qwen_7B.py rename to request_llms/bridge_qwen.py index dfe1fc44..1bd846be 100644 --- a/request_llms/bridge_qwen_7B.py +++ b/request_llms/bridge_qwen.py @@ -1,13 +1,7 @@ -model_name = "Qwen-7B" +model_name = "Qwen" cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`" - -from transformers import AutoModel, AutoTokenizer -import time -import threading -import importlib -from toolbox import update_ui, get_conf, ProxyNetworkActivate -from multiprocessing import Process, Pipe +from toolbox import ProxyNetworkActivate, get_conf from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns @@ -24,16 +18,14 @@ class GetQwenLMHandle(LocalLLMHandle): def load_model_and_tokenizer(self): # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 - import os, glob - import os - import platform - from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig - + # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig + from transformers import AutoModelForCausalLM, AutoTokenizer + from transformers.generation import GenerationConfig with ProxyNetworkActivate('Download_LLM'): - model_id = 'qwen/Qwen-7B-Chat' #在这里更改路径,如果你已经下载好了的话,同时,别忘记tokenizer - self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True) + model_id = get_conf('QWEN_MODEL_SELECTION') #在这里更改路径,如果你已经下载好了的话,同时,别忘记tokenizer + self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True) # use fp16 - model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval() + model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval() model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参 self._model = model diff --git a/request_llms/requirements_qwen.txt b/request_llms/requirements_qwen.txt index 3d7d62a0..ea65dee7 100644 --- a/request_llms/requirements_qwen.txt +++ b/request_llms/requirements_qwen.txt @@ -1,2 +1,4 @@ modelscope -transformers_stream_generator \ No newline at end of file +transformers_stream_generator +auto-gptq +optimum \ No newline at end of file From 95504f0bb75a2835e168b033d5c945d58170a451 Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Mon, 4 Dec 2023 10:31:12 +0800 Subject: [PATCH 41/88] Resolve conflicts --- crazy_functions/图片生成.py | 74 +++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/crazy_functions/图片生成.py b/crazy_functions/图片生成.py index 104d4034..d5c4eb05 100644 --- a/crazy_functions/图片生成.py +++ b/crazy_functions/图片生成.py @@ -150,18 +150,27 @@ class ImageEditState(GptAcademicState): file = None if not confirm else file_manifest[0] return confirm, file + def lock_plugin(self, chatbot): + chatbot._cookies['lock_plugin'] = 'crazy_functions.图片生成->图片修改_DALLE2' + 
self.dump_state(chatbot) + + def unlock_plugin(self, chatbot): + self.reset() + chatbot._cookies['lock_plugin'] = None + self.dump_state(chatbot) + def get_resolution(self, x): return (x in ['256x256', '512x512', '1024x1024']), x - + def get_prompt(self, x): confirm = (len(x)>=5) and (not self.get_resolution(x)[0]) and (not self.get_image_file(x)[0]) return confirm, x - + def reset(self): self.req = [ - {'value':None, 'description': '请先上传图像(必须是.png格式), 然后再次点击本插件', 'verify_fn': self.get_image_file}, - {'value':None, 'description': '请输入分辨率,可选:256x256, 512x512 或 1024x1024', 'verify_fn': self.get_resolution}, - {'value':None, 'description': '请输入修改需求,建议您使用英文提示词', 'verify_fn': self.get_prompt}, + {'value':None, 'description': '请先上传图像(必须是.png格式), 然后再次点击本插件', 'verify_fn': self.get_image_file}, + {'value':None, 'description': '请输入分辨率,可选:256x256, 512x512 或 1024x1024, 然后再次点击本插件', 'verify_fn': self.get_resolution}, + {'value':None, 'description': '请输入修改需求,建议您使用英文提示词, 然后再次点击本插件', 'verify_fn': self.get_prompt}, ] self.info = "" @@ -171,7 +180,7 @@ class ImageEditState(GptAcademicState): confirm, res = r['verify_fn'](prompt) if confirm: r['value'] = res - self.set_state(chatbot, 'dummy_key', 'dummy_value') + self.dump_state(chatbot) break return self @@ -190,22 +199,63 @@ def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys history = [] # 清空历史 state = ImageEditState.get_state(chatbot, ImageEditState) state = state.feed(prompt, chatbot) + state.lock_plugin(chatbot) if not state.already_obtained_all_materials(): - chatbot.append(["图片修改(先上传图片,再输入修改需求,最后输入分辨率)", state.next_req()]) + chatbot.append(["图片修改\n\n1. 上传图片(图片中需要修改的位置用橡皮擦擦除为纯白色,即RGB=255,255,255)\n2. 输入分辨率 \n3. 输入修改需求", state.next_req()]) yield from update_ui(chatbot=chatbot, history=history) return - image_path = state.req[0] - resolution = state.req[1] - prompt = state.req[2] + image_path = state.req[0]['value'] + resolution = state.req[1]['value'] + prompt = state.req[2]['value'] chatbot.append(["图片修改, 执行中", f"图片:`{image_path}`
<br/>分辨率:`{resolution}`<br/>修改需求:`{prompt}`"])
     yield from update_ui(chatbot=chatbot, history=history)
     image_url, image_path = edit_image(llm_kwargs, prompt, image_path, resolution)
-    chatbot.append([state.prompt,
+    chatbot.append([prompt,
         f'图像中转网址: <br/>`{image_url}`<br/>'+
         f'中转网址预览: <br/><div align="center"><img src="{image_url}"></div>'
         f'本地文件地址: <br/>`{image_path}`<br/>'+
         f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>
' ]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新 + state.unlock_plugin(chatbot) + +def make_transparent(input_image_path, output_image_path): + from PIL import Image + image = Image.open(input_image_path) + image = image.convert("RGBA") + data = image.getdata() + new_data = [] + for item in data: + if item[0] == 255 and item[1] == 255 and item[2] == 255: + new_data.append((255, 255, 255, 0)) + else: + new_data.append(item) + image.putdata(new_data) + image.save(output_image_path, "PNG") + +def resize_image(input_path, output_path, max_size=1024): + from PIL import Image + with Image.open(input_path) as img: + width, height = img.size + if width > max_size or height > max_size: + if width >= height: + new_width = max_size + new_height = int((max_size / width) * height) + else: + new_height = max_size + new_width = int((max_size / height) * width) + + resized_img = img.resize(size=(new_width, new_height)) + resized_img.save(output_path) + else: + img.save(output_path) + +def make_square_image(input_path, output_path): + from PIL import Image + with Image.open(input_path) as img: + width, height = img.size + size = max(width, height) + new_img = Image.new("RGBA", (size, size), color="black") + new_img.paste(img, ((size - width) // 2, (size - height) // 2)) + new_img.save(output_path) From 3c03f240ba0b759b34e8d70d1bc9a6f7ba7791e3 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 4 Dec 2023 10:39:10 +0800 Subject: [PATCH 42/88] move token limit conf to bridge_all.py --- config.py | 5 +---- request_llms/bridge_all.py | 2 +- request_llms/bridge_deepseekcoder.py | 5 ++--- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/config.py b/config.py index fcad051e..87f736c4 100644 --- a/config.py +++ b/config.py @@ -91,7 +91,7 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview","gpt-4-vision-prev "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k', "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4", - "chatglm3", "moss", "claude-2", "deepseekcoder"] + "chatglm3", "moss", "claude-2"] # P.S. 
其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random" # "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"] @@ -114,9 +114,6 @@ CHATGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda" LOCAL_MODEL_QUANT = "FP16" # 默认 "FP16" "INT4" 启用量化INT4版本 "INT8" 启用量化INT8版本 -# 设置deepseekcoder运行时输入的最大token数(超过4096没有意义),对话过程爆显存可以适当调小 -MAX_INPUT_TOKEN_LENGTH = 2048 - # 设置gradio的并行线程数(不需要修改) CONCURRENT_COUNT = 100 diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index 8dece548..dcfeba92 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -552,7 +552,7 @@ if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder "fn_with_ui": deepseekcoder_ui, "fn_without_ui": deepseekcoder_noui, "endpoint": None, - "max_token": 4096, + "max_token": 2048, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, } diff --git a/request_llms/bridge_deepseekcoder.py b/request_llms/bridge_deepseekcoder.py index 09bd0b38..89964abe 100644 --- a/request_llms/bridge_deepseekcoder.py +++ b/request_llms/bridge_deepseekcoder.py @@ -8,7 +8,6 @@ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns from threading import Thread import torch -MAX_INPUT_TOKEN_LENGTH = get_conf("MAX_INPUT_TOKEN_LENGTH") def download_huggingface_model(model_name, max_retry, local_dir): from huggingface_hub import snapshot_download for i in range(1, max_retry): @@ -94,8 +93,8 @@ class GetCoderLMHandle(LocalLLMHandle): history.append({ 'role': 'user', 'content': query}) messages = history inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt") - if inputs.shape[1] > MAX_INPUT_TOKEN_LENGTH: - inputs = inputs[:, -MAX_INPUT_TOKEN_LENGTH:] + if inputs.shape[1] > max_length: + inputs = inputs[:, -max_length:] inputs = inputs.to(self._model.device) generation_kwargs = dict( inputs=inputs, From 692ff4b59cb94d7b31e9dd625935a701d8167daa Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 4 Dec 2023 10:47:07 +0800 Subject: [PATCH 43/88] remove line break --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index c0e0a836..81a82282 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,6 @@ **如果喜欢这个项目,请给它一个Star;如果您发明了好用的快捷键或插件,欢迎发pull requests!** If you like this project, please give it a Star. - Read this in [English](docs/README.English.md) | [日本語](docs/README.Japanese.md) | [한국어](docs/README.Korean.md) | [Русский](docs/README.Russian.md) | [Français](docs/README.French.md). All translations have been provided by the project itself. To translate this project to arbitrary language with GPT, read and run [`multi_language.py`](multi_language.py) (experimental).
From 6d2f1262533129770e9e9c78134fb9d2f739e3cc Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 4 Dec 2023 10:53:07 +0800 Subject: [PATCH 44/88] recv requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 94fac531..a5782f77 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ tiktoken>=0.3.3 requests[socks] pydantic==1.10.11 transformers>=4.27.1 -scipdf_parser +scipdf_parser>=0.52 python-markdown-math websocket-client beautifulsoup4 From b0c627909a386051a4e2abe98b17788ddb7d5587 Mon Sep 17 00:00:00 2001 From: Alpha <1526147838@qq.com> Date: Mon, 4 Dec 2023 12:51:41 +0800 Subject: [PATCH 45/88] =?UTF-8?q?=E6=9B=B4=E6=94=B9=E4=BA=86=E4=B8=80?= =?UTF-8?q?=E4=BA=9B=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 2 ++ request_llms/bridge_qwen.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 44e9f079..a5117245 100644 --- a/config.py +++ b/config.py @@ -101,6 +101,8 @@ MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3" # 选择本地模型变体(只有当AVAIL_LLM_MODELS包含了对应本地模型时,才会起作用) +# 如果你选择Qwen系列的模型,那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型 +# 也可以是具体的模型路径 QWEN_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8" diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py index 1bd846be..940c41d5 100644 --- a/request_llms/bridge_qwen.py +++ b/request_llms/bridge_qwen.py @@ -22,7 +22,7 @@ class GetQwenLMHandle(LocalLLMHandle): from transformers import AutoModelForCausalLM, AutoTokenizer from transformers.generation import GenerationConfig with ProxyNetworkActivate('Download_LLM'): - model_id = get_conf('QWEN_MODEL_SELECTION') #在这里更改路径,如果你已经下载好了的话,同时,别忘记tokenizer + model_id = get_conf('QWEN_MODEL_SELECTION') self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True) # use fp16 model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval() From ec60a85cac6c68ce74b21cdd646f534a390a3e78 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 5 Dec 2023 00:15:17 +0800 Subject: [PATCH 46/88] new vector store establishment --- crazy_functional.py | 4 +- crazy_functions/crazy_utils.py | 85 +---- .../vector_fns/general_file_loader.py | 70 ++++ crazy_functions/vector_fns/vector_database.py | 349 ++++++++++++++++++ .../{Langchain知识库.py => 知识库问答.py} | 2 +- docs/translate_english.json | 2 +- docs/translate_japanese.json | 2 +- docs/translate_std.json | 2 +- docs/translate_traditionalchinese.json | 2 +- tests/test_plugins.py | 6 +- 10 files changed, 430 insertions(+), 94 deletions(-) create mode 100644 crazy_functions/vector_fns/general_file_loader.py create mode 100644 crazy_functions/vector_fns/vector_database.py rename crazy_functions/{Langchain知识库.py => 知识库问答.py} (98%) diff --git a/crazy_functional.py b/crazy_functional.py index c3ee50a2..0d665f18 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -440,7 +440,7 @@ def get_crazy_functions(): print('Load function plugin failed') try: - from crazy_functions.Langchain知识库 import 知识库问答 + from crazy_functions.知识库问答 import 知识库问答 function_plugins.update({ "构建知识库(先上传文件素材,再运行此插件)": { "Group": "对话", @@ -456,7 +456,7 @@ def get_crazy_functions(): print('Load function plugin failed') try: - from crazy_functions.Langchain知识库 import 读取知识库作答 + from crazy_functions.知识库问答 import 读取知识库作答 function_plugins.update({ "知识库问答(构建知识库后,再运行此插件)": { "Group": "对话", diff --git 
a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index afe079f4..9778053a 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -1,4 +1,4 @@ -from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token +from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton import threading import os import logging @@ -631,89 +631,6 @@ def get_files_from_everything(txt, type): # type='.md' - -def Singleton(cls): - _instance = {} - - def _singleton(*args, **kargs): - if cls not in _instance: - _instance[cls] = cls(*args, **kargs) - return _instance[cls] - - return _singleton - - -@Singleton -class knowledge_archive_interface(): - def __init__(self) -> None: - self.threadLock = threading.Lock() - self.current_id = "" - self.kai_path = None - self.qa_handle = None - self.text2vec_large_chinese = None - - def get_chinese_text2vec(self): - if self.text2vec_large_chinese is None: - # < -------------------预热文本向量化模组--------------- > - from toolbox import ProxyNetworkActivate - print('Checking Text2vec ...') - from langchain.embeddings.huggingface import HuggingFaceEmbeddings - with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络 - self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese") - - return self.text2vec_large_chinese - - - def feed_archive(self, file_manifest, id="default"): - self.threadLock.acquire() - # import uuid - self.current_id = id - from zh_langchain import construct_vector_store - self.qa_handle, self.kai_path = construct_vector_store( - vs_id=self.current_id, - files=file_manifest, - sentence_size=100, - history=[], - one_conent="", - one_content_segmentation="", - text2vec = self.get_chinese_text2vec(), - ) - self.threadLock.release() - - def get_current_archive_id(self): - return self.current_id - - def get_loaded_file(self): - return self.qa_handle.get_loaded_file() - - def answer_with_archive_by_id(self, txt, id): - self.threadLock.acquire() - if not self.current_id == id: - self.current_id = id - from zh_langchain import construct_vector_store - self.qa_handle, self.kai_path = construct_vector_store( - vs_id=self.current_id, - files=[], - sentence_size=100, - history=[], - one_conent="", - one_content_segmentation="", - text2vec = self.get_chinese_text2vec(), - ) - VECTOR_SEARCH_SCORE_THRESHOLD = 0 - VECTOR_SEARCH_TOP_K = 4 - CHUNK_SIZE = 512 - resp, prompt = self.qa_handle.get_knowledge_based_conent_test( - query = txt, - vs_path = self.kai_path, - score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD, - vector_search_top_k=VECTOR_SEARCH_TOP_K, - chunk_conent=True, - chunk_size=CHUNK_SIZE, - text2vec = self.get_chinese_text2vec(), - ) - self.threadLock.release() - return resp, prompt @Singleton class nougat_interface(): diff --git a/crazy_functions/vector_fns/general_file_loader.py b/crazy_functions/vector_fns/general_file_loader.py new file mode 100644 index 00000000..a512c483 --- /dev/null +++ b/crazy_functions/vector_fns/general_file_loader.py @@ -0,0 +1,70 @@ +# From project chatglm-langchain + + +from langchain.document_loaders import UnstructuredFileLoader +from langchain.text_splitter import CharacterTextSplitter +import re +from typing import List + +class ChineseTextSplitter(CharacterTextSplitter): + def __init__(self, pdf: bool = False, sentence_size: int = None, **kwargs): + super().__init__(**kwargs) + self.pdf = pdf + self.sentence_size = sentence_size + + def split_text1(self, text: str) -> List[str]: + if self.pdf: + text = re.sub(r"\n{3,}", 
"\n", text) + text = re.sub('\s', ' ', text) + text = text.replace("\n\n", "") + sent_sep_pattern = re.compile('([﹒﹔﹖﹗.。!?]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))') # del :; + sent_list = [] + for ele in sent_sep_pattern.split(text): + if sent_sep_pattern.match(ele) and sent_list: + sent_list[-1] += ele + elif ele: + sent_list.append(ele) + return sent_list + + def split_text(self, text: str) -> List[str]: ##此处需要进一步优化逻辑 + if self.pdf: + text = re.sub(r"\n{3,}", r"\n", text) + text = re.sub('\s', " ", text) + text = re.sub("\n\n", "", text) + + text = re.sub(r'([;;.!?。!?\?])([^”’])', r"\1\n\2", text) # 单字符断句符 + text = re.sub(r'(\.{6})([^"’”」』])', r"\1\n\2", text) # 英文省略号 + text = re.sub(r'(\…{2})([^"’”」』])', r"\1\n\2", text) # 中文省略号 + text = re.sub(r'([;;!?。!?\?]["’”」』]{0,2})([^;;!?,。!?\?])', r'\1\n\2', text) + # 如果双引号前有终止符,那么双引号才是句子的终点,把分句符\n放到双引号后,注意前面的几句都小心保留了双引号 + text = text.rstrip() # 段尾如果有多余的\n就去掉它 + # 很多规则中会考虑分号;,但是这里我把它忽略不计,破折号、英文双引号等同样忽略,需要的再做些简单调整即可。 + ls = [i for i in text.split("\n") if i] + for ele in ls: + if len(ele) > self.sentence_size: + ele1 = re.sub(r'([,,.]["’”」』]{0,2})([^,,.])', r'\1\n\2', ele) + ele1_ls = ele1.split("\n") + for ele_ele1 in ele1_ls: + if len(ele_ele1) > self.sentence_size: + ele_ele2 = re.sub(r'([\n]{1,}| {2,}["’”」』]{0,2})([^\s])', r'\1\n\2', ele_ele1) + ele2_ls = ele_ele2.split("\n") + for ele_ele2 in ele2_ls: + if len(ele_ele2) > self.sentence_size: + ele_ele3 = re.sub('( ["’”」』]{0,2})([^ ])', r'\1\n\2', ele_ele2) + ele2_id = ele2_ls.index(ele_ele2) + ele2_ls = ele2_ls[:ele2_id] + [i for i in ele_ele3.split("\n") if i] + ele2_ls[ + ele2_id + 1:] + ele_id = ele1_ls.index(ele_ele1) + ele1_ls = ele1_ls[:ele_id] + [i for i in ele2_ls if i] + ele1_ls[ele_id + 1:] + + id = ls.index(ele) + ls = ls[:id] + [i for i in ele1_ls if i] + ls[id + 1:] + return ls + +def load_file(filepath, sentence_size): + loader = UnstructuredFileLoader(filepath, mode="elements") + textsplitter = ChineseTextSplitter(pdf=False, sentence_size=sentence_size) + docs = loader.load_and_split(text_splitter=textsplitter) + # write_check_file(filepath, docs) + return docs + diff --git a/crazy_functions/vector_fns/vector_database.py b/crazy_functions/vector_fns/vector_database.py new file mode 100644 index 00000000..2fa2cee5 --- /dev/null +++ b/crazy_functions/vector_fns/vector_database.py @@ -0,0 +1,349 @@ +# From project chatglm-langchain + +import threading +from toolbox import Singleton +import os +import shutil +import os +import uuid +import tqdm +from langchain.vectorstores import FAISS +from langchain.docstore.document import Document +from typing import List, Tuple +import numpy as np +from crazy_functions.vector_fns.general_file_loader import load_file + +embedding_model_dict = { + "ernie-tiny": "nghuyong/ernie-3.0-nano-zh", + "ernie-base": "nghuyong/ernie-3.0-base-zh", + "text2vec-base": "shibing624/text2vec-base-chinese", + "text2vec": "GanymedeNil/text2vec-large-chinese", +} + +# Embedding model name +EMBEDDING_MODEL = "text2vec" + +# Embedding running device +EMBEDDING_DEVICE = "cpu" + +VS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "vector_store") + +UPLOAD_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content") + +# 基于上下文的prompt模版,请务必保留"{question}"和"{context}" +PROMPT_TEMPLATE = """已知信息: +{context} + +根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}""" + +# 文本分句长度 +SENTENCE_SIZE = 100 + +# 匹配后单段上下文长度 +CHUNK_SIZE = 250 + +# LLM input history length +LLM_HISTORY_LEN = 3 + +# 
return top-k text chunk from vector store +VECTOR_SEARCH_TOP_K = 5 + +# 知识检索内容相关度 Score, 数值范围约为0-1100,如果为0,则不生效,经测试设置为小于500时,匹配结果更精准 +VECTOR_SEARCH_SCORE_THRESHOLD = 0 + +NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data") + +FLAG_USER_NAME = uuid.uuid4().hex + +# 是否开启跨域,默认为False,如果需要开启,请设置为True +# is open cross domain +OPEN_CROSS_DOMAIN = False + +def similarity_search_with_score_by_vector( + self, embedding: List[float], k: int = 4 +) -> List[Tuple[Document, float]]: + + def seperate_list(ls: List[int]) -> List[List[int]]: + lists = [] + ls1 = [ls[0]] + for i in range(1, len(ls)): + if ls[i - 1] + 1 == ls[i]: + ls1.append(ls[i]) + else: + lists.append(ls1) + ls1 = [ls[i]] + lists.append(ls1) + return lists + + scores, indices = self.index.search(np.array([embedding], dtype=np.float32), k) + docs = [] + id_set = set() + store_len = len(self.index_to_docstore_id) + for j, i in enumerate(indices[0]): + if i == -1 or 0 < self.score_threshold < scores[0][j]: + # This happens when not enough docs are returned. + continue + _id = self.index_to_docstore_id[i] + doc = self.docstore.search(_id) + if not self.chunk_conent: + if not isinstance(doc, Document): + raise ValueError(f"Could not find document for id {_id}, got {doc}") + doc.metadata["score"] = int(scores[0][j]) + docs.append(doc) + continue + id_set.add(i) + docs_len = len(doc.page_content) + for k in range(1, max(i, store_len - i)): + break_flag = False + for l in [i + k, i - k]: + if 0 <= l < len(self.index_to_docstore_id): + _id0 = self.index_to_docstore_id[l] + doc0 = self.docstore.search(_id0) + if docs_len + len(doc0.page_content) > self.chunk_size: + break_flag = True + break + elif doc0.metadata["source"] == doc.metadata["source"]: + docs_len += len(doc0.page_content) + id_set.add(l) + if break_flag: + break + if not self.chunk_conent: + return docs + if len(id_set) == 0 and self.score_threshold > 0: + return [] + id_list = sorted(list(id_set)) + id_lists = seperate_list(id_list) + for id_seq in id_lists: + for id in id_seq: + if id == id_seq[0]: + _id = self.index_to_docstore_id[id] + doc = self.docstore.search(_id) + else: + _id0 = self.index_to_docstore_id[id] + doc0 = self.docstore.search(_id0) + doc.page_content += " " + doc0.page_content + if not isinstance(doc, Document): + raise ValueError(f"Could not find document for id {_id}, got {doc}") + doc_score = min([scores[0][id] for id in [indices[0].tolist().index(i) for i in id_seq if i in indices[0]]]) + doc.metadata["score"] = int(doc_score) + docs.append(doc) + return docs + + +class LocalDocQA: + llm: object = None + embeddings: object = None + top_k: int = VECTOR_SEARCH_TOP_K + chunk_size: int = CHUNK_SIZE + chunk_conent: bool = True + score_threshold: int = VECTOR_SEARCH_SCORE_THRESHOLD + + def init_cfg(self, + top_k=VECTOR_SEARCH_TOP_K, + ): + + self.llm = None + self.top_k = top_k + + def init_knowledge_vector_store(self, + filepath, + vs_path: str or os.PathLike = None, + sentence_size=SENTENCE_SIZE, + text2vec=None): + loaded_files = [] + failed_files = [] + if isinstance(filepath, str): + if not os.path.exists(filepath): + print("路径不存在") + return None + elif os.path.isfile(filepath): + file = os.path.split(filepath)[-1] + try: + docs = load_file(filepath, sentence_size) + print(f"{file} 已成功加载") + loaded_files.append(filepath) + except Exception as e: + print(e) + print(f"{file} 未能成功加载") + return None + elif os.path.isdir(filepath): + docs = [] + for file in tqdm(os.listdir(filepath), desc="加载文件"): + fullfilepath = 
os.path.join(filepath, file) + try: + docs += load_file(fullfilepath, sentence_size) + loaded_files.append(fullfilepath) + except Exception as e: + print(e) + failed_files.append(file) + + if len(failed_files) > 0: + print("以下文件未能成功加载:") + for file in failed_files: + print(f"{file}\n") + + else: + docs = [] + for file in filepath: + try: + docs += load_file(file) + print(f"{file} 已成功加载") + loaded_files.append(file) + except Exception as e: + print(e) + print(f"{file} 未能成功加载") + + if len(docs) > 0: + print("文件加载完毕,正在生成向量库") + if vs_path and os.path.isdir(vs_path): + self.vector_store = FAISS.load_local(vs_path, text2vec) + self.vector_store.add_documents(docs) + else: + if not vs_path: assert False + self.vector_store = FAISS.from_documents(docs, text2vec) # docs 为Document列表 + + self.vector_store.save_local(vs_path) + return vs_path, loaded_files + else: + self.vector_store = FAISS.load_local(vs_path, text2vec) + return vs_path, loaded_files + + def get_loaded_file(self): + ds = self.vector_store.docstore + return set([ds._dict[k].metadata['source'].split(UPLOAD_ROOT_PATH)[-1] for k in ds._dict]) + + + # query 查询内容 + # vs_path 知识库路径 + # chunk_conent 是否启用上下文关联 + # score_threshold 搜索匹配score阈值 + # vector_search_top_k 搜索知识库内容条数,默认搜索5条结果 + # chunk_sizes 匹配单段内容的连接上下文长度 + def get_knowledge_based_conent_test(self, query, vs_path, chunk_conent, + score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD, + vector_search_top_k=VECTOR_SEARCH_TOP_K, chunk_size=CHUNK_SIZE, + text2vec=None): + self.vector_store = FAISS.load_local(vs_path, text2vec) + self.vector_store.chunk_conent = chunk_conent + self.vector_store.score_threshold = score_threshold + self.vector_store.chunk_size = chunk_size + + embedding = self.vector_store.embedding_function(query) + related_docs_with_score = similarity_search_with_score_by_vector(self.vector_store, embedding, k=vector_search_top_k) + + if not related_docs_with_score: + response = {"query": query, + "source_documents": []} + return response, "" + # prompt = f"{query}. You should answer this question using information from following documents: \n\n" + prompt = f"{query}. 
你必须利用以下文档中包含的信息回答这个问题: \n\n---\n\n" + prompt += "\n\n".join([f"({k}): " + doc.page_content for k, doc in enumerate(related_docs_with_score)]) + prompt += "\n\n---\n\n" + prompt = prompt.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + # print(prompt) + response = {"query": query, "source_documents": related_docs_with_score} + return response, prompt + + + + +def construct_vector_store(vs_id, files, sentence_size, history, one_conent, one_content_segmentation, text2vec): + for file in files: + assert os.path.exists(file), "输入文件不存在" + import nltk + if NLTK_DATA_PATH not in nltk.data.path: nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path + local_doc_qa = LocalDocQA() + local_doc_qa.init_cfg() + vs_path = os.path.join(VS_ROOT_PATH, vs_id) + filelist = [] + if not os.path.exists(os.path.join(UPLOAD_ROOT_PATH, vs_id)): + os.makedirs(os.path.join(UPLOAD_ROOT_PATH, vs_id)) + if isinstance(files, list): + for file in files: + file_name = file.name if not isinstance(file, str) else file + filename = os.path.split(file_name)[-1] + shutil.copyfile(file_name, os.path.join(UPLOAD_ROOT_PATH, vs_id, filename)) + filelist.append(os.path.join(UPLOAD_ROOT_PATH, vs_id, filename)) + vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, vs_path, sentence_size, text2vec) + else: + vs_path, loaded_files = local_doc_qa.one_knowledge_add(vs_path, files, one_conent, one_content_segmentation, + sentence_size, text2vec) + if len(loaded_files): + file_status = f"已添加 {'、'.join([os.path.split(i)[-1] for i in loaded_files if i])} 内容至知识库,并已加载知识库,请开始提问" + else: + pass + # file_status = "文件未成功加载,请重新上传文件" + # print(file_status) + return local_doc_qa, vs_path + +@Singleton +class knowledge_archive_interface(): + def __init__(self) -> None: + self.threadLock = threading.Lock() + self.current_id = "" + self.kai_path = None + self.qa_handle = None + self.text2vec_large_chinese = None + + def get_chinese_text2vec(self): + if self.text2vec_large_chinese is None: + # < -------------------预热文本向量化模组--------------- > + from toolbox import ProxyNetworkActivate + print('Checking Text2vec ...') + from langchain.embeddings.huggingface import HuggingFaceEmbeddings + with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络 + self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese") + + return self.text2vec_large_chinese + + + def feed_archive(self, file_manifest, id="default"): + self.threadLock.acquire() + # import uuid + self.current_id = id + from zh_langchain import construct_vector_store + self.qa_handle, self.kai_path = construct_vector_store( + vs_id=self.current_id, + files=file_manifest, + sentence_size=100, + history=[], + one_conent="", + one_content_segmentation="", + text2vec = self.get_chinese_text2vec(), + ) + self.threadLock.release() + + def get_current_archive_id(self): + return self.current_id + + def get_loaded_file(self): + return self.qa_handle.get_loaded_file() + + def answer_with_archive_by_id(self, txt, id): + self.threadLock.acquire() + if not self.current_id == id: + self.current_id = id + from zh_langchain import construct_vector_store + self.qa_handle, self.kai_path = construct_vector_store( + vs_id=self.current_id, + files=[], + sentence_size=100, + history=[], + one_conent="", + one_content_segmentation="", + text2vec = self.get_chinese_text2vec(), + ) + VECTOR_SEARCH_SCORE_THRESHOLD = 0 + VECTOR_SEARCH_TOP_K = 4 + CHUNK_SIZE = 512 + resp, prompt = self.qa_handle.get_knowledge_based_conent_test( + query = txt, + vs_path = 
self.kai_path, + score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD, + vector_search_top_k=VECTOR_SEARCH_TOP_K, + chunk_conent=True, + chunk_size=CHUNK_SIZE, + text2vec = self.get_chinese_text2vec(), + ) + self.threadLock.release() + return resp, prompt \ No newline at end of file diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/知识库问答.py similarity index 98% rename from crazy_functions/Langchain知识库.py rename to crazy_functions/知识库问答.py index 8433895f..9d53848c 100644 --- a/crazy_functions/Langchain知识库.py +++ b/crazy_functions/知识库问答.py @@ -68,7 +68,7 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro # chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"]) # yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id() - # chatbot._cookies['lock_plugin'] = 'crazy_functions.Langchain知识库->读取知识库作答' + # chatbot._cookies['lock_plugin'] = 'crazy_functions.知识库问答->读取知识库作答' # chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"]) chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 diff --git a/docs/translate_english.json b/docs/translate_english.json index 955dcaf9..622a1d6c 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -1666,7 +1666,7 @@ "连接bing搜索回答问题": "ConnectBingSearchAnswerQuestion", "联网的ChatGPT_bing版": "OnlineChatGPT_BingEdition", "Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage", - "Langchain知识库": "LangchainKnowledgeBase", + "知识库问答": "LangchainKnowledgeBase", "Latex英文纠错加PDF对比": "CorrectEnglishInLatexWithPDFComparison", "Latex输出PDF结果": "OutputPDFFromLatex", "Latex翻译中文并重新编译PDF": "TranslateChineseToEnglishInLatexAndRecompilePDF", diff --git a/docs/translate_japanese.json b/docs/translate_japanese.json index 2f80792c..2184d4a5 100644 --- a/docs/translate_japanese.json +++ b/docs/translate_japanese.json @@ -1487,7 +1487,7 @@ "数学动画生成manim": "GenerateMathematicalAnimationManim", "Markdown翻译指定语言": "TranslateMarkdownSpecifiedLanguage", "知识库问答": "KnowledgeBaseQuestionAnswer", - "Langchain知识库": "LangchainKnowledgeBase", + "知识库问答": "LangchainKnowledgeBase", "读取知识库作答": "ReadKnowledgeBaseAnswer", "交互功能模板函数": "InteractiveFunctionTemplateFunction", "交互功能函数模板": "InteractiveFunctionFunctionTemplate", diff --git a/docs/translate_std.json b/docs/translate_std.json index ee8b2c69..0c2a7bd6 100644 --- a/docs/translate_std.json +++ b/docs/translate_std.json @@ -15,7 +15,7 @@ "删除所有本地对话历史记录": "DeleteAllLocalConversationHistoryRecords", "批量Markdown翻译": "BatchTranslateMarkdown", "连接bing搜索回答问题": "ConnectBingSearchAnswerQuestion", - "Langchain知识库": "LangchainKnowledgeBase", + "知识库问答": "LangchainKnowledgeBase", "Latex输出PDF结果": "OutputPDFFromLatex", "把字符太少的块清除为回车": "ClearBlocksWithTooFewCharactersToNewline", "Latex精细分解与转化": "DecomposeAndConvertLatex", diff --git a/docs/translate_traditionalchinese.json b/docs/translate_traditionalchinese.json index 9ca7cbaa..f0338fca 100644 --- a/docs/translate_traditionalchinese.json +++ b/docs/translate_traditionalchinese.json @@ -1463,7 +1463,7 @@ "数学动画生成manim": "GenerateMathematicalAnimationsWithManim", "Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage", "知识库问答": "KnowledgeBaseQA", - "Langchain知识库": "LangchainKnowledgeBase", + "知识库问答": "LangchainKnowledgeBase", "读取知识库作答": "ReadKnowledgeBaseAndAnswerQuestions", "交互功能模板函数": 
"InteractiveFunctionTemplateFunctions", "交互功能函数模板": "InteractiveFunctionFunctionTemplates", diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 8470895d..aeefc19a 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -48,11 +48,11 @@ if __name__ == "__main__": # for lang in ["English", "French", "Japanese", "Korean", "Russian", "Italian", "German", "Portuguese", "Arabic"]: # plugin_test(plugin='crazy_functions.批量Markdown翻译->Markdown翻译指定语言', main_input="README.md", advanced_arg={"advanced_arg": lang}) - # plugin_test(plugin='crazy_functions.Langchain知识库->知识库问答', main_input="./") + # plugin_test(plugin='crazy_functions.知识库问答->知识库问答', main_input="./") - # plugin_test(plugin='crazy_functions.Langchain知识库->读取知识库作答', main_input="What is the installation method?") + # plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="What is the installation method?") - # plugin_test(plugin='crazy_functions.Langchain知识库->读取知识库作答', main_input="远程云服务器部署?") + # plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="远程云服务器部署?") # plugin_test(plugin='crazy_functions.Latex输出PDF结果->Latex翻译中文并重新编译PDF', main_input="2210.03629") From 49f3fcf2c08ca8ccad52784bda5b5816d0327b8b Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 5 Dec 2023 21:22:15 +0800 Subject: [PATCH 47/88] vector store external to internal --- crazy_functional.py | 6 +++--- crazy_functions/vector_fns/__init__.py | 0 crazy_functions/知识库问答.py | 16 ++++++++-------- docs/translate_english.json | 2 +- docs/translate_japanese.json | 2 +- docs/translate_std.json | 2 +- docs/translate_traditionalchinese.json | 2 +- tests/test_plugins.py | 6 +++--- tests/test_utils.py | 16 ++++++++-------- tests/test_vector_plugins.py | 17 +++++++++++++++++ 10 files changed, 43 insertions(+), 26 deletions(-) create mode 100644 crazy_functions/vector_fns/__init__.py create mode 100644 tests/test_vector_plugins.py diff --git a/crazy_functional.py b/crazy_functional.py index 0d665f18..0c1560f3 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -440,7 +440,7 @@ def get_crazy_functions(): print('Load function plugin failed') try: - from crazy_functions.知识库问答 import 知识库问答 + from crazy_functions.知识库问答 import 知识库文件注入 function_plugins.update({ "构建知识库(先上传文件素材,再运行此插件)": { "Group": "对话", @@ -448,7 +448,7 @@ def get_crazy_functions(): "AsButton": False, "AdvancedArgs": True, "ArgsReminder": "此处待注入的知识库名称id, 默认为default。文件进入知识库后可长期保存。可以通过再次调用本插件的方式,向知识库追加更多文档。", - "Function": HotReload(知识库问答) + "Function": HotReload(知识库文件注入) } }) except: @@ -458,7 +458,7 @@ def get_crazy_functions(): try: from crazy_functions.知识库问答 import 读取知识库作答 function_plugins.update({ - "知识库问答(构建知识库后,再运行此插件)": { + "知识库文件注入(构建知识库后,再运行此插件)": { "Group": "对话", "Color": "stop", "AsButton": False, diff --git a/crazy_functions/vector_fns/__init__.py b/crazy_functions/vector_fns/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/crazy_functions/知识库问答.py b/crazy_functions/知识库问答.py index 9d53848c..8521ca12 100644 --- a/crazy_functions/知识库问答.py +++ b/crazy_functions/知识库问答.py @@ -4,7 +4,7 @@ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_file @CatchException -def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): +def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): """ txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行 @@ -25,9 +25,9 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro # resolve deps try: - 
from zh_langchain import construct_vector_store - from langchain.embeddings.huggingface import HuggingFaceEmbeddings - from .crazy_utils import knowledge_archive_interface + # from zh_langchain import construct_vector_store + # from langchain.embeddings.huggingface import HuggingFaceEmbeddings + from crazy_functions.vector_fns.vector_database import knowledge_archive_interface except Exception as e: chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 @@ -68,7 +68,7 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro # chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"]) # yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id() - # chatbot._cookies['lock_plugin'] = 'crazy_functions.知识库问答->读取知识库作答' + # chatbot._cookies['lock_plugin'] = 'crazy_functions.知识库文件注入->读取知识库作答' # chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"]) chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 @@ -77,9 +77,9 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1): # resolve deps try: - from zh_langchain import construct_vector_store - from langchain.embeddings.huggingface import HuggingFaceEmbeddings - from .crazy_utils import knowledge_archive_interface + # from zh_langchain import construct_vector_store + # from langchain.embeddings.huggingface import HuggingFaceEmbeddings + from crazy_functions.vector_fns.vector_database import knowledge_archive_interface except Exception as e: chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 diff --git a/docs/translate_english.json b/docs/translate_english.json index 622a1d6c..8221a1fb 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -1666,7 +1666,7 @@ "连接bing搜索回答问题": "ConnectBingSearchAnswerQuestion", "联网的ChatGPT_bing版": "OnlineChatGPT_BingEdition", "Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage", - "知识库问答": "LangchainKnowledgeBase", + "知识库文件注入": "LangchainKnowledgeBase", "Latex英文纠错加PDF对比": "CorrectEnglishInLatexWithPDFComparison", "Latex输出PDF结果": "OutputPDFFromLatex", "Latex翻译中文并重新编译PDF": "TranslateChineseToEnglishInLatexAndRecompilePDF", diff --git a/docs/translate_japanese.json b/docs/translate_japanese.json index 2184d4a5..820751a5 100644 --- a/docs/translate_japanese.json +++ b/docs/translate_japanese.json @@ -1487,7 +1487,7 @@ "数学动画生成manim": "GenerateMathematicalAnimationManim", "Markdown翻译指定语言": "TranslateMarkdownSpecifiedLanguage", "知识库问答": "KnowledgeBaseQuestionAnswer", - "知识库问答": "LangchainKnowledgeBase", + "知识库文件注入": "LangchainKnowledgeBase", "读取知识库作答": "ReadKnowledgeBaseAnswer", "交互功能模板函数": "InteractiveFunctionTemplateFunction", "交互功能函数模板": "InteractiveFunctionFunctionTemplate", diff --git a/docs/translate_std.json b/docs/translate_std.json index 0c2a7bd6..d286bac1 100644 --- a/docs/translate_std.json +++ b/docs/translate_std.json @@ -75,7 +75,7 @@ "解析docx": "ParseDocx", "解析源代码新": "ParsingSourceCodeNew", "总结音视频": "SummaryAudioVideo", - "知识库问答": "UpdateKnowledgeArchive", + "知识库文件注入": "UpdateKnowledgeArchive", "多文件润色": "ProofreadMultipleFiles", "多文件翻译": "TranslateMultipleFiles", "解析PDF": 
"ParsePDF", diff --git a/docs/translate_traditionalchinese.json b/docs/translate_traditionalchinese.json index f0338fca..586e0291 100644 --- a/docs/translate_traditionalchinese.json +++ b/docs/translate_traditionalchinese.json @@ -1463,7 +1463,7 @@ "数学动画生成manim": "GenerateMathematicalAnimationsWithManim", "Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage", "知识库问答": "KnowledgeBaseQA", - "知识库问答": "LangchainKnowledgeBase", + "知识库文件注入": "LangchainKnowledgeBase", "读取知识库作答": "ReadKnowledgeBaseAndAnswerQuestions", "交互功能模板函数": "InteractiveFunctionTemplateFunctions", "交互功能函数模板": "InteractiveFunctionFunctionTemplates", diff --git a/tests/test_plugins.py b/tests/test_plugins.py index aeefc19a..13ec259e 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -48,11 +48,11 @@ if __name__ == "__main__": # for lang in ["English", "French", "Japanese", "Korean", "Russian", "Italian", "German", "Portuguese", "Arabic"]: # plugin_test(plugin='crazy_functions.批量Markdown翻译->Markdown翻译指定语言', main_input="README.md", advanced_arg={"advanced_arg": lang}) - # plugin_test(plugin='crazy_functions.知识库问答->知识库问答', main_input="./") + # plugin_test(plugin='crazy_functions.知识库文件注入->知识库文件注入', main_input="./") - # plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="What is the installation method?") + # plugin_test(plugin='crazy_functions.知识库文件注入->读取知识库作答', main_input="What is the installation method?") - # plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="远程云服务器部署?") + # plugin_test(plugin='crazy_functions.知识库文件注入->读取知识库作答', main_input="远程云服务器部署?") # plugin_test(plugin='crazy_functions.Latex输出PDF结果->Latex翻译中文并重新编译PDF', main_input="2210.03629") diff --git a/tests/test_utils.py b/tests/test_utils.py index 1fdca1eb..346f58fa 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -49,14 +49,14 @@ class VoidTerminal(): pass vt = VoidTerminal() -vt.get_conf = silence_stdout_fn(get_conf) -vt.set_conf = silence_stdout_fn(set_conf) -vt.set_multi_conf = silence_stdout_fn(set_multi_conf) -vt.get_plugin_handle = silence_stdout_fn(get_plugin_handle) -vt.get_plugin_default_kwargs = silence_stdout_fn(get_plugin_default_kwargs) -vt.get_chat_handle = silence_stdout_fn(get_chat_handle) -vt.get_chat_default_kwargs = silence_stdout_fn(get_chat_default_kwargs) -vt.chat_to_markdown_str = chat_to_markdown_str +vt.get_conf = (get_conf) +vt.set_conf = (set_conf) +vt.set_multi_conf = (set_multi_conf) +vt.get_plugin_handle = (get_plugin_handle) +vt.get_plugin_default_kwargs = (get_plugin_default_kwargs) +vt.get_chat_handle = (get_chat_handle) +vt.get_chat_default_kwargs = (get_chat_default_kwargs) +vt.chat_to_markdown_str = (chat_to_markdown_str) proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \ vt.get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY') diff --git a/tests/test_vector_plugins.py b/tests/test_vector_plugins.py new file mode 100644 index 00000000..9b75463b --- /dev/null +++ b/tests/test_vector_plugins.py @@ -0,0 +1,17 @@ +""" +对项目中的各个插件进行测试。运行方法:直接运行 python tests/test_plugins.py +""" + + +import os, sys +def validate_path(): dir_name = os.path.dirname(__file__); root_dir_assume = os.path.abspath(dir_name + '/..'); os.chdir(root_dir_assume); sys.path.append(root_dir_assume) +validate_path() # 返回项目根路径 + +if __name__ == "__main__": + from tests.test_utils import plugin_test + + plugin_test(plugin='crazy_functions.知识库问答->知识库文件注入', main_input="./README.md") + + 
plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="What is the installation method?") + + plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="远程云服务器部署?") \ No newline at end of file From 8a6e96c369d87bd89f3d94d3d9097b68120ead84 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 5 Dec 2023 22:56:19 +0800 Subject: [PATCH 48/88] =?UTF-8?q?=E7=9F=A5=E8=AF=86=E5=BA=93=E6=8F=92?= =?UTF-8?q?=E4=BB=B6=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 12 +++++------ crazy_functions/vector_fns/vector_database.py | 2 -- crazy_functions/知识库问答.py | 21 +++++++++++-------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index f51befa1..fe9579b8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,13 +23,11 @@ RUN pip3 install -r requirements.txt # 装载项目文件,安装剩余依赖(必要) +RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu +RUN pip3 install langchain sentence-transformers unstructured[local-inference] faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk + +COPY .cache /root/.cache COPY . . RUN pip3 install -r requirements.txt - - -# 非必要步骤,用于预热模块(可以删除) -RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' - - # 启动(必要) -CMD ["python3", "-u", "main.py"] +CMD ["python3", "-u", "tests/test_vector_plugins.py"] diff --git a/crazy_functions/vector_fns/vector_database.py b/crazy_functions/vector_fns/vector_database.py index 2fa2cee5..098eb225 100644 --- a/crazy_functions/vector_fns/vector_database.py +++ b/crazy_functions/vector_fns/vector_database.py @@ -301,7 +301,6 @@ class knowledge_archive_interface(): self.threadLock.acquire() # import uuid self.current_id = id - from zh_langchain import construct_vector_store self.qa_handle, self.kai_path = construct_vector_store( vs_id=self.current_id, files=file_manifest, @@ -323,7 +322,6 @@ class knowledge_archive_interface(): self.threadLock.acquire() if not self.current_id == id: self.current_id = id - from zh_langchain import construct_vector_store self.qa_handle, self.kai_path = construct_vector_store( vs_id=self.current_id, files=[], diff --git a/crazy_functions/知识库问答.py b/crazy_functions/知识库问答.py index 8521ca12..30153282 100644 --- a/crazy_functions/知识库问答.py +++ b/crazy_functions/知识库问答.py @@ -1,7 +1,10 @@ from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything - +install_msg =""" +pip3 install torch --index-url https://download.pytorch.org/whl/cpu +pip3 install langchain sentence-transformers unstructured[local-inference] faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk +""" @CatchException def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): @@ -29,11 +32,11 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst # from langchain.embeddings.huggingface import HuggingFaceEmbeddings from crazy_functions.vector_fns.vector_database import knowledge_archive_interface except Exception as e: - chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]) + chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - from .crazy_utils import try_install_deps - try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain']) - yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history) + # from 
.crazy_utils import try_install_deps + # try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain']) + # yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history) return # < --------------------读取文件--------------- > @@ -81,11 +84,11 @@ def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst # from langchain.embeddings.huggingface import HuggingFaceEmbeddings from crazy_functions.vector_fns.vector_database import knowledge_archive_interface except Exception as e: - chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]) + chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - from .crazy_utils import try_install_deps - try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain']) - yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history) + # from .crazy_utils import try_install_deps + # try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain']) + # yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history) return # < ------------------- --------------- > From 7bac8f4bd3333ff80c2a63e5757446bf91313e53 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Wed, 6 Dec 2023 22:45:14 +0800 Subject: [PATCH 49/88] fix local vector store bug --- crazy_functions/vector_fns/vector_database.py | 66 ++++++++----------- crazy_functions/知识库问答.py | 13 ++-- tests/test_utils.py | 21 +++--- 3 files changed, 49 insertions(+), 51 deletions(-) diff --git a/crazy_functions/vector_fns/vector_database.py b/crazy_functions/vector_fns/vector_database.py index 098eb225..b256e702 100644 --- a/crazy_functions/vector_fns/vector_database.py +++ b/crazy_functions/vector_fns/vector_database.py @@ -26,10 +26,6 @@ EMBEDDING_MODEL = "text2vec" # Embedding running device EMBEDDING_DEVICE = "cpu" -VS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "vector_store") - -UPLOAD_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content") - # 基于上下文的prompt模版,请务必保留"{question}"和"{context}" PROMPT_TEMPLATE = """已知信息: {context} @@ -159,7 +155,7 @@ class LocalDocQA: elif os.path.isfile(filepath): file = os.path.split(filepath)[-1] try: - docs = load_file(filepath, sentence_size) + docs = load_file(filepath, SENTENCE_SIZE) print(f"{file} 已成功加载") loaded_files.append(filepath) except Exception as e: @@ -171,7 +167,7 @@ class LocalDocQA: for file in tqdm(os.listdir(filepath), desc="加载文件"): fullfilepath = os.path.join(filepath, file) try: - docs += load_file(fullfilepath, sentence_size) + docs += load_file(fullfilepath, SENTENCE_SIZE) loaded_files.append(fullfilepath) except Exception as e: print(e) @@ -185,21 +181,19 @@ class LocalDocQA: else: docs = [] for file in filepath: - try: - docs += load_file(file) - print(f"{file} 已成功加载") - loaded_files.append(file) - except Exception as e: - print(e) - print(f"{file} 未能成功加载") + docs += load_file(file, SENTENCE_SIZE) + print(f"{file} 已成功加载") + loaded_files.append(file) if len(docs) > 0: print("文件加载完毕,正在生成向量库") if vs_path and os.path.isdir(vs_path): - self.vector_store = FAISS.load_local(vs_path, text2vec) - self.vector_store.add_documents(docs) + try: + self.vector_store = FAISS.load_local(vs_path, text2vec) + self.vector_store.add_documents(docs) + except: + self.vector_store = FAISS.from_documents(docs, text2vec) else: - if not vs_path: assert False self.vector_store = FAISS.from_documents(docs, text2vec) # docs 为Document列表 
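
The decisive change in the hunk above (patch 49, "fix local vector store bug") is the load-or-create fallback: a missing, corrupt, or version-incompatible on-disk index no longer aborts ingestion, it simply triggers a rebuild from the freshly loaded documents. A minimal sketch of that pattern, assuming LangChain's FAISS wrapper and an embeddings object (`text2vec` stands in for the plugin's HuggingFace embedding) — not the plugin's exact code:

```python
# Minimal sketch: reuse the on-disk FAISS index when it loads cleanly,
# otherwise rebuild it from the documents that were just loaded.
import os
from langchain.vectorstores import FAISS

def load_or_create_store(vs_path, docs, text2vec):
    if vs_path and os.path.isdir(vs_path):
        try:
            store = FAISS.load_local(vs_path, text2vec)   # reuse existing index
            store.add_documents(docs)                     # append the new docs
        except Exception:
            # index unreadable or built by an incompatible version: start over
            store = FAISS.from_documents(docs, text2vec)
    else:
        store = FAISS.from_documents(docs, text2vec)      # first run: build fresh
    store.save_local(vs_path)
    return store
```
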
self.vector_store.save_local(vs_path) @@ -208,9 +202,9 @@ class LocalDocQA: self.vector_store = FAISS.load_local(vs_path, text2vec) return vs_path, loaded_files - def get_loaded_file(self): + def get_loaded_file(self, vs_path): ds = self.vector_store.docstore - return set([ds._dict[k].metadata['source'].split(UPLOAD_ROOT_PATH)[-1] for k in ds._dict]) + return set([ds._dict[k].metadata['source'].split(vs_path)[-1] for k in ds._dict]) # query 查询内容 @@ -228,7 +222,7 @@ class LocalDocQA: self.vector_store.score_threshold = score_threshold self.vector_store.chunk_size = chunk_size - embedding = self.vector_store.embedding_function(query) + embedding = self.vector_store.embedding_function.embed_query(query) related_docs_with_score = similarity_search_with_score_by_vector(self.vector_store, embedding, k=vector_search_top_k) if not related_docs_with_score: @@ -247,27 +241,23 @@ class LocalDocQA: -def construct_vector_store(vs_id, files, sentence_size, history, one_conent, one_content_segmentation, text2vec): +def construct_vector_store(vs_id, vs_path, files, sentence_size, history, one_conent, one_content_segmentation, text2vec): for file in files: assert os.path.exists(file), "输入文件不存在" import nltk if NLTK_DATA_PATH not in nltk.data.path: nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path local_doc_qa = LocalDocQA() local_doc_qa.init_cfg() - vs_path = os.path.join(VS_ROOT_PATH, vs_id) filelist = [] - if not os.path.exists(os.path.join(UPLOAD_ROOT_PATH, vs_id)): - os.makedirs(os.path.join(UPLOAD_ROOT_PATH, vs_id)) - if isinstance(files, list): - for file in files: - file_name = file.name if not isinstance(file, str) else file - filename = os.path.split(file_name)[-1] - shutil.copyfile(file_name, os.path.join(UPLOAD_ROOT_PATH, vs_id, filename)) - filelist.append(os.path.join(UPLOAD_ROOT_PATH, vs_id, filename)) - vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, vs_path, sentence_size, text2vec) - else: - vs_path, loaded_files = local_doc_qa.one_knowledge_add(vs_path, files, one_conent, one_content_segmentation, - sentence_size, text2vec) + if not os.path.exists(os.path.join(vs_path, vs_id)): + os.makedirs(os.path.join(vs_path, vs_id)) + for file in files: + file_name = file.name if not isinstance(file, str) else file + filename = os.path.split(file_name)[-1] + shutil.copyfile(file_name, os.path.join(vs_path, vs_id, filename)) + filelist.append(os.path.join(vs_path, vs_id, filename)) + vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, os.path.join(vs_path, vs_id), sentence_size, text2vec) + if len(loaded_files): file_status = f"已添加 {'、'.join([os.path.split(i)[-1] for i in loaded_files if i])} 内容至知识库,并已加载知识库,请开始提问" else: @@ -297,12 +287,13 @@ class knowledge_archive_interface(): return self.text2vec_large_chinese - def feed_archive(self, file_manifest, id="default"): + def feed_archive(self, file_manifest, vs_path, id="default"): self.threadLock.acquire() # import uuid self.current_id = id self.qa_handle, self.kai_path = construct_vector_store( vs_id=self.current_id, + vs_path=vs_path, files=file_manifest, sentence_size=100, history=[], @@ -315,15 +306,16 @@ class knowledge_archive_interface(): def get_current_archive_id(self): return self.current_id - def get_loaded_file(self): - return self.qa_handle.get_loaded_file() + def get_loaded_file(self, vs_path): + return self.qa_handle.get_loaded_file(vs_path) - def answer_with_archive_by_id(self, txt, id): + def answer_with_archive_by_id(self, txt, id, vs_path): self.threadLock.acquire() if not 
self.current_id == id: self.current_id = id self.qa_handle, self.kai_path = construct_vector_store( vs_id=self.current_id, + vs_path=vs_path, files=[], sentence_size=100, history=[], diff --git a/crazy_functions/知识库问答.py b/crazy_functions/知识库问答.py index 30153282..48988353 100644 --- a/crazy_functions/知识库问答.py +++ b/crazy_functions/知识库问答.py @@ -1,9 +1,10 @@ -from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg +from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg, get_log_folder, get_user from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything install_msg =""" pip3 install torch --index-url https://download.pytorch.org/whl/cpu -pip3 install langchain sentence-transformers unstructured[local-inference] faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk +pip3 install transformers --upgrade +pip3 install langchain sentence-transformers unstructured[all-docs] faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade """ @CatchException @@ -65,8 +66,9 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst print('Establishing knowledge archive ...') with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络 kai = knowledge_archive_interface() - kai.feed_archive(file_manifest=file_manifest, id=kai_id) - kai_files = kai.get_loaded_file() + vs_path = get_log_folder(user=get_user(chatbot), plugin_name='vec_store') + kai.feed_archive(file_manifest=file_manifest, vs_path=vs_path, id=kai_id) + kai_files = kai.get_loaded_file(vs_path=vs_path) kai_files = '
'.join(kai_files) # chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"]) # yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 @@ -96,7 +98,8 @@ def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") kai_id = plugin_kwargs.get("advanced_arg", 'default') - resp, prompt = kai.answer_with_archive_by_id(txt, kai_id) + vs_path = get_log_folder(user=get_user(chatbot), plugin_name='vec_store') + resp, prompt = kai.answer_with_archive_by_id(txt, kai_id, vs_path) chatbot.append((txt, f'[知识库 {kai_id}] ' + prompt)) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 diff --git a/tests/test_utils.py b/tests/test_utils.py index 346f58fa..c87908f6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -49,18 +49,18 @@ class VoidTerminal(): pass vt = VoidTerminal() -vt.get_conf = (get_conf) -vt.set_conf = (set_conf) -vt.set_multi_conf = (set_multi_conf) -vt.get_plugin_handle = (get_plugin_handle) -vt.get_plugin_default_kwargs = (get_plugin_default_kwargs) -vt.get_chat_handle = (get_chat_handle) -vt.get_chat_default_kwargs = (get_chat_default_kwargs) +vt.get_conf = silence_stdout_fn(get_conf) +vt.set_conf = silence_stdout_fn(set_conf) +vt.set_multi_conf = silence_stdout_fn(set_multi_conf) +vt.get_plugin_handle = silence_stdout_fn(get_plugin_handle) +vt.get_plugin_default_kwargs = silence_stdout_fn(get_plugin_default_kwargs) +vt.get_chat_handle = silence_stdout_fn(get_chat_handle) +vt.get_chat_default_kwargs = silence_stdout_fn(get_chat_default_kwargs) vt.chat_to_markdown_str = (chat_to_markdown_str) proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \ vt.get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY') -def plugin_test(main_input, plugin, advanced_arg=None): +def plugin_test(main_input, plugin, advanced_arg=None, debug=True): from rich.live import Live from rich.markdown import Markdown @@ -72,7 +72,10 @@ def plugin_test(main_input, plugin, advanced_arg=None): plugin_kwargs['main_input'] = main_input if advanced_arg is not None: plugin_kwargs['plugin_kwargs'] = advanced_arg - my_working_plugin = silence_stdout(plugin)(**plugin_kwargs) + if debug: + my_working_plugin = (plugin)(**plugin_kwargs) + else: + my_working_plugin = silence_stdout(plugin)(**plugin_kwargs) with Live(Markdown(""), auto_refresh=False, vertical_overflow="visible") as live: for cookies, chat, hist, msg in my_working_plugin: From 21bccf69d22cb7110822b5ea48cc02bb8632bc70 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Thu, 7 Dec 2023 21:29:41 +0800 Subject: [PATCH 50/88] add installation info --- crazy_functions/vector_fns/vector_database.py | 3 +-- crazy_functions/知识库问答.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/crazy_functions/vector_fns/vector_database.py b/crazy_functions/vector_fns/vector_database.py index b256e702..def2ccca 100644 --- a/crazy_functions/vector_fns/vector_database.py +++ b/crazy_functions/vector_fns/vector_database.py @@ -199,8 +199,7 @@ class LocalDocQA: self.vector_store.save_local(vs_path) return vs_path, loaded_files else: - self.vector_store = FAISS.load_local(vs_path, text2vec) - return vs_path, loaded_files + raise RuntimeError("文件加载失败,请检查文件格式是否正确") def get_loaded_file(self, vs_path): ds = self.vector_store.docstore diff --git a/crazy_functions/知识库问答.py 
b/crazy_functions/知识库问答.py index 48988353..b6ddb65b 100644 --- a/crazy_functions/知识库问答.py +++ b/crazy_functions/知识库问答.py @@ -2,9 +2,14 @@ from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_l from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything install_msg =""" -pip3 install torch --index-url https://download.pytorch.org/whl/cpu -pip3 install transformers --upgrade -pip3 install langchain sentence-transformers unstructured[all-docs] faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade + +1. python -m pip install torch --index-url https://download.pytorch.org/whl/cpu + +2. python -m pip install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade + +3. python -m pip install unstructured[all-docs] --upgrade + +4. python -c 'import nltk; nltk.download("punkt")' """ @CatchException From 892ccb14c76d2592fb05c17dd72d608333caec69 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 8 Dec 2023 00:18:04 +0800 Subject: [PATCH 51/88] =?UTF-8?q?=E4=BA=92=E5=8A=A8=E6=B8=B8=E6=88=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functional.py | 14 ++ crazy_functions/game_fns/game_utils.py | 35 ++++ .../multi_stage/multi_stage_utils.py | 58 ++++- crazy_functions/互动小游戏.py | 198 +++++------------- 4 files changed, 153 insertions(+), 152 deletions(-) create mode 100644 crazy_functions/game_fns/game_utils.py diff --git a/crazy_functional.py b/crazy_functional.py index c3ee50a2..31766f0e 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -590,6 +590,20 @@ def get_crazy_functions(): print(trimmed_format_exc()) print('Load function plugin failed') + # try: + # from crazy_functions.互动小游戏 import 随机小游戏 + # function_plugins.update({ + # "随机小游戏": { + # "Group": "智能体", + # "Color": "stop", + # "AsButton": True, + # "Function": HotReload(随机小游戏) + # } + # }) + # except: + # print(trimmed_format_exc()) + # print('Load function plugin failed') + # try: # from crazy_functions.chatglm微调工具 import 微调数据集生成 # function_plugins.update({ diff --git a/crazy_functions/game_fns/game_utils.py b/crazy_functions/game_fns/game_utils.py new file mode 100644 index 00000000..09b6f7a9 --- /dev/null +++ b/crazy_functions/game_fns/game_utils.py @@ -0,0 +1,35 @@ + +from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError +from request_llms.bridge_all import predict_no_ui_long_connection +def get_code_block(reply): + import re + pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks + matches = re.findall(pattern, reply) # find all code blocks in text + if len(matches) == 1: + return "```" + matches[0] + "```" # code block + raise RuntimeError("GPT is not generating proper code.") + +def is_same_thing(a, b, llm_kwargs): + from pydantic import BaseModel, Field + class IsSameThing(BaseModel): + is_same_thing: bool = Field(description="determine whether two objects are same thing.", default=False) + + def run_gpt_fn(inputs, sys_prompt, history=[]): + return predict_no_ui_long_connection( + inputs=inputs, llm_kwargs=llm_kwargs, + history=history, sys_prompt=sys_prompt, observe_window=[] + ) + + gpt_json_io = GptJsonIO(IsSameThing) + inputs_01 = "Identity whether the user input and the target is the same thing: \n target object: {a} \n user input object: {b} \n\n\n".format(a=a, b=b) + inputs_01 += "\n\n\n Note that the user may describe the target object with a different language, e.g. 
cat and 猫 are the same thing." + analyze_res_cot_01 = run_gpt_fn(inputs_01, "", []) + + inputs_02 = inputs_01 + gpt_json_io.format_instructions + analyze_res = run_gpt_fn(inputs_02, "", [inputs_01, analyze_res_cot_01]) + + try: + res = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn) + return res.is_same_thing + except JsonStringError as e: + return False \ No newline at end of file diff --git a/crazy_functions/multi_stage/multi_stage_utils.py b/crazy_functions/multi_stage/multi_stage_utils.py index f85d35aa..1395e79f 100644 --- a/crazy_functions/multi_stage/multi_stage_utils.py +++ b/crazy_functions/multi_stage/multi_stage_utils.py @@ -1,6 +1,7 @@ from pydantic import BaseModel, Field from typing import List from toolbox import update_ui_lastest_msg, disable_auto_promotion +from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder from request_llms.bridge_all import predict_no_ui_long_connection from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError import time @@ -36,6 +37,57 @@ class GptAcademicState(): state.chatbot = chatbot return state -class GatherMaterials(): - def __init__(self, materials) -> None: - materials = ['image', 'prompt'] \ No newline at end of file + +class GptAcademicGameBaseState(): + """ + 1. first init: __init__ -> + """ + def init_game(self, chatbot, lock_plugin): + self.plugin_name = None + self.callback_fn = None + self.delete_game = False + self.step_cnt = 0 + + def lock_plugin(self, chatbot): + if self.callback_fn is None: + raise ValueError("callback_fn is None") + chatbot._cookies['lock_plugin'] = self.callback_fn + self.dump_state(chatbot) + + def get_plugin_name(self): + if self.plugin_name is None: + raise ValueError("plugin_name is None") + return self.plugin_name + + def dump_state(self, chatbot): + chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self) + + def set_state(self, chatbot, key, value): + setattr(self, key, value) + chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self) + + @staticmethod + def sync_state(chatbot, llm_kwargs, cls, plugin_name, callback_fn, lock_plugin=True): + state = chatbot._cookies.get(f'plugin_state/{plugin_name}', None) + if state is not None: + state = pickle.loads(state) + else: + state = cls() + state.init_game(chatbot, lock_plugin) + state.plugin_name = plugin_name + state.llm_kwargs = llm_kwargs + state.chatbot = chatbot + state.callback_fn = callback_fn + return state + + def continue_game(self, prompt, chatbot, history): + # 游戏主体 + yield from self.step(prompt, chatbot, history) + self.step_cnt += 1 + # 保存状态,收尾 + self.dump_state(chatbot) + # 如果游戏结束,清理 + if self.delete_game: + chatbot._cookies['lock_plugin'] = None + chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = None + yield from update_ui(chatbot=chatbot, history=history) diff --git a/crazy_functions/互动小游戏.py b/crazy_functions/互动小游戏.py index e00ef32b..a6871b34 100644 --- a/crazy_functions/互动小游戏.py +++ b/crazy_functions/互动小游戏.py @@ -1,159 +1,59 @@ -from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder -from crazy_functions.multi_stage.multi_stage_utils import GptAcademicState +from toolbox import CatchException, update_ui, update_ui_lastest_msg +from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from request_llms.bridge_all import predict_no_ui_long_connection +from 
crazy_functions.game_fns.game_utils import get_code_block, is_same_thing import random -class 小游戏(GptAcademicState): - def __init__(self): - self.need_game_reset = True - self.llm_kwargs = None - super().__init__() + +class MiniGame_ASCII_Art(GptAcademicGameBaseState): - def lock_plugin(self, chatbot): - chatbot._cookies['lock_plugin'] = 'crazy_functions.互动小游戏->谁是卧底' - self.dump_state(chatbot) - - def unlock_plugin(self, chatbot): - self.reset() - chatbot._cookies['lock_plugin'] = None - self.dump_state(chatbot) - - def set_state(self, chatbot, key, value): - return super().set_state(chatbot, key, value) - - def init_game(self, chatbot): - chatbot.get_cookies()['lock_plugin'] = '' - - def clean_up_game(self, chatbot): - chatbot.get_cookies()['lock_plugin'] = None - - def init_player(self): - pass - - def step(self, prompt, chatbot): - pass - - def continue_game(self, prompt, chatbot): - if self.need_game_reset: - self.need_game_reset = False - yield from self.init_game(chatbot) - yield from self.step(prompt, chatbot) - self.dump_state(chatbot) - yield update_ui(chatbot=chatbot, history=[]) - -class 小游戏_谁是卧底_玩家(): - def __init__(self, game_handle, card, llm_model, name) -> None: - self.game_handle = game_handle - self.card = card - self.name = name - self.is_out = False - self.llm_model = llm_model - self.is_human = llm_model == 'human' - self.what_player_has_spoken = [] - - def speek(self, content=None): - if content is None: - assert not self.is_human - speak_what = yield from + def step(self, prompt, chatbot, history): + if self.step_cnt == 0: + chatbot.append(["我画你猜(动物)", "请稍等..."]) else: - self.what_player_has_spoken.append(content) + if prompt.strip() == 'exit': + self.delete_game = True + yield from update_ui_lastest_msg(lastmsg=f"谜底是{self.obj},游戏结束。", chatbot=chatbot, history=history, delay=0.) + return + chatbot.append([prompt, ""]) + yield from update_ui(chatbot=chatbot, history=history) - def agi_speek(self): - inputs = f'please say something about {self.card}' - res = yield from request_gpt_model_in_new_thread_with_ui_alive( - inputs = inputs, - inputs_show_user=inputs, - llm_kwargs=self.game_handle.llm_kwargs, - chatbot=chatbot, - history=history, - sys_prompt=sys_prompt - ) - pass + if self.step_cnt == 0: + self.lock_plugin(chatbot) + self.cur_task = 'draw' - def vote(self, content=None): - if content is None: - assert not self.is_human - self.vote_who = yield from - else: - try: - self.vote_who = int(content) - except: - self.vote_who = None + if self.cur_task == 'draw': + avail_obj = ["狗","猫","鸟","鱼","老鼠","蛇"] + self.obj = random.choice(avail_obj) + inputs = "I want to play a game called Guess the ASCII art. You can draw the ASCII art and I will try to guess it. " + f"This time you draw a {self.obj}. Note that you must not indicate what you have draw in the text, and you should only produce the ASCII art wrapped by ```. " + raw_res = predict_no_ui_long_connection(inputs=inputs, llm_kwargs=self.llm_kwargs, history=[], sys_prompt="") + self.cur_task = 'identify user guess' + res = get_code_block(raw_res) + history += ['', f'the answer is {self.obj}', inputs, res] + yield from update_ui_lastest_msg(lastmsg=res, chatbot=chatbot, history=history, delay=0.) 
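
The `draw` branch above can afford to hand control back to the user because the whole game object survives between requests: `GptAcademicGameBaseState` (from `multi_stage_utils.py` earlier in this patch) pickles the instance into the session's cookie dict, and the `lock_plugin` cookie reroutes the next user message straight back into `continue_game`. A condensed sketch of that round-trip, assuming `chatbot._cookies` is a plain per-session dict (initialization details omitted):

```python
# Condensed sketch of the cookie state round-trip used by the minigame.
import pickle

def dump_state(game, chatbot):
    # the entire game object, including self.obj (the hidden answer), is pickled
    chatbot._cookies[f'plugin_state/{game.plugin_name}'] = pickle.dumps(game)

def sync_state(chatbot, cls, plugin_name, callback_fn):
    blob = chatbot._cookies.get(f'plugin_state/{plugin_name}')
    game = pickle.loads(blob) if blob is not None else cls()
    game.plugin_name, game.callback_fn = plugin_name, callback_fn
    # lock_plugin() later writes callback_fn into chatbot._cookies['lock_plugin'],
    # which makes the frontend dispatch the next message to this plugin again
    return game
```

Note that the answer is also written into `history` (`'the answer is {self.obj}'`), so it stays in the conversation record alongside the drawing while the user-facing message shows only the ASCII art.
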
- def agi_vote(self): - pass - -class 小游戏_谁是卧底(小游戏): - def __init__(self): - self.game_phase = '发言' # 投票 - super().__init__() - - def init_game(self, chatbot): - self.n_players = 3 - self.n_ai_players = self.n_players - 1 - card = "橙子" - undercover_card = "橘子" - llm_model = self.llm_kwargs['llm_model'] - self.players = [ - 小游戏_谁是卧底(self, card, llm_model, str(i)) for i in range(self.n_players) - ] - - undercover = random.randint(0, self.n_players-1) - human = 0 - - self.players[undercover].card = undercover_card - self.players[human].llm_model = 'human' - super().init_game(chatbot) - - def who_is_out(self): - votes = {} - for player in self.players: - if player.is_out: continue - if player.vote is None: continue - if player.vote not in votes: votes[player.vote] = 0 - votes[player.vote] += 1 - max_votes = max(votes.values()) - players_with_max_votes = [player for player, vote_count in votes.items() if vote_count == max_votes] - for player in players_with_max_votes: - print('淘汰了', player.name) - player.is_out = True - return players_with_max_votes - - def step(self, prompt, chatbot): - - if self.game_phase == '发言': - for player in self.players: - if player.is_out: continue - if player.is_human: - player.speek(prompt) - else: - player.speek() - self.game_phase = '投票' - - elif self.game_phase == '投票': - for player in self.players: - if player.is_out: continue - if player.is_human: - player.vote(prompt) - else: - player.vote() - self.who_is_out() - if len([player for player in self.players if not player.is_out]) <= 2: - if sum([player for player in self.players if player.is_undercover]) == 1: - print('卧底获胜') - else: - print('平民获胜') - self.need_game_reset = True - self.game_phase = '发言' - - else: - raise RuntimeError - + elif self.cur_task == 'identify user guess': + if is_same_thing(self.obj, prompt, self.llm_kwargs): + self.delete_game = True + yield from update_ui_lastest_msg(lastmsg="你猜对了!", chatbot=chatbot, history=history, delay=0.) + else: + self.cur_task = 'identify user guess' + yield from update_ui_lastest_msg(lastmsg="猜错了,再试试,输入“exit”获取答案。", chatbot=chatbot, history=history, delay=0.) 
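
With the guess-checking branch above, `step` is the only game-specific code; locking, pickling, step counting, and cleanup once `delete_game` is set all live in `GptAcademicGameBaseState.continue_game`. Adding another minigame is therefore a small subclass. A hypothetical sketch — the class name and the riddle content are invented for illustration and are not part of this patch:

```python
# Hypothetical second minigame riding on the same base class (illustrative only).
from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
from toolbox import update_ui, update_ui_lastest_msg

class MiniGame_Riddle(GptAcademicGameBaseState):
    def step(self, prompt, chatbot, history):
        if self.step_cnt == 0:
            self.answer = "echo"       # fixed answer, just for the sketch
            self.lock_plugin(chatbot)  # route the next user message back here
            chatbot.append(["猜谜语", "I speak without a mouth and hear without ears. What am I?"])
            yield from update_ui(chatbot=chatbot, history=history)
        elif prompt.strip().lower() == self.answer:
            self.delete_game = True    # continue_game clears the lock afterwards
            yield from update_ui_lastest_msg(lastmsg="你猜对了!", chatbot=chatbot, history=history, delay=0.)
        else:
            yield from update_ui_lastest_msg(lastmsg="猜错了,再试试。", chatbot=chatbot, history=history, delay=0.)
```

The entry function would register it exactly as `随机小游戏` does below: call `cls.sync_state(chatbot, llm_kwargs, cls, plugin_name=..., callback_fn=..., lock_plugin=True)` and then `yield from state.continue_game(prompt, chatbot, history)`.
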
+ @CatchException -def 谁是卧底(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): - # 尚未完成 - history = [] # 清空历史 - state = 小游戏_谁是卧底.get_state(chatbot, 小游戏_谁是卧底) - state.llm_kwargs = llm_kwargs - yield from state.continue_game(prompt, chatbot) +def 随机小游戏(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + # 清空历史 + history = [] + # 选择游戏 + cls = MiniGame_ASCII_Art + # 如果之前已经初始化了游戏实例,则继续该实例;否则重新初始化 + state = cls.sync_state(chatbot, + llm_kwargs, + cls, + plugin_name='MiniGame_ASCII_Art', + callback_fn='crazy_functions.互动小游戏->随机小游戏', + lock_plugin=True + ) + yield from state.continue_game(prompt, chatbot, history) From 0cb7dd5280081dbc2bb3548c1d8542e967602d9b Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 8 Dec 2023 22:22:01 +0800 Subject: [PATCH 52/88] test vector store on docker --- Dockerfile | 12 +++++---- check_proxy.py | 8 ++++++ crazy_functions/vector_fns/vector_database.py | 2 +- crazy_functions/知识库问答.py | 2 +- docs/GithubAction+NoLocal+Vectordb | 26 +++++++++++++++++++ 5 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 docs/GithubAction+NoLocal+Vectordb diff --git a/Dockerfile b/Dockerfile index fe9579b8..f51befa1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,11 +23,13 @@ RUN pip3 install -r requirements.txt # 装载项目文件,安装剩余依赖(必要) -RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu -RUN pip3 install langchain sentence-transformers unstructured[local-inference] faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk - -COPY .cache /root/.cache COPY . . RUN pip3 install -r requirements.txt + + +# 非必要步骤,用于预热模块(可以删除) +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + + # 启动(必要) -CMD ["python3", "-u", "tests/test_vector_plugins.py"] +CMD ["python3", "-u", "main.py"] diff --git a/check_proxy.py b/check_proxy.py index e2ba3f1a..2df81855 100644 --- a/check_proxy.py +++ b/check_proxy.py @@ -159,7 +159,15 @@ def warm_up_modules(): enc.encode("模块预热", disallowed_special=()) enc = model_info["gpt-4"]['tokenizer'] enc.encode("模块预热", disallowed_special=()) + +def warm_up_vectordb(): + print('正在执行一些模块的预热 ...') + from toolbox import ProxyNetworkActivate + with ProxyNetworkActivate("Warmup_Modules"): + import nltk + with ProxyNetworkActivate("Warmup_Modules"): nltk.download("punkt") + if __name__ == '__main__': import os os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染 diff --git a/crazy_functions/vector_fns/vector_database.py b/crazy_functions/vector_fns/vector_database.py index def2ccca..cffa22cf 100644 --- a/crazy_functions/vector_fns/vector_database.py +++ b/crazy_functions/vector_fns/vector_database.py @@ -242,7 +242,7 @@ class LocalDocQA: def construct_vector_store(vs_id, vs_path, files, sentence_size, history, one_conent, one_content_segmentation, text2vec): for file in files: - assert os.path.exists(file), "输入文件不存在" + assert os.path.exists(file), "输入文件不存在:" + file import nltk if NLTK_DATA_PATH not in nltk.data.path: nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path local_doc_qa = LocalDocQA() diff --git a/crazy_functions/知识库问答.py b/crazy_functions/知识库问答.py index b6ddb65b..e1cd00ca 100644 --- a/crazy_functions/知识库问答.py +++ b/crazy_functions/知识库问答.py @@ -51,7 +51,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst for sp in spl: _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}') file_manifest += file_manifest_tmp - + if len(file_manifest) == 0: chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"]) yield from 
update_ui(chatbot=chatbot, history=history) # 刷新界面 diff --git a/docs/GithubAction+NoLocal+Vectordb b/docs/GithubAction+NoLocal+Vectordb new file mode 100644 index 00000000..98595e32 --- /dev/null +++ b/docs/GithubAction+NoLocal+Vectordb @@ -0,0 +1,26 @@ +# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM +# 如何构建: 先修改 `config.py`, 然后 docker build -t gpt-academic-nolocal-vs -f docs/GithubAction+NoLocal+Vectordb . +# 如何运行: docker run --rm -it --net=host gpt-academic-nolocal-vs +FROM python:3.11 + +# 指定路径 +WORKDIR /gpt + +# 装载项目文件 +COPY . . + +# 安装依赖 +RUN pip3 install -r requirements.txt + +# 安装知识库插件的额外依赖 +RUN apt-get update && apt-get install libgl1 -y +RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu +RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade +RUN pip3 install unstructured[all-docs] --upgrade + +# 可选步骤,用于预热模块 +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' +RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()' + +# 启动 +CMD ["python3", "-u", "main.py"] From 916b2e8aa76f9bd680dfd9e385b8a1a68a4ddf3f Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sat, 9 Dec 2023 20:18:44 +0800 Subject: [PATCH 53/88] support azure in multi-lang translation --- docs/translate_english.json | 104 +++++++++++++++++++++++++++++++++++- multi_language.py | 22 ++++---- 2 files changed, 114 insertions(+), 12 deletions(-) diff --git a/docs/translate_english.json b/docs/translate_english.json index 955dcaf9..4c995cc8 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -2903,5 +2903,107 @@ "高优先级": "High priority", "请配置ZHIPUAI_API_KEY": "Please configure ZHIPUAI_API_KEY", "单个azure模型": "Single Azure model", - "预留参数 context 未实现": "Reserved parameter 'context' not implemented" + "预留参数 context 未实现": "Reserved parameter 'context' not implemented", + "在输入区输入临时API_KEY后提交": "Submit after entering temporary API_KEY in the input area", + "鸟": "Bird", + "图片中需要修改的位置用橡皮擦擦除为纯白色": "Erase the areas in the image that need to be modified with an eraser to pure white", + "└── PDF文档精准解析": "└── Accurate parsing of PDF documents", + "└── ALLOW_RESET_CONFIG 是否允许通过自然语言描述修改本页的配置": "└── ALLOW_RESET_CONFIG Whether to allow modifying the configuration of this page through natural language description", + "等待指令": "Waiting for instructions", + "不存在": "Does not exist", + "选择游戏": "Select game", + "本地大模型示意图": "Local large model diagram", + "无视此消息即可": "You can ignore this message", + "即RGB=255": "That is, RGB=255", + "如需追问": "If you have further questions", + "也可以是具体的模型路径": "It can also be a specific model path", + "才会起作用": "Will take effect", + "下载失败": "Download failed", + "网页刷新后失效": "Invalid after webpage refresh", + "crazy_functions.互动小游戏-": "crazy_functions.Interactive mini game-", + "右对齐": "Right alignment", + "您可以调用下拉菜单中的“LoadConversationHistoryArchive”还原当下的对话": "You can use the 'LoadConversationHistoryArchive' in the drop-down menu to restore the current conversation", + "左对齐": "Left alignment", + "使用默认的 FP16": "Use default FP16", + "一小时": "One hour", + "从而方便内存的释放": "Thus facilitating memory release", + "如何临时更换API_KEY": "How to temporarily change API_KEY", + "请输入 1024x1024-HD": "Please enter 1024x1024-HD", + "使用 INT8 量化": "Use INT8 quantization", + "3. 输入修改需求": "3. 
Enter modification requirements", + "刷新界面 由于请求gpt需要一段时间": "Refreshing the interface takes some time due to the request for gpt", + "随机小游戏": "Random mini game", + "那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型": "So please specify the specific model in QWEN_MODEL_SELECTION below", + "表值": "Table value", + "我画你猜": "I draw, you guess", + "狗": "Dog", + "2. 输入分辨率": "2. Enter resolution", + "鱼": "Fish", + "尚未完成": "Not yet completed", + "表头": "Table header", + "填localhost或者127.0.0.1": "Fill in localhost or 127.0.0.1", + "请上传jpg格式的图片": "Please upload images in jpg format", + "API_URL_REDIRECT填写格式是错误的": "The format of API_URL_REDIRECT is incorrect", + "├── RWKV的支持见Wiki": "Support for RWKV is available in the Wiki", + "如果中文Prompt效果不理想": "If the Chinese prompt is not effective", + "/SEAFILE_LOCAL/50503047/我的资料库/学位/paperlatex/aaai/Fu_8368_with_appendix": "/SEAFILE_LOCAL/50503047/My Library/Degree/paperlatex/aaai/Fu_8368_with_appendix", + "只有当AVAIL_LLM_MODELS包含了对应本地模型时": "Only when AVAIL_LLM_MODELS contains the corresponding local model", + "选择本地模型变体": "Choose the local model variant", + "如果您确信自己没填错": "If you are sure you haven't made a mistake", + "PyPDF2这个库有严重的内存泄露问题": "PyPDF2 library has serious memory leak issues", + "整理文件集合 输出消息": "Organize file collection and output message", + "没有检测到任何近期上传的图像文件": "No recently uploaded image files detected", + "游戏结束": "Game over", + "调用结束": "Call ended", + "猫": "Cat", + "请及时切换模型": "Please switch models in time", + "次中": "In the meantime", + "如需生成高清图像": "If you need to generate high-definition images", + "CPU 模式": "CPU mode", + "项目目录": "Project directory", + "动物": "Animal", + "居中对齐": "Center alignment", + "请注意拓展名需要小写": "Please note that the extension name needs to be lowercase", + "重试第": "Retry", + "实验性功能": "Experimental feature", + "猜错了": "Wrong guess", + "打开你的代理软件查看代理协议": "Open your proxy software to view the proxy agreement", + "您不需要再重复强调该文件的路径了": "You don't need to emphasize the file path again", + "请阅读": "Please read", + "请直接输入您的问题": "Please enter your question directly", + "API_URL_REDIRECT填错了": "API_URL_REDIRECT is filled incorrectly", + "谜底是": "The answer is", + "第一个模型": "The first model", + "你猜对了!": "You guessed it right!", + "已经接收到您上传的文件": "The file you uploaded has been received", + "您正在调用“图像生成”插件": "You are calling the 'Image Generation' plugin", + "刷新界面 界面更新": "Refresh the interface, interface update", + "如果之前已经初始化了游戏实例": "If the game instance has been initialized before", + "文件": "File", + "老鼠": "Mouse", + "列2": "Column 2", + "等待图片": "Waiting for image", + "使用 INT4 量化": "Use INT4 quantization", + "from crazy_functions.互动小游戏 import 随机小游戏": "TranslatedText", + "游戏主体": "TranslatedText", + "该模型不具备上下文对话能力": "TranslatedText", + "列3": "TranslatedText", + "清理": "TranslatedText", + "检查量化配置": "TranslatedText", + "如果游戏结束": "TranslatedText", + "蛇": "TranslatedText", + "则继续该实例;否则重新初始化": "TranslatedText", + "e.g. cat and 猫 are the same thing": "TranslatedText", + "第三个模型": "TranslatedText", + "如果你选择Qwen系列的模型": "TranslatedText", + "列4": "TranslatedText", + "输入“exit”获取答案": "TranslatedText", + "把它放到子进程中运行": "TranslatedText", + "列1": "TranslatedText", + "使用该模型需要额外依赖": "TranslatedText", + "再试试": "TranslatedText", + "1. 
上传图片": "TranslatedText", + "保存状态": "TranslatedText", + "GPT-Academic对话存档": "TranslatedText", + "Arxiv论文精细翻译": "TranslatedText" } \ No newline at end of file diff --git a/multi_language.py b/multi_language.py index a20fb5af..a807dbd0 100644 --- a/multi_language.py +++ b/multi_language.py @@ -182,12 +182,12 @@ cached_translation = read_map_from_json(language=LANG) def trans(word_to_translate, language, special=False): if len(word_to_translate) == 0: return {} from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency - from toolbox import get_conf, ChatBotWithCookies - proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \ - get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY') + from toolbox import get_conf, ChatBotWithCookies, load_chat_cookies + + cookies = load_chat_cookies() llm_kwargs = { - 'api_key': API_KEY, - 'llm_model': LLM_MODEL, + 'api_key': cookies['api_key'], + 'llm_model': cookies['llm_model'], 'top_p':1.0, 'max_length': None, 'temperature':0.4, @@ -245,15 +245,15 @@ def trans(word_to_translate, language, special=False): def trans_json(word_to_translate, language, special=False): if len(word_to_translate) == 0: return {} from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency - from toolbox import get_conf, ChatBotWithCookies - proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \ - get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY') + from toolbox import get_conf, ChatBotWithCookies, load_chat_cookies + + cookies = load_chat_cookies() llm_kwargs = { - 'api_key': API_KEY, - 'llm_model': LLM_MODEL, + 'api_key': cookies['api_key'], + 'llm_model': cookies['llm_model'], 'top_p':1.0, 'max_length': None, - 'temperature':0.1, + 'temperature':0.4, } import random N_EACH_REQ = random.randint(16, 32) From 2f2b869efdd937ad6ad96f4bd1301fa15e5642e3 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sat, 9 Dec 2023 21:54:34 +0800 Subject: [PATCH 54/88] turn off plugin hot-reload by default --- config.py | 4 ++++ toolbox.py | 24 +++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/config.py b/config.py index dcd02cc8..3d809628 100644 --- a/config.py +++ b/config.py @@ -237,6 +237,10 @@ WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme", "Connect_Grobid", BLOCK_INVALID_APIKEY = False +# 启用插件热加载 +PLUGIN_HOT_RELOAD = False + + # 自定义按钮的最大数量限制 NUM_CUSTOM_BASIC_BTN = 4 diff --git a/toolbox.py b/toolbox.py index f830a3d7..8d910351 100644 --- a/toolbox.py +++ b/toolbox.py @@ -180,12 +180,15 @@ def HotReload(f): 最后,使用yield from语句返回重新加载过的函数,并在被装饰的函数上执行。 最终,装饰器函数返回内部函数。这个内部函数可以将函数的原始定义更新为最新版本,并执行函数的新版本。 """ - @wraps(f) - def decorated(*args, **kwargs): - fn_name = f.__name__ - f_hot_reload = getattr(importlib.reload(inspect.getmodule(f)), fn_name) - yield from f_hot_reload(*args, **kwargs) - return decorated + if get_conf('PLUGIN_HOT_RELOAD'): + @wraps(f) + def decorated(*args, **kwargs): + fn_name = f.__name__ + f_hot_reload = getattr(importlib.reload(inspect.getmodule(f)), fn_name) + yield from f_hot_reload(*args, **kwargs) + return decorated + else: + return f """ @@ -916,7 +919,14 @@ def read_single_conf_with_lru_cache(arg): @lru_cache(maxsize=128) def get_conf(*args): - # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 
避免不小心传github被别人看到 + """ + 本项目的所有配置都集中在config.py中。 修改配置有三种方法,您只需要选择其中一种即可: + - 直接修改config.py + - 创建并修改config_private.py + - 修改环境变量(修改docker-compose.yml等价于修改容器内部的环境变量) + + 注意:如果您使用docker-compose部署,请修改docker-compose(等价于修改容器内部的环境变量) + """ res = [] for arg in args: r = read_single_conf_with_lru_cache(arg) From fa374bf1fc3357ff0ddaed19785290dc377e85a1 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 11 Dec 2023 22:50:19 +0800 Subject: [PATCH 55/88] try full dockerfile with vector store --- .../build-with-all-capacity-beta.yml | 44 +++++++++++++++ docs/GithubAction+AllCapacityBeta | 53 +++++++++++++++++++ docs/GithubAction+NoLocal+Vectordb | 2 +- 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/build-with-all-capacity-beta.yml create mode 100644 docs/GithubAction+AllCapacityBeta diff --git a/.github/workflows/build-with-all-capacity-beta.yml b/.github/workflows/build-with-all-capacity-beta.yml new file mode 100644 index 00000000..1f02fed8 --- /dev/null +++ b/.github/workflows/build-with-all-capacity-beta.yml @@ -0,0 +1,44 @@ +# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages +name: build-with-all-capacity + +on: + push: + branches: + - 'master' + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}_with_all_capacity_beta + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . + push: true + file: docs/GithubAction+AllCapacityBeta + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/docs/GithubAction+AllCapacityBeta b/docs/GithubAction+AllCapacityBeta new file mode 100644 index 00000000..d3a06ee1 --- /dev/null +++ b/docs/GithubAction+AllCapacityBeta @@ -0,0 +1,53 @@ +# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacity --network=host --build-arg http_proxy=http://localhost:10881 --build-arg https_proxy=http://localhost:10881 . +# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacityBeta --network=host . 
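
Patch 54 above ("turn off plugin hot-reload by default") makes plugin hot-reloading opt-in: with the new `PLUGIN_HOT_RELOAD = False` default, `HotReload` returns the plugin function untouched, so production deployments no longer pay for an `importlib.reload` on every invocation. Stripped of diff markers, the decorator reduces to a decoration-time switch; a sketch (in the real `toolbox.py` no import of `get_conf` is needed, since it lives in the same module):

```python
# Sketch of the post-patch HotReload: the config is consulted once, when the
# plugin table is built, not on every call.
import importlib, inspect
from functools import wraps
from toolbox import get_conf  # same-module function in the real file

def HotReload(f):
    if not get_conf('PLUGIN_HOT_RELOAD'):
        return f                                   # production: zero overhead
    @wraps(f)
    def decorated(*args, **kwargs):
        # dev mode: re-import the defining module, run the freshest definition
        fresh = getattr(importlib.reload(inspect.getmodule(f)), f.__name__)
        yield from fresh(*args, **kwargs)
    return decorated
```
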
+# docker run -it --net=host gpt-academic-all-capacity bash + +# 从NVIDIA源,从而支持显卡(检查宿主的nvidia-smi中的cuda版本必须>=11.3) +FROM fuqingxu/11.3.1-runtime-ubuntu20.04-with-texlive:latest + +# use python3 as the system default python +WORKDIR /gpt +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8 + +# # 非必要步骤,更换pip源 (以下三行,可以删除) +# RUN echo '[global]' > /etc/pip.conf && \ +# echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \ +# echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf + +# 下载pytorch +RUN python3 -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113 +# 准备pip依赖 +RUN python3 -m pip install openai numpy arxiv rich +RUN python3 -m pip install colorama Markdown pygments pymupdf +RUN python3 -m pip install python-docx moviepy pdfminer +RUN python3 -m pip install zh_langchain==0.2.1 pypinyin +RUN python3 -m pip install rarfile py7zr +RUN python3 -m pip install aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git +# 下载分支 +WORKDIR /gpt +RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git +WORKDIR /gpt/gpt_academic +RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss + +RUN python3 -m pip install -r requirements.txt +RUN python3 -m pip install -r request_llms/requirements_moss.txt +RUN python3 -m pip install -r request_llms/requirements_qwen.txt +RUN python3 -m pip install -r request_llms/requirements_chatglm.txt +RUN python3 -m pip install -r request_llms/requirements_newbing.txt +RUN python3 -m pip install nougat-ocr + +# 预热Tiktoken模块 +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + +# 安装知识库插件的额外依赖 +RUN apt-get update && apt-get install libgl1 -y +RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade +RUN pip3 install unstructured[all-docs] --upgrade +RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()' +RUN rm -rf /usr/local/lib/python3.8/dist-packages/tests + + +# COPY .cache /root/.cache +# COPY config_private.py config_private.py +# 启动 +CMD ["python3", "-u", "main.py"] diff --git a/docs/GithubAction+NoLocal+Vectordb b/docs/GithubAction+NoLocal+Vectordb index 98595e32..45074d93 100644 --- a/docs/GithubAction+NoLocal+Vectordb +++ b/docs/GithubAction+NoLocal+Vectordb @@ -17,10 +17,10 @@ RUN apt-get update && apt-get install libgl1 -y RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade RUN pip3 install unstructured[all-docs] --upgrade +RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()' # 可选步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' -RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()' # 启动 CMD ["python3", "-u", "main.py"] From 8c7569b689c57a633880e29770f8c8a65dd777aa Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 14 Dec 2023 11:00:55 +0800 Subject: [PATCH 56/88] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dprotobuf=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- request_llms/requirements_chatglm_onnx.txt | 2 -- request_llms/requirements_moss.txt | 1 - requirements.txt | 1 + 3 files changed, 1 insertion(+), 3 
deletions(-) diff --git a/request_llms/requirements_chatglm_onnx.txt b/request_llms/requirements_chatglm_onnx.txt index 54811472..2cd11f69 100644 --- a/request_llms/requirements_chatglm_onnx.txt +++ b/request_llms/requirements_chatglm_onnx.txt @@ -6,5 +6,3 @@ sentencepiece numpy onnxruntime sentencepiece -streamlit -streamlit-chat diff --git a/request_llms/requirements_moss.txt b/request_llms/requirements_moss.txt index c27907c2..544b25f0 100644 --- a/request_llms/requirements_moss.txt +++ b/request_llms/requirements_moss.txt @@ -5,5 +5,4 @@ accelerate matplotlib huggingface_hub triton -streamlit diff --git a/requirements.txt b/requirements.txt index a5782f77..e2534151 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ pypdf2==2.12.1 tiktoken>=0.3.3 requests[socks] pydantic==1.10.11 +protobuf==3.18 transformers>=4.27.1 scipdf_parser>=0.52 python-markdown-math From c181ad38b454ca3705fafd0ddbab348410e3cd7b Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:23:49 +0800 Subject: [PATCH 57/88] Update build-with-all-capacity-beta.yml --- .github/workflows/build-with-all-capacity-beta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-with-all-capacity-beta.yml b/.github/workflows/build-with-all-capacity-beta.yml index 1f02fed8..5a2a1a54 100644 --- a/.github/workflows/build-with-all-capacity-beta.yml +++ b/.github/workflows/build-with-all-capacity-beta.yml @@ -1,5 +1,5 @@ # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages -name: build-with-all-capacity +name: build-with-all-capacity-beta on: push: From f4127a9c9c4a7610c5fa6aa9233a5554a46f11e5 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 15 Dec 2023 12:52:21 +0800 Subject: [PATCH 58/88] change clip history policy --- crazy_functions/latex_fns/latex_actions.py | 2 +- toolbox.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index 113a2785..b43d7d2f 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -404,7 +404,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI if modified_pdf_success: - yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面 + yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍后 ...', chatbot, history) # 刷新Gradio前端界面 result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path if os.path.exists(pj(work_folder, '..', 'translation')): diff --git a/toolbox.py b/toolbox.py index 8d910351..bb4ec667 100644 --- a/toolbox.py +++ b/toolbox.py @@ -1007,14 +1007,19 @@ def clip_history(inputs, history, tokenizer, max_token_limit): def get_token_num(txt): return len(tokenizer.encode(txt, disallowed_special=())) input_token_num = get_token_num(inputs) + + if max_token_limit < 5000: output_token_expect = 256 # 4k & 2k models + elif max_token_limit < 9000: output_token_expect = 512 # 8k models + else: output_token_expect = 1024 # 16k & 32k models + if input_token_num < max_token_limit * 3 / 4: # 
当输入部分的token占比小于限制的3/4时,裁剪时 # 1. 把input的余量留出来 max_token_limit = max_token_limit - input_token_num # 2. 把输出用的余量留出来 - max_token_limit = max_token_limit - 128 + max_token_limit = max_token_limit - output_token_expect # 3. 如果余量太小了,直接清除历史 - if max_token_limit < 128: + if max_token_limit < output_token_expect: history = [] return history else: From c5f1e4e39236091f22c98f36ed74f6b57b06a08a Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 15 Dec 2023 13:03:52 +0800 Subject: [PATCH 59/88] version 3.63 --- version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version b/version index cb4df5ae..680e6e2f 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.62, + "version": 3.63, "show_feature": true, - "new_feature": "修复若干隐蔽的内存BUG <-> 修复多用户冲突问题 <-> 接入Deepseek Coder <-> AutoGen多智能体插件测试版 <-> 修复本地模型在Windows下的加载BUG <-> 支持文心一言v4和星火v3 <-> 支持GLM3和智谱的API <-> 解决本地模型并发BUG <-> 支持动态追加基础功能按钮" + "new_feature": "支持将图片粘贴到输入区 <-> 修复若干隐蔽的内存BUG <-> 修复多用户冲突问题 <-> 接入Deepseek Coder <-> AutoGen多智能体插件测试版" } From 36e19d520282506ab6c0b9dfcec300515b48146b Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 15 Dec 2023 13:16:06 +0800 Subject: [PATCH 60/88] compat further with one api --- request_llms/bridge_chatgpt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/request_llms/bridge_chatgpt.py b/request_llms/bridge_chatgpt.py index e55ad37a..0bdebf0d 100644 --- a/request_llms/bridge_chatgpt.py +++ b/request_llms/bridge_chatgpt.py @@ -51,7 +51,8 @@ def decode_chunk(chunk): chunkjson = json.loads(chunk_decoded[6:]) has_choices = 'choices' in chunkjson if has_choices: choice_valid = (len(chunkjson['choices']) > 0) - if has_choices and choice_valid: has_content = "content" in chunkjson['choices'][0]["delta"] + if has_choices and choice_valid: has_content = ("content" in chunkjson['choices'][0]["delta"]) + if has_content: has_content = (chunkjson['choices'][0]["delta"]["content"] is not None) if has_choices and choice_valid: has_role = "role" in chunkjson['choices'][0]["delta"] except: pass From d169fb4b16bafd6c80d5a22ba7b0cd61642fe6f0 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 15 Dec 2023 13:32:39 +0800 Subject: [PATCH 61/88] fix typo --- crazy_functions/latex_fns/latex_actions.py | 2 +- docs/translate_english.json | 2 +- request_llms/local_llm_class.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index b43d7d2f..b80c01d8 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -404,7 +404,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI if modified_pdf_success: - yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍后 ...', chatbot, history) # 刷新Gradio前端界面 + yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍候 ...', chatbot, history) # 刷新Gradio前端界面 result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path if os.path.exists(pj(work_folder, '..', 'translation')): diff --git a/docs/translate_english.json b/docs/translate_english.json index 02d3b640..3920e1f6 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -2863,7 +2863,7 @@ "加载API_KEY": 
"Loading API_KEY", "协助您编写代码": "Assist you in writing code", "我可以为您提供以下服务": "I can provide you with the following services", - "排队中请稍后 ...": "Please wait in line ...", + "排队中请稍候 ...": "Please wait in line ...", "建议您使用英文提示词": "It is recommended to use English prompts", "不能支撑AutoGen运行": "Cannot support AutoGen operation", "帮助您解决编程问题": "Help you solve programming problems", diff --git a/request_llms/local_llm_class.py b/request_llms/local_llm_class.py index 413df03f..ec7cfd21 100644 --- a/request_llms/local_llm_class.py +++ b/request_llms/local_llm_class.py @@ -183,11 +183,11 @@ class LocalLLMHandle(Process): def stream_chat(self, **kwargs): # ⭐run in main process if self.get_state() == "`准备就绪`": - yield "`正在等待线程锁,排队中请稍后 ...`" + yield "`正在等待线程锁,排队中请稍候 ...`" with self.threadLock: if self.parent.poll(): - yield "`排队中请稍后 ...`" + yield "`排队中请稍候 ...`" self.clear_pending_messages() self.parent.send(kwargs) std_out = "" From dc68e601a51abc7d70742daa98a54d6d8ab3d48d Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 15 Dec 2023 16:28:42 +0800 Subject: [PATCH 62/88] optimize audio plugin --- themes/common.js | 54 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/themes/common.js b/themes/common.js index a164a070..1e491fbe 100644 --- a/themes/common.js +++ b/themes/common.js @@ -129,14 +129,7 @@ function chatbotAutoHeight(){ }, 50); // 每100毫秒执行一次 } -function GptAcademicJavaScriptInit(LAYOUT = "LEFT-RIGHT") { - chatbotIndicator = gradioApp().querySelector('#gpt-chatbot > div.wrap'); - var chatbotObserver = new MutationObserver(() => { - chatbotContentChanged(1); - }); - chatbotObserver.observe(chatbotIndicator, { attributes: true, childList: true, subtree: true }); - if (LAYOUT === "LEFT-RIGHT") {chatbotAutoHeight();} -} + function get_elements(consider_state_panel=false) { var chatbot = document.querySelector('#gpt-chatbot > div.wrap.svelte-18telvq'); @@ -263,3 +256,48 @@ window.addEventListener("DOMContentLoaded", function () { // const ga = document.getElementsByTagName("gradio-app"); gradioApp().addEventListener("render", monitoring_input_box); }); + +function audio_fn_init() { + let audio_component = document.getElementById('elem_audio'); + if (audio_component){ + let buttonElement = audio_component.querySelector('button'); + let specificElement = audio_component.querySelector('.hide.sr-only'); + specificElement.remove(); + + buttonElement.childNodes[1].nodeValue = '启动麦克风'; + buttonElement.addEventListener('click', function(event) { + event.stopPropagation(); + toast_push('您启动了麦克风!下一步请点击“实时语音对话”启动语音对话。'); + }); + + // 查找语音插件按钮 + let buttons = document.querySelectorAll('button'); + let audio_button = null; + for(let button of buttons){ + if (button.textContent.includes('语音')){ + audio_button = button; + break; + } + } + if (audio_button){ + audio_button.addEventListener('click', function() { + toast_push('您点击了“实时语音对话”启动语音对话。'); + }); + let parent_element = audio_component.parentElement; // 将buttonElement移动到audio_button的内部 + audio_button.appendChild(audio_component); + parent_element.remove(); + audio_component.style.cssText = 'width: 250px;right: 0px;display: inline-flex;flex-flow: row-reverse wrap;place-content: stretch space-between;align-items: center;background-color: #ffffff00;'; + } + + } +} + +function GptAcademicJavaScriptInit(LAYOUT = "LEFT-RIGHT") { + audio_fn_init(); + chatbotIndicator = gradioApp().querySelector('#gpt-chatbot > div.wrap'); + var chatbotObserver = new MutationObserver(() => { + chatbotContentChanged(1); + }); 
+ chatbotObserver.observe(chatbotIndicator, { attributes: true, childList: true, subtree: true }); + if (LAYOUT === "LEFT-RIGHT") {chatbotAutoHeight();} +} \ No newline at end of file From 49fe06ed69a73fc3d948244724f314e63ae3bc91 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 15 Dec 2023 21:12:39 +0800 Subject: [PATCH 63/88] add light edge for audio btn --- themes/common.js | 1 + 1 file changed, 1 insertion(+) diff --git a/themes/common.js b/themes/common.js index 1e491fbe..afa87141 100644 --- a/themes/common.js +++ b/themes/common.js @@ -285,6 +285,7 @@ function audio_fn_init() { }); let parent_element = audio_component.parentElement; // 将buttonElement移动到audio_button的内部 audio_button.appendChild(audio_component); + buttonElement.style.cssText = 'border-color: #00ffe0;border-width: 2px; height: 25px;' parent_element.remove(); audio_component.style.cssText = 'width: 250px;right: 0px;display: inline-flex;flex-flow: row-reverse wrap;place-content: stretch space-between;align-items: center;background-color: #ffffff00;'; } From 8d138210992089cf949e34fed04a1e8e274b91a9 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 15 Dec 2023 23:27:12 +0800 Subject: [PATCH 64/88] a lm-based story writing game --- crazy_functional.py | 26 +-- crazy_functions/game_fns/game_ascii_art.py | 42 ++++ .../game_fns/game_interactive_story.py | 212 ++++++++++++++++++ crazy_functions/互动小游戏.py | 59 ++--- 4 files changed, 287 insertions(+), 52 deletions(-) create mode 100644 crazy_functions/game_fns/game_ascii_art.py create mode 100644 crazy_functions/game_fns/game_interactive_story.py diff --git a/crazy_functional.py b/crazy_functional.py index 7c2bf791..4cc63040 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -590,19 +590,19 @@ def get_crazy_functions(): print(trimmed_format_exc()) print('Load function plugin failed') - # try: - # from crazy_functions.互动小游戏 import 随机小游戏 - # function_plugins.update({ - # "随机小游戏": { - # "Group": "智能体", - # "Color": "stop", - # "AsButton": True, - # "Function": HotReload(随机小游戏) - # } - # }) - # except: - # print(trimmed_format_exc()) - # print('Load function plugin failed') + try: + from crazy_functions.互动小游戏 import 随机小游戏 + function_plugins.update({ + "随机互动小游戏(仅供测试)": { + "Group": "智能体", + "Color": "stop", + "AsButton": False, + "Function": HotReload(随机小游戏) + } + }) + except: + print(trimmed_format_exc()) + print('Load function plugin failed') # try: # from crazy_functions.chatglm微调工具 import 微调数据集生成 diff --git a/crazy_functions/game_fns/game_ascii_art.py b/crazy_functions/game_fns/game_ascii_art.py new file mode 100644 index 00000000..e0b70087 --- /dev/null +++ b/crazy_functions/game_fns/game_ascii_art.py @@ -0,0 +1,42 @@ +from toolbox import CatchException, update_ui, update_ui_lastest_msg +from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState +from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from request_llms.bridge_all import predict_no_ui_long_connection +from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing +import random + + +class MiniGame_ASCII_Art(GptAcademicGameBaseState): + def step(self, prompt, chatbot, history): + if self.step_cnt == 0: + chatbot.append(["我画你猜(动物)", "请稍等..."]) + else: + if prompt.strip() == 'exit': + self.delete_game = True + yield from update_ui_lastest_msg(lastmsg=f"谜底是{self.obj},游戏结束。", chatbot=chatbot, history=history, delay=0.) 
+                return
+            chatbot.append([prompt, ""])
+        yield from update_ui(chatbot=chatbot, history=history)
+
+        if self.step_cnt == 0:
+            self.lock_plugin(chatbot)
+            self.cur_task = 'draw'
+
+        if self.cur_task == 'draw':
+            avail_obj = ["狗","猫","鸟","鱼","老鼠","蛇"]
+            self.obj = random.choice(avail_obj)
+            inputs = "I want to play a game called Guess the ASCII art. You can draw the ASCII art and I will try to guess it. " + \
+                f"This time you draw a {self.obj}. Note that you must not indicate what you have drawn in the text, and you should only produce the ASCII art wrapped by ```. "
+            raw_res = predict_no_ui_long_connection(inputs=inputs, llm_kwargs=self.llm_kwargs, history=[], sys_prompt="")
+            self.cur_task = 'identify user guess'
+            res = get_code_block(raw_res)
+            history += ['', f'the answer is {self.obj}', inputs, res]
+            yield from update_ui_lastest_msg(lastmsg=res, chatbot=chatbot, history=history, delay=0.)
+
+        elif self.cur_task == 'identify user guess':
+            if is_same_thing(self.obj, prompt, self.llm_kwargs):
+                self.delete_game = True
+                yield from update_ui_lastest_msg(lastmsg="你猜对了!", chatbot=chatbot, history=history, delay=0.)
+            else:
+                self.cur_task = 'identify user guess'
+                yield from update_ui_lastest_msg(lastmsg="猜错了,再试试,输入“exit”获取答案。", chatbot=chatbot, history=history, delay=0.)
\ No newline at end of file
diff --git a/crazy_functions/game_fns/game_interactive_story.py b/crazy_functions/game_fns/game_interactive_story.py
new file mode 100644
index 00000000..5c25f4a3
--- /dev/null
+++ b/crazy_functions/game_fns/game_interactive_story.py
@@ -0,0 +1,212 @@
+prompts_hs = """ 请以“{headstart}”为开头,编写一个小说的第一幕。
+
+- 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
+- 出现人物时,给出人物的名字。
+- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
+- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
+- 字数要求:第一幕的字数少于300字,且少于2个段落。
+"""
+
+prompts_interact = """ 小说的前文回顾:
+「
+{previously_on_story}
+」
+
+你是一个作家,根据以上的情节,给出4种不同的后续剧情发展方向,每个发展方向都简明扼要地用一句话说明。稍后,我将在这4个选择中,挑选一种剧情发展。
+
+输出格式例如:
+1. 后续剧情发展1
+2. 后续剧情发展2
+3. 后续剧情发展3
+4. 
后续剧情发展4 +""" + + +prompts_resume = """小说的前文回顾: +「 +{previously_on_story} +」 + +你是一个作家,我们正在互相讨论,确定后续剧情的发展。 +在以下的剧情发展中, +「 +{choice} +」 +我认为更合理的是:{user_choice}。 +请在前文的基础上(不要重复前文),围绕我选定的剧情情节,编写小说的下一幕。 + +- 禁止杜撰不符合我选择的剧情。 +- 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。 +- 不要重复前文。 +- 出现人物时,给出人物的名字。 +- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。 +- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。 +- 小说的下一幕字数少于300字,且少于2个段落。 +""" + + +prompts_terminate = """小说的前文回顾: +「 +{previously_on_story} +」 + +你是一个作家,我们正在互相讨论,确定后续剧情的发展。 +现在,故事该结束了,我认为最合理的故事结局是:{user_choice}。 + +请在前文的基础上(不要重复前文),编写小说的最后一幕。 + +- 不要重复前文。 +- 出现人物时,给出人物的名字。 +- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。 +- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。 +- 字数要求:最后一幕的字数少于1000字。 +""" + + +from toolbox import CatchException, update_ui, update_ui_lastest_msg +from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState +from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from request_llms.bridge_all import predict_no_ui_long_connection +from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing +import random + + +class MiniGame_ResumeStory(GptAcademicGameBaseState): + story_headstart = [ + '先行者知道,他现在是全宇宙中唯一的一个人了。', + '深夜,一个年轻人穿过天安门广场向纪念堂走去。在二十二世纪编年史中,计算机把他的代号定为M102。', + '他知道,这最后一课要提前讲了。又一阵剧痛从肝部袭来,几乎使他晕厥过去。', + '在距地球五万光年的远方,在银河系的中心,一场延续了两万年的星际战争已接近尾声。那里的太空中渐渐隐现出一个方形区域,仿佛灿烂的群星的背景被剪出一个方口。', + '伊依一行三人乘坐一艘游艇在南太平洋上做吟诗航行,他们的目的地是南极,如果几天后能顺利到达那里,他们将钻出地壳去看诗云。', + '很多人生来就会莫名其妙地迷上一样东西,仿佛他的出生就是要和这东西约会似的,正是这样,圆圆迷上了肥皂泡。' + ] + + + def begin_game_step_0(self, prompt, chatbot, history): + # init game at step 0 + self.headstart = random.choice(self.story_headstart) + self.story = [] + chatbot.append(["互动写故事", f"这次的故事开头是:{self.headstart}"]) + self.sys_prompt_ = '你是一个想象力丰富的杰出作家。正在与你的朋友互动,一起写故事,因此你每次写的故事段落应少于300字(结局除外)。' + + + def generate_story_image(self, story_paragraph): + try: + from crazy_functions.图片生成 import gen_image + prompt_ = predict_no_ui_long_connection(inputs=story_paragraph, llm_kwargs=self.llm_kwargs, history=[], sys_prompt='你需要根据用户给出的小说段落,进行简短的环境描写。要求:80字以内。') + image_url, image_path = gen_image(self.llm_kwargs, prompt_, '512x512', model="dall-e-2", quality='standard', style='natural') + return f'
<img src="file={image_path}">'
+        except:
+            return ''
+
+    def step(self, prompt, chatbot, history):
+
+        """
+        首先,处理游戏初始化等特殊情况
+        """
+        if self.step_cnt == 0:
+            self.begin_game_step_0(prompt, chatbot, history)
+            self.lock_plugin(chatbot)
+            self.cur_task = 'head_start'
+        else:
+            if prompt.strip() == 'exit' or prompt.strip() == '结束剧情':
+                # should we terminate game here?
+                self.delete_game = True
+                yield from update_ui_lastest_msg(lastmsg=f"游戏结束。", chatbot=chatbot, history=history, delay=0.)
+                return
+            if '剧情收尾' in prompt:
+                self.cur_task = 'story_terminate'
+            # # well, game resumes
+            # chatbot.append([prompt, ""])
+            # update ui, don't keep the user waiting
+            yield from update_ui(chatbot=chatbot, history=history)
+
+
+        """
+        处理游戏的主体逻辑
+        """
+        if self.cur_task == 'head_start':
+            """
+            这是游戏的第一步
+            """
+            inputs_ = prompts_hs.format(headstart=self.headstart)
+            history_ = []
+            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
+                inputs_, '故事开头', self.llm_kwargs,
+                chatbot, history_, self.sys_prompt_
+            )
+            self.story.append(story_paragraph)
+            # # 配图
+            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '
<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
+            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>
'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.) + + # # 构建后续剧情引导 + previously_on_story = "" + for s in self.story: + previously_on_story += s + '\n' + inputs_ = prompts_interact.format(previously_on_story=previously_on_story) + history_ = [] + self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs_, '请在以下几种故事走向中,选择一种(当然,您也可以选择给出其他故事走向):', self.llm_kwargs, + chatbot, + history_, + self.sys_prompt_ + ) + self.cur_task = 'user_choice' + + + elif self.cur_task == 'user_choice': + """ + 根据用户的提示,确定故事的下一步 + """ + if '请在以下几种故事走向中,选择一种' in chatbot[-1][0]: chatbot.pop(-1) + previously_on_story = "" + for s in self.story: + previously_on_story += s + '\n' + inputs_ = prompts_resume.format(previously_on_story=previously_on_story, choice=self.next_choices, user_choice=prompt) + history_ = [] + story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs_, f'下一段故事(您的选择是:{prompt})。', self.llm_kwargs, + chatbot, history_, self.sys_prompt_ + ) + self.story.append(story_paragraph) + # # 配图 + yield from update_ui_lastest_msg(lastmsg=story_paragraph + '
<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
+            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>
'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.) + + # # 构建后续剧情引导 + previously_on_story = "" + for s in self.story: + previously_on_story += s + '\n' + inputs_ = prompts_interact.format(previously_on_story=previously_on_story) + history_ = [] + self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs_, + '请在以下几种故事走向中,选择一种。当然,您也可以给出您心中的其他故事走向。另外,如果您希望剧情立即收尾,请输入剧情走向,并以“剧情收尾”四个字提示程序。', self.llm_kwargs, + chatbot, + history_, + self.sys_prompt_ + ) + self.cur_task = 'user_choice' + + + elif self.cur_task == 'story_terminate': + """ + 根据用户的提示,确定故事的结局 + """ + previously_on_story = "" + for s in self.story: + previously_on_story += s + '\n' + inputs_ = prompts_terminate.format(previously_on_story=previously_on_story, user_choice=prompt) + history_ = [] + story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs_, f'故事收尾(您的选择是:{prompt})。', self.llm_kwargs, + chatbot, history_, self.sys_prompt_ + ) + # # 配图 + yield from update_ui_lastest_msg(lastmsg=story_paragraph + '
<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
+            yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>
'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.) + + # terminate game + self.delete_game = True + return diff --git a/crazy_functions/互动小游戏.py b/crazy_functions/互动小游戏.py index a6871b34..f3786c31 100644 --- a/crazy_functions/互动小游戏.py +++ b/crazy_functions/互动小游戏.py @@ -3,47 +3,28 @@ from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseSta from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from request_llms.bridge_all import predict_no_ui_long_connection from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing -import random - - -class MiniGame_ASCII_Art(GptAcademicGameBaseState): - - def step(self, prompt, chatbot, history): - if self.step_cnt == 0: - chatbot.append(["我画你猜(动物)", "请稍等..."]) - else: - if prompt.strip() == 'exit': - self.delete_game = True - yield from update_ui_lastest_msg(lastmsg=f"谜底是{self.obj},游戏结束。", chatbot=chatbot, history=history, delay=0.) - return - chatbot.append([prompt, ""]) - yield from update_ui(chatbot=chatbot, history=history) - - if self.step_cnt == 0: - self.lock_plugin(chatbot) - self.cur_task = 'draw' - - if self.cur_task == 'draw': - avail_obj = ["狗","猫","鸟","鱼","老鼠","蛇"] - self.obj = random.choice(avail_obj) - inputs = "I want to play a game called Guess the ASCII art. You can draw the ASCII art and I will try to guess it. " + f"This time you draw a {self.obj}. Note that you must not indicate what you have draw in the text, and you should only produce the ASCII art wrapped by ```. " - raw_res = predict_no_ui_long_connection(inputs=inputs, llm_kwargs=self.llm_kwargs, history=[], sys_prompt="") - self.cur_task = 'identify user guess' - res = get_code_block(raw_res) - history += ['', f'the answer is {self.obj}', inputs, res] - yield from update_ui_lastest_msg(lastmsg=res, chatbot=chatbot, history=history, delay=0.) - - elif self.cur_task == 'identify user guess': - if is_same_thing(self.obj, prompt, self.llm_kwargs): - self.delete_game = True - yield from update_ui_lastest_msg(lastmsg="你猜对了!", chatbot=chatbot, history=history, delay=0.) - else: - self.cur_task = 'identify user guess' - yield from update_ui_lastest_msg(lastmsg="猜错了,再试试,输入“exit”获取答案。", chatbot=chatbot, history=history, delay=0.) 
- @CatchException def 随机小游戏(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + from crazy_functions.game_fns.game_interactive_story import MiniGame_ResumeStory + # 清空历史 + history = [] + # 选择游戏 + cls = MiniGame_ResumeStory + # 如果之前已经初始化了游戏实例,则继续该实例;否则重新初始化 + state = cls.sync_state(chatbot, + llm_kwargs, + cls, + plugin_name='MiniGame_ResumeStory', + callback_fn='crazy_functions.互动小游戏->随机小游戏', + lock_plugin=True + ) + yield from state.continue_game(prompt, chatbot, history) + + +@CatchException +def 随机小游戏1(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + from crazy_functions.game_fns.game_ascii_art import MiniGame_ASCII_Art # 清空历史 history = [] # 选择游戏 @@ -53,7 +34,7 @@ def 随机小游戏(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_ llm_kwargs, cls, plugin_name='MiniGame_ASCII_Art', - callback_fn='crazy_functions.互动小游戏->随机小游戏', + callback_fn='crazy_functions.互动小游戏->随机小游戏1', lock_plugin=True ) yield from state.continue_game(prompt, chatbot, history) From 439147e4b77732a845b2ec61fac9bef1293d3cbb Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sun, 17 Dec 2023 15:55:15 +0800 Subject: [PATCH 65/88] re-arrange main.py --- main.py | 106 ++++++++++++++-------------------------------- themes/cookies.py | 0 themes/theme.py | 96 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 125 insertions(+), 77 deletions(-) create mode 100644 themes/cookies.py diff --git a/main.py b/main.py index 53fb6889..578dd7ca 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,17 @@ import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染 -import pickle -import base64 + +help_menu_description = \ +"""Github源代码开源和更新[地址🚀](https://github.com/binary-husky/gpt_academic), +感谢热情的[开发者们❤️](https://github.com/binary-husky/gpt_academic/graphs/contributors). +
</br></br>
常见问题请查阅[项目Wiki](https://github.com/binary-husky/gpt_academic/wiki), +如遇到Bug请前往[Bug反馈](https://github.com/binary-husky/gpt_academic/issues). +
</br></br>
普通对话使用说明: 1. 输入问题; 2. 点击提交 +
</br></br>
基础功能区使用说明: 1. 输入文本; 2. 点击任意基础功能区按钮 +
</br></br>
函数插件区使用说明: 1. 输入路径/问题, 或者上传文件; 2. 点击任意函数插件区按钮 +
</br></br>
虚空终端使用说明: 点击虚空终端, 然后根据提示输入指令, 再次点击虚空终端 +
</br></br>
如何保存对话: 点击保存当前的对话按钮 +
</br></br>
如何语音对话: 请阅读Wiki +
</br></br>
如何临时更换API_KEY: 在输入区输入临时API_KEY后提交(网页刷新后失效)""" def main(): import gradio as gr @@ -8,7 +19,7 @@ def main(): raise ModuleNotFoundError("使用项目内置Gradio获取最优体验! 请运行 `pip install -r requirements.txt` 指令安装内置Gradio及其他依赖, 详情信息见requirements.txt.") from request_llms.bridge_all import predict from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith - # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 + # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址 proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION') CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = get_conf('CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT') ENABLE_AUDIO, AUTO_CLEAR_TXT, PATH_LOGGING, AVAIL_THEMES, THEME = get_conf('ENABLE_AUDIO', 'AUTO_CLEAR_TXT', 'PATH_LOGGING', 'AVAIL_THEMES', 'THEME') @@ -18,21 +29,11 @@ def main(): # 如果WEB_PORT是-1, 则随机选取WEB端口 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT from check_proxy import get_current_version - from themes.theme import adjust_theme, advanced_css, theme_declaration, load_dynamic_theme - + from themes.theme import adjust_theme, advanced_css, theme_declaration + from themes.theme import js_code_for_css_changing, js_code_for_darkmode_init, js_code_for_toggle_darkmode, js_code_for_persistent_cookie_init + from themes.theme import load_dynamic_theme, to_cookie_str, from_cookie_str, init_cookie title_html = f"
<h1 align=\"center\">
GPT 学术优化 {get_current_version()}
</h1>
{theme_declaration}" - description = "Github源代码开源和更新[地址🚀](https://github.com/binary-husky/gpt_academic), " - description += "感谢热情的[开发者们❤️](https://github.com/binary-husky/gpt_academic/graphs/contributors)." - description += "
</br></br>
常见问题请查阅[项目Wiki](https://github.com/binary-husky/gpt_academic/wiki), " - description += "如遇到Bug请前往[Bug反馈](https://github.com/binary-husky/gpt_academic/issues)." - description += "
</br></br>
普通对话使用说明: 1. 输入问题; 2. 点击提交" - description += "
</br></br>
基础功能区使用说明: 1. 输入文本; 2. 点击任意基础功能区按钮" - description += "
</br></br>
函数插件区使用说明: 1. 输入路径/问题, 或者上传文件; 2. 点击任意函数插件区按钮" - description += "
</br></br>
虚空终端使用说明: 点击虚空终端, 然后根据提示输入指令, 再次点击虚空终端" - description += "
</br></br>
如何保存对话: 点击保存当前的对话按钮" - description += "
</br></br>
如何语音对话: 请阅读Wiki" - description += "
</br></br>
如何临时更换API_KEY: 在输入区输入临时API_KEY后提交(网页刷新后失效)" - + # 问询记录, python 版本建议3.9+(越新越好) import logging, uuid os.makedirs(PATH_LOGGING, exist_ok=True) @@ -162,16 +163,10 @@ def main(): checkboxes_2 = gr.CheckboxGroup(["自定义菜单"], value=[], label="显示/隐藏自定义菜单", elem_id='cbs').style(container=False) dark_mode_btn = gr.Button("切换界面明暗 ☀", variant="secondary").style(size="sm") - dark_mode_btn.click(None, None, None, _js="""() => { - if (document.querySelectorAll('.dark').length) { - document.querySelectorAll('.dark').forEach(el => el.classList.remove('dark')); - } else { - document.querySelector('body').classList.add('dark'); - } - }""", + dark_mode_btn.click(None, None, None, _js=js_code_for_toggle_darkmode, ) with gr.Tab("帮助", elem_id="interact-panel"): - gr.Markdown(description) + gr.Markdown(help_menu_description) with gr.Floating(init_x="20%", init_y="50%", visible=False, width="40%", drag="top") as area_input_secondary: with gr.Accordion("浮动输入区", open=True, elem_id="input-panel2"): @@ -186,16 +181,6 @@ def main(): stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm") clearBtn2 = gr.Button("清除", variant="secondary", visible=False); clearBtn2.style(size="sm") - def to_cookie_str(d): - # Pickle the dictionary and encode it as a string - pickled_dict = pickle.dumps(d) - cookie_value = base64.b64encode(pickled_dict).decode('utf-8') - return cookie_value - - def from_cookie_str(c): - # Decode the base64-encoded string and unpickle it into a dictionary - pickled_dict = base64.b64decode(c.encode('utf-8')) - return pickle.loads(pickled_dict) with gr.Floating(init_x="20%", init_y="50%", visible=False, width="40%", drag="top") as area_customize: with gr.Accordion("自定义菜单", open=True, elem_id="edit-panel"): @@ -227,11 +212,11 @@ def main(): else: ret.update({predefined_btns[basic_btn_dropdown_]: gr.update(visible=True, value=basic_fn_title)}) ret.update({cookies: cookies_}) - try: persistent_cookie_ = from_cookie_str(persistent_cookie_) # persistent cookie to dict + try: persistent_cookie_ = from_cookie_str(persistent_cookie_) # persistent cookie to dict except: persistent_cookie_ = {} - persistent_cookie_["custom_bnt"] = customize_fn_overwrite_ # dict update new value - persistent_cookie_ = to_cookie_str(persistent_cookie_) # persistent cookie to dict - ret.update({persistent_cookie: persistent_cookie_}) # write persistent cookie + persistent_cookie_["custom_bnt"] = customize_fn_overwrite_ # dict update new value + persistent_cookie_ = to_cookie_str(persistent_cookie_) # persistent cookie to dict + ret.update({persistent_cookie: persistent_cookie_}) # write persistent cookie return ret def reflesh_btn(persistent_cookie_, cookies_): @@ -252,10 +237,11 @@ def main(): else: ret.update({predefined_btns[k]: gr.update(visible=True, value=v['Title'])}) return ret - basic_fn_load.click(reflesh_btn, [persistent_cookie, cookies],[cookies, *customize_btns.values(), *predefined_btns.values()]) + basic_fn_load.click(reflesh_btn, [persistent_cookie, cookies], [cookies, *customize_btns.values(), *predefined_btns.values()]) h = basic_fn_confirm.click(assign_btn, [persistent_cookie, cookies, basic_btn_dropdown, basic_fn_title, basic_fn_prefix, basic_fn_suffix], [persistent_cookie, cookies, *customize_btns.values(), *predefined_btns.values()]) - h.then(None, [persistent_cookie], None, _js="""(persistent_cookie)=>{setCookie("persistent_cookie", persistent_cookie, 5);}""") # save persistent cookie + # save persistent cookie + h.then(None, [persistent_cookie], None, 
_js="""(persistent_cookie)=>{setCookie("persistent_cookie", persistent_cookie, 5);}""") # 功能区显示开关与功能区的互动 def fn_area_visibility(a): @@ -342,18 +328,7 @@ def main(): None, [secret_css], None, - _js="""(css) => { - var existingStyles = document.querySelectorAll("style[data-loaded-css]"); - for (var i = 0; i < existingStyles.length; i++) { - var style = existingStyles[i]; - style.parentNode.removeChild(style); - } - var styleElement = document.createElement('style'); - styleElement.setAttribute('data-loaded-css', css); - styleElement.innerHTML = css; - document.head.appendChild(styleElement); - } - """ + _js=js_code_for_css_changing ) # 随变按钮的回调函数注册 def route(request: gr.Request, k, *args, **kwargs): @@ -385,27 +360,10 @@ def main(): rad.feed(cookies['uuid'].hex, audio) audio_mic.stream(deal_audio, inputs=[audio_mic, cookies]) - def init_cookie(cookies, chatbot): - # 为每一位访问的用户赋予一个独一无二的uuid编码 - cookies.update({'uuid': uuid.uuid4()}) - return cookies + demo.load(init_cookie, inputs=[cookies, chatbot], outputs=[cookies]) - darkmode_js = """(dark) => { - dark = dark == "True"; - if (document.querySelectorAll('.dark').length) { - if (!dark){ - document.querySelectorAll('.dark').forEach(el => el.classList.remove('dark')); - } - } else { - if (dark){ - document.querySelector('body').classList.add('dark'); - } - } - }""" - load_cookie_js = """(persistent_cookie) => { - return getCookie("persistent_cookie"); - }""" - demo.load(None, inputs=None, outputs=[persistent_cookie], _js=load_cookie_js) + darkmode_js = js_code_for_darkmode_init + demo.load(None, inputs=None, outputs=[persistent_cookie], _js=js_code_for_persistent_cookie_init) demo.load(None, inputs=[dark_mode], outputs=None, _js=darkmode_js) # 配置暗色主题或亮色主题 demo.load(None, inputs=[gr.Textbox(LAYOUT, visible=False)], outputs=None, _js='(LAYOUT)=>{GptAcademicJavaScriptInit(LAYOUT);}') @@ -418,7 +376,7 @@ def main(): def auto_updates(): time.sleep(0); auto_update() def open_browser(): time.sleep(2); webbrowser.open_new_tab(f"http://localhost:{PORT}") - def warm_up_mods(): time.sleep(4); warm_up_modules() + def warm_up_mods(): time.sleep(6); warm_up_modules() threading.Thread(target=auto_updates, name="self-upgrade", daemon=True).start() # 查看自动更新 threading.Thread(target=open_browser, name="open-browser", daemon=True).start() # 打开浏览器页面 diff --git a/themes/cookies.py b/themes/cookies.py new file mode 100644 index 00000000..e69de29b diff --git a/themes/theme.py b/themes/theme.py index f59db9f8..ca6ab3a1 100644 --- a/themes/theme.py +++ b/themes/theme.py @@ -1,6 +1,14 @@ -import gradio as gr +import pickle +import base64 +import uuid from toolbox import get_conf -THEME = get_conf('THEME') + +""" +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +第 1 部分 +加载主题相关的工具函数 +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +""" def load_dynamic_theme(THEME): adjust_dynamic_theme = None @@ -20,4 +28,86 @@ def load_dynamic_theme(THEME): theme_declaration = "" return adjust_theme, advanced_css, theme_declaration, adjust_dynamic_theme -adjust_theme, advanced_css, theme_declaration, _ = load_dynamic_theme(THEME) \ No newline at end of file +adjust_theme, advanced_css, theme_declaration, _ = load_dynamic_theme(get_conf('THEME')) + + + + + + +""" +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +第 2 部分 +cookie相关工具函数 +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +""" + +def init_cookie(cookies, chatbot): + # 
为每一位访问的用户赋予一个独一无二的uuid编码 + cookies.update({'uuid': uuid.uuid4()}) + return cookies + +def to_cookie_str(d): + # Pickle the dictionary and encode it as a string + pickled_dict = pickle.dumps(d) + cookie_value = base64.b64encode(pickled_dict).decode('utf-8') + return cookie_value + +def from_cookie_str(c): + # Decode the base64-encoded string and unpickle it into a dictionary + pickled_dict = base64.b64decode(c.encode('utf-8')) + return pickle.loads(pickled_dict) + + + + + +""" +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +第 3 部分 +内嵌的javascript代码 +-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +""" + +js_code_for_css_changing = """(css) => { + var existingStyles = document.querySelectorAll("style[data-loaded-css]"); + for (var i = 0; i < existingStyles.length; i++) { + var style = existingStyles[i]; + style.parentNode.removeChild(style); + } + var styleElement = document.createElement('style'); + styleElement.setAttribute('data-loaded-css', css); + styleElement.innerHTML = css; + document.head.appendChild(styleElement); +} +""" + +js_code_for_darkmode_init = """(dark) => { + dark = dark == "True"; + if (document.querySelectorAll('.dark').length) { + if (!dark){ + document.querySelectorAll('.dark').forEach(el => el.classList.remove('dark')); + } + } else { + if (dark){ + document.querySelector('body').classList.add('dark'); + } + } +} +""" + +js_code_for_toggle_darkmode = """() => { + if (document.querySelectorAll('.dark').length) { + document.querySelectorAll('.dark').forEach(el => el.classList.remove('dark')); + } else { + document.querySelector('body').classList.add('dark'); + } +}""" + + +js_code_for_persistent_cookie_init = """(persistent_cookie) => { + return getCookie("persistent_cookie"); +} +""" + + From 6e9936531df445050e058cca7ec6f3b7c10b68cd Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sun, 17 Dec 2023 19:45:37 +0800 Subject: [PATCH 66/88] fix theme shifting bug --- themes/theme.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/themes/theme.py b/themes/theme.py index ca6ab3a1..5664f737 100644 --- a/themes/theme.py +++ b/themes/theme.py @@ -70,15 +70,20 @@ def from_cookie_str(c): """ js_code_for_css_changing = """(css) => { + var existingStyles = document.querySelectorAll("body > gradio-app > div > style") + for (var i = 0; i < existingStyles.length; i++) { + var style = existingStyles[i]; + style.parentNode.removeChild(style); + } var existingStyles = document.querySelectorAll("style[data-loaded-css]"); for (var i = 0; i < existingStyles.length; i++) { var style = existingStyles[i]; style.parentNode.removeChild(style); } var styleElement = document.createElement('style'); - styleElement.setAttribute('data-loaded-css', css); + styleElement.setAttribute('data-loaded-css', 'placeholder'); styleElement.innerHTML = css; - document.head.appendChild(styleElement); + document.body.appendChild(styleElement); } """ From 3c271302cc4b1e06dc1fa8e11ba3bcabce92b018 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 19 Dec 2023 19:30:44 +0800 Subject: [PATCH 67/88] improve long text breakdown perfomance --- crazy_functions/Latex全文润色.py | 4 +- crazy_functions/Latex全文翻译.py | 4 +- crazy_functions/crazy_utils.py | 90 --------------- crazy_functions/ipc_fns/mp.py | 37 ++++++ crazy_functions/latex_fns/latex_actions.py | 11 +- crazy_functions/pdf_fns/breakdown_txt.py | 125 +++++++++++++++++++++ crazy_functions/pdf_fns/parse_pdf.py | 4 +- crazy_functions/总结word文档.py | 8 +- 
crazy_functions/批量Markdown翻译.py | 4 +- crazy_functions/批量总结PDF文档.py | 11 +- crazy_functions/批量翻译PDF文档_多线程.py | 11 +- crazy_functions/理解PDF文档内容.py | 13 +-- crazy_functions/解析JupyterNotebook.py | 12 +- 13 files changed, 186 insertions(+), 148 deletions(-) create mode 100644 crazy_functions/ipc_fns/mp.py create mode 100644 crazy_functions/pdf_fns/breakdown_txt.py diff --git a/crazy_functions/Latex全文润色.py b/crazy_functions/Latex全文润色.py index 0bc7d401..b736fe89 100644 --- a/crazy_functions/Latex全文润色.py +++ b/crazy_functions/Latex全文润色.py @@ -26,8 +26,8 @@ class PaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) diff --git a/crazy_functions/Latex全文翻译.py b/crazy_functions/Latex全文翻译.py index 846bd80d..49470c86 100644 --- a/crazy_functions/Latex全文翻译.py +++ b/crazy_functions/Latex全文翻译.py @@ -26,8 +26,8 @@ class PaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 9778053a..731da1ac 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -312,95 +312,6 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( return gpt_response_collection -def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit): - def cut(txt_tocut, must_break_at_empty_line): # 递归 - if get_token_fn(txt_tocut) <= limit: - return [txt_tocut] - else: - lines = txt_tocut.split('\n') - estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines) - estimated_line_cut = int(estimated_line_cut) - for cnt in reversed(range(estimated_line_cut)): - if must_break_at_empty_line: - if lines[cnt] != "": - continue - print(cnt) - prev = "\n".join(lines[:cnt]) - post = "\n".join(lines[cnt:]) - if get_token_fn(prev) < limit: - break - if cnt == 0: - raise RuntimeError("存在一行极长的文本!") - # print(len(post)) - # 列表递归接龙 - result = [prev] - result.extend(cut(post, must_break_at_empty_line)) - return result - try: - return cut(txt, must_break_at_empty_line=True) - except RuntimeError: - return cut(txt, must_break_at_empty_line=False) - - -def force_breakdown(txt, limit, get_token_fn): - """ - 当无法用标点、空行分割时,我们用最暴力的方法切割 - """ - for i in reversed(range(len(txt))): - if get_token_fn(txt[:i]) < limit: - return txt[:i], txt[i:] - return "Tiktoken未知错误", "Tiktoken未知错误" - -def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit): - # 递归 - def cut(txt_tocut, must_break_at_empty_line, break_anyway=False): - if get_token_fn(txt_tocut) <= limit: - return [txt_tocut] - else: - lines = 
txt_tocut.split('\n') - estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines) - estimated_line_cut = int(estimated_line_cut) - cnt = 0 - for cnt in reversed(range(estimated_line_cut)): - if must_break_at_empty_line: - if lines[cnt] != "": - continue - prev = "\n".join(lines[:cnt]) - post = "\n".join(lines[cnt:]) - if get_token_fn(prev) < limit: - break - if cnt == 0: - if break_anyway: - prev, post = force_breakdown(txt_tocut, limit, get_token_fn) - else: - raise RuntimeError(f"存在一行极长的文本!{txt_tocut}") - # print(len(post)) - # 列表递归接龙 - result = [prev] - result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway)) - return result - try: - # 第1次尝试,将双空行(\n\n)作为切分点 - return cut(txt, must_break_at_empty_line=True) - except RuntimeError: - try: - # 第2次尝试,将单空行(\n)作为切分点 - return cut(txt, must_break_at_empty_line=False) - except RuntimeError: - try: - # 第3次尝试,将英文句号(.)作为切分点 - res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在 - return [r.replace('。\n', '.') for r in res] - except RuntimeError as e: - try: - # 第4次尝试,将中文句号(。)作为切分点 - res = cut(txt.replace('。', '。。\n'), must_break_at_empty_line=False) - return [r.replace('。。\n', '。') for r in res] - except RuntimeError as e: - # 第5次尝试,没办法了,随便切一下敷衍吧 - return cut(txt, must_break_at_empty_line=False, break_anyway=True) - - def read_and_clean_pdf_text(fp): """ @@ -631,7 +542,6 @@ def get_files_from_everything(txt, type): # type='.md' - @Singleton class nougat_interface(): def __init__(self): diff --git a/crazy_functions/ipc_fns/mp.py b/crazy_functions/ipc_fns/mp.py new file mode 100644 index 00000000..575d47cc --- /dev/null +++ b/crazy_functions/ipc_fns/mp.py @@ -0,0 +1,37 @@ +import platform +import pickle +import multiprocessing + +def run_in_subprocess_wrapper_func(v_args): + func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args) + import sys + try: + result = func(*args, **kwargs) + return_dict['result'] = result + except Exception as e: + exc_info = sys.exc_info() + exception_dict['exception'] = exc_info + +def run_in_subprocess_with_timeout(func, timeout=60): + if platform.system() == 'Linux': + def wrapper(*args, **kwargs): + return_dict = multiprocessing.Manager().dict() + exception_dict = multiprocessing.Manager().dict() + v_args = pickle.dumps((func, args, kwargs, return_dict, exception_dict)) + process = multiprocessing.Process(target=run_in_subprocess_wrapper_func, args=(v_args,)) + process.start() + process.join(timeout) + if process.is_alive(): + process.terminate() + raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务') + process.close() + if 'exception' in exception_dict: + # ooops, the subprocess ran into an exception + exc_info = exception_dict['exception'] + raise exc_info[1].with_traceback(exc_info[2]) + if 'result' in return_dict.keys(): + # If the subprocess ran successfully, return the result + return return_dict['result'] + return wrapper + else: + return func \ No newline at end of file diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index b80c01d8..6638c12b 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -176,12 +176,6 @@ class LatexPaperFileGroup(): self.sp_file_index = [] self.sp_file_tag = [] - # count_token - from request_llms.bridge_all import model_info - enc = model_info["gpt-3.5-turbo"]['tokenizer'] - def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) - self.get_token_num = get_token_num - def 
run_file_split(self, max_token_limit=1900): """ use tokenizer to break down text according to max_token_limit @@ -192,13 +186,12 @@ class LatexPaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex") - print('Segmentation: done') def merge_result(self): self.file_result = ["" for _ in range(len(self.file_paths))] diff --git a/crazy_functions/pdf_fns/breakdown_txt.py b/crazy_functions/pdf_fns/breakdown_txt.py new file mode 100644 index 00000000..1db86964 --- /dev/null +++ b/crazy_functions/pdf_fns/breakdown_txt.py @@ -0,0 +1,125 @@ +from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout + +def force_breakdown(txt, limit, get_token_fn): + """ 当无法用标点、空行分割时,我们用最暴力的方法切割 + """ + for i in reversed(range(len(txt))): + if get_token_fn(txt[:i]) < limit: + return txt[:i], txt[i:] + return "Tiktoken未知错误", "Tiktoken未知错误" + + +def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage): + """ 为了加速计算,我们采样一个特殊的手段。当 remain_txt_to_cut > `_max` 时, 我们把 _max 后的文字转存至 remain_txt_to_cut_storage + 当 remain_txt_to_cut < `_min` 时,我们再把 remain_txt_to_cut_storage 中的部分文字取出 + """ + _min = int(5e4) + _max = int(1e5) + # print(len(remain_txt_to_cut), len(remain_txt_to_cut_storage)) + if len(remain_txt_to_cut) < _min and len(remain_txt_to_cut_storage) > 0: + remain_txt_to_cut = remain_txt_to_cut + remain_txt_to_cut_storage + remain_txt_to_cut_storage = "" + if len(remain_txt_to_cut) > _max: + remain_txt_to_cut_storage = remain_txt_to_cut[_max:] + remain_txt_to_cut_storage + remain_txt_to_cut = remain_txt_to_cut[:_max] + return remain_txt_to_cut, remain_txt_to_cut_storage + + +def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False): + """ 文本切分 + """ + res = [] + total_len = len(txt_tocut) + fin_len = 0 + remain_txt_to_cut = txt_tocut + remain_txt_to_cut_storage = "" + # 为了加速计算,我们采样一个特殊的手段。当 remain_txt_to_cut > `_max` 时, 我们把 _max 后的文字转存至 remain_txt_to_cut_storage + remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage) + + while True: + if get_token_fn(remain_txt_to_cut) <= limit: + # 如果剩余文本的token数小于限制,那么就不用切了 + res.append(remain_txt_to_cut); fin_len+=len(remain_txt_to_cut) + break + else: + # 如果剩余文本的token数大于限制,那么就切 + lines = remain_txt_to_cut.split('\n') + + # 估计一个切分点 + estimated_line_cut = limit / get_token_fn(remain_txt_to_cut) * len(lines) + estimated_line_cut = int(estimated_line_cut) + + # 开始查找合适切分点的偏移(cnt) + cnt = 0 + for cnt in reversed(range(estimated_line_cut)): + if must_break_at_empty_line: + # 首先尝试用双空行(\n\n)作为切分点 + if lines[cnt] != "": + continue + prev = "\n".join(lines[:cnt]) + post = "\n".join(lines[cnt:]) + if get_token_fn(prev) < limit: + break + + if cnt == 0: + # 如果没有找到合适的切分点 + if break_anyway: + # 是否允许暴力切分 + prev, post = force_breakdown(txt_tocut, limit, get_token_fn) + else: + # 不允许直接报错 + raise RuntimeError(f"存在一行极长的文本!{txt_tocut}") + + # 追加列表 + res.append(prev); fin_len+=len(prev) + # 准备下一次迭代 + remain_txt_to_cut = post + 
remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage) + process = fin_len/total_len + print(f'\r正在文本切分 {int(process*100)}%', end='') + if len(remain_txt_to_cut.strip()) == 0: + break + return res + + +def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"): + """ 使用多种方式尝试切分文本,以满足 token 限制 + """ + from request_llms.bridge_all import model_info + enc = model_info[llm_model]['tokenizer'] + def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=())) + try: + # 第1次尝试,将双空行(\n\n)作为切分点 + return cut(limit, get_token_fn, txt, must_break_at_empty_line=True) + except RuntimeError: + try: + # 第2次尝试,将单空行(\n)作为切分点 + return cut(limit, get_token_fn, txt, must_break_at_empty_line=False) + except RuntimeError: + try: + # 第3次尝试,将英文句号(.)作为切分点 + res = cut(limit, get_token_fn, txt.replace('.', '。\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在 + return [r.replace('。\n', '.') for r in res] + except RuntimeError as e: + try: + # 第4次尝试,将中文句号(。)作为切分点 + res = cut(limit, get_token_fn, txt.replace('。', '。。\n'), must_break_at_empty_line=False) + return [r.replace('。。\n', '。') for r in res] + except RuntimeError as e: + # 第5次尝试,没办法了,随便切一下吧 + return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True) + +breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(breakdown_text_to_satisfy_token_limit_, timeout=60) + +if __name__ == '__main__': + from crazy_functions.crazy_utils import read_and_clean_pdf_text + file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf") + + from request_llms.bridge_all import model_info + for i in range(5): + file_content += file_content + + print(len(file_content)) + TOKEN_LIMIT_PER_FRAGMENT = 2500 + res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT) + diff --git a/crazy_functions/pdf_fns/parse_pdf.py b/crazy_functions/pdf_fns/parse_pdf.py index 51f8811f..fa27de51 100644 --- a/crazy_functions/pdf_fns/parse_pdf.py +++ b/crazy_functions/pdf_fns/parse_pdf.py @@ -74,7 +74,7 @@ def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chat def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG): from crazy_functions.pdf_fns.report_gen_html import construct_html - from crazy_functions.crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency @@ -116,7 +116,7 @@ def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_fi # find a smooth token limit to achieve even seperation count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT)) token_limit_smooth = raw_token_num // count + count - return breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn=get_token_num, limit=token_limit_smooth) + return breakdown_text_to_satisfy_token_limit(txt, limit=token_limit_smooth, llm_model=llm_kwargs['llm_model']) for section in article_dict.get('sections'): if len(section['text']) == 0: continue diff --git a/crazy_functions/总结word文档.py b/crazy_functions/总结word文档.py index b3923071..6dfe217f 100644 --- a/crazy_functions/总结word文档.py +++ b/crazy_functions/总结word文档.py @@ -31,15 +31,11 @@ def 解析docx(file_manifest, 
project_folder, llm_kwargs, plugin_kwargs, chatbot print(file_content) # private_upload里面的文件名在解压zip后容易出现乱码(rar和7z格式正常),故可以只分析文章内容,不输入文件名 - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit from request_llms.bridge_all import model_info max_token = model_info[llm_kwargs['llm_model']]['max_token'] TOKEN_LIMIT_PER_FRAGMENT = max_token * 3 // 4 - paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=file_content, - get_token_fn=model_info[llm_kwargs['llm_model']]['token_cnt'], - limit=TOKEN_LIMIT_PER_FRAGMENT - ) + paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model']) this_paper_history = [] for i, paper_frag in enumerate(paper_fragments): i_say = f'请对下面的文章片段用中文做概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{paper_frag}```' diff --git a/crazy_functions/批量Markdown翻译.py b/crazy_functions/批量Markdown翻译.py index 12b4ef09..8665d6df 100644 --- a/crazy_functions/批量Markdown翻译.py +++ b/crazy_functions/批量Markdown翻译.py @@ -28,8 +28,8 @@ class PaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py index 7fc3e415..e289c47b 100644 --- a/crazy_functions/批量总结PDF文档.py +++ b/crazy_functions/批量总结PDF文档.py @@ -20,14 +20,9 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, TOKEN_LIMIT_PER_FRAGMENT = 2500 - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - from request_llms.bridge_all import model_info - enc = model_info["gpt-3.5-turbo"]['tokenizer'] - def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) - paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT) - page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model']) + page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=str(page_one), limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model']) # 为了更好的效果,我们剥离Introduction之后的部分(如果有) paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0] diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py index 73cf5920..a1f0f312 100644 --- a/crazy_functions/批量翻译PDF文档_多线程.py +++ b/crazy_functions/批量翻译PDF文档_多线程.py @@ -91,14 +91,9 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars # 递归地切割PDF文件 - from .crazy_utils import 
breakdown_txt_to_satisfy_token_limit_for_pdf - from request_llms.bridge_all import model_info - enc = model_info["gpt-3.5-turbo"]['tokenizer'] - def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) - paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT) - page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=page_one, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model']) + page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=page_one, limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model']) # 为了更好的效果,我们剥离Introduction之后的部分(如果有) paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0] diff --git a/crazy_functions/理解PDF文档内容.py b/crazy_functions/理解PDF文档内容.py index ef967889..439d78ea 100644 --- a/crazy_functions/理解PDF文档内容.py +++ b/crazy_functions/理解PDF文档内容.py @@ -18,14 +18,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro TOKEN_LIMIT_PER_FRAGMENT = 2500 - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - from request_llms.bridge_all import model_info - enc = model_info["gpt-3.5-turbo"]['tokenizer'] - def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) - paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT) - page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model']) + page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=str(page_one), limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model']) # 为了更好的效果,我们剥离Introduction之后的部分(如果有) paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0] @@ -45,7 +40,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro for i in range(n_fragment): NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i]}" - i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i][:200]}" + i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i][:200]} ...." 
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, # i_say=真正给chatgpt的提问, i_say_show_user=给用户看的提问 llm_kwargs, chatbot, history=["The main idea of the previous section is?", last_iteration_result], # 迭代上一次的结果 diff --git a/crazy_functions/解析JupyterNotebook.py b/crazy_functions/解析JupyterNotebook.py index eeccadf7..3c2b5783 100644 --- a/crazy_functions/解析JupyterNotebook.py +++ b/crazy_functions/解析JupyterNotebook.py @@ -12,13 +12,6 @@ class PaperFileGroup(): self.sp_file_index = [] self.sp_file_tag = [] - # count_token - from request_llms.bridge_all import model_info - enc = model_info["gpt-3.5-turbo"]['tokenizer'] - def get_token_num(txt): return len( - enc.encode(txt, disallowed_special=())) - self.get_token_num = get_token_num - def run_file_split(self, max_token_limit=1900): """ 将长文本分离开来 @@ -29,9 +22,8 @@ class PaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf( - file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) From 9479dd984c3ff07bfe0cf963be220299607fbad7 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 19 Dec 2023 19:43:03 +0800 Subject: [PATCH 68/88] avoid adding the same file multiple times to the chatbot's files_to_promote list --- crazy_functions/pdf_fns/breakdown_txt.py | 2 +- crazy_functions/总结word文档.py | 1 - toolbox.py | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crazy_functions/pdf_fns/breakdown_txt.py b/crazy_functions/pdf_fns/breakdown_txt.py index 1db86964..a9614814 100644 --- a/crazy_functions/pdf_fns/breakdown_txt.py +++ b/crazy_functions/pdf_fns/breakdown_txt.py @@ -76,7 +76,7 @@ def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=F remain_txt_to_cut = post remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage) process = fin_len/total_len - print(f'\r正在文本切分 {int(process*100)}%', end='') + print(f'正在文本切分 {int(process*100)}%') if len(remain_txt_to_cut.strip()) == 0: break return res diff --git a/crazy_functions/总结word文档.py b/crazy_functions/总结word文档.py index 6dfe217f..01ee1e6b 100644 --- a/crazy_functions/总结word文档.py +++ b/crazy_functions/总结word文档.py @@ -29,7 +29,6 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot except: raise RuntimeError('请先将.doc文档转换为.docx文档。') - print(file_content) # private_upload里面的文件名在解压zip后容易出现乱码(rar和7z格式正常),故可以只分析文章内容,不输入文件名 from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit from request_llms.bridge_all import model_info diff --git a/toolbox.py b/toolbox.py index bb4ec667..e44d61e5 100644 --- a/toolbox.py +++ b/toolbox.py @@ -583,7 +583,8 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None): if chatbot is not None: if 'files_to_promote' in chatbot._cookies: current = chatbot._cookies['files_to_promote'] else: current = [] - chatbot._cookies.update({'files_to_promote': [new_path] + current}) + if new_path not in current: # 避免把同一个文件添加多次 + chatbot._cookies.update({'files_to_promote': [new_path] + current}) return new_path From 
ac3d4cf073e10fdb854e6daf163af33ec0de1490 Mon Sep 17 00:00:00 2001 From: leike0813 Date: Wed, 20 Dec 2023 07:37:26 +0800 Subject: [PATCH 69/88] Add support for Aliyun Qwen online models. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename model tag "qwen" to "qwen-local" Add model tag "qwen-turbo", "qwen-plus", "qwen-max" Add corresponding model interfaces in request_llms/bridge_all.py Add configuration variable "DASHSCOPE_API_KEY" Rename request_llms/bridge_qwen.py to bridge_qwen_local.py to distinguish it from the online model interface --- config.py | 16 +++- docs/translate_english.json | 2 +- request_llms/bridge_all.py | 38 +++++++- request_llms/bridge_qwen.py | 107 ++++++++++++----------- request_llms/bridge_qwen_local.py | 59 +++++++++++++ request_llms/com_qwenapi.py | 85 ++++++++++++++++++ request_llms/requirements_qwen.txt | 5 +- request_llms/requirements_qwen_local.txt | 4 + tests/test_llms.py | 2 +- 9 files changed, 255 insertions(+), 63 deletions(-) create mode 100644 request_llms/bridge_qwen_local.py create mode 100644 request_llms/com_qwenapi.py create mode 100644 request_llms/requirements_qwen_local.txt diff --git a/config.py b/config.py index 3d809628..17dac34a 100644 --- a/config.py +++ b/config.py @@ -92,8 +92,9 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo-1106","gpt-4-1106-preview","gpt-4-vision-prev "api2d-gpt-3.5-turbo", 'api2d-gpt-3.5-turbo-16k', "gpt-4", "gpt-4-32k", "azure-gpt-4", "api2d-gpt-4", "chatglm3", "moss", "claude-2"] -# P.S. 其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random" -# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama"] +# P.S. 其他可用的模型还包括 ["zhipuai", "qianfan", "deepseekcoder", "llama2", "qwen-local", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-random" +# "spark", "sparkv2", "sparkv3", "chatglm_onnx", "claude-1-100k", "claude-2", "internlm", "jittorllms_pangualpha", "jittorllms_llama" +# "qwen-turbo", "qwen-plus", "qwen-max"] # 定义界面上“询问多个GPT模型”插件应该使用哪些模型,请从AVAIL_LLM_MODELS中选择,并在不同模型之间用`&`间隔,例如"gpt-3.5-turbo&chatglm3&azure-gpt-4" @@ -103,7 +104,11 @@ MULTI_QUERY_LLM_MODELS = "gpt-3.5-turbo&chatglm3" # 选择本地模型变体(只有当AVAIL_LLM_MODELS包含了对应本地模型时,才会起作用) # 如果你选择Qwen系列的模型,那么请在下面的QWEN_LOCAL_MODEL_SELECTION中指定具体的模型 # 也可以是具体的模型路径 -QWEN_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8" +QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8" + + +# 接入通义千问在线大模型 https://dashscope.console.aliyun.com/ +DASHSCOPE_API_KEY = "此处填阿里灵积云API秘钥" # 阿里灵积云API_KEY # 百度千帆(LLM_MODEL="qianfan") @@ -284,6 +289,9 @@ NUM_CUSTOM_BASIC_BTN = 4 │ ├── ZHIPUAI_API_KEY │ └── ZHIPUAI_MODEL │ ├── "qwen-turbo" 等通义千问大模型 │ └── DASHSCOPE_API_KEY │ └── "newbing" Newbing接口不再稳定,不推荐使用 ├── NEWBING_STYLE └── NEWBING_COOKIES @@ -300,7 +308,7 @@ NUM_CUSTOM_BASIC_BTN = 4 ├── "jittorllms_pangualpha" ├── "jittorllms_llama" ├── "deepseekcoder" -├── "qwen" +├── "qwen-local" ├── RWKV的支持见Wiki └── "llama2" diff --git a/docs/translate_english.json b/docs/translate_english.json index 3920e1f6..c48ec6bc 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -2932,7 +2932,7 @@ "3. 输入修改需求": "3.
Enter modification requirements", "刷新界面 由于请求gpt需要一段时间": "Refreshing the interface takes some time due to the request for gpt", "随机小游戏": "Random mini game", - "那么请在下面的QWEN_MODEL_SELECTION中指定具体的模型": "So please specify the specific model in QWEN_MODEL_SELECTION below", + "那么请在下面的QWEN_LOCAL_MODEL_SELECTION中指定具体的模型": "So please specify the specific model in QWEN_LOCAL_MODEL_SELECTION below", "表值": "Table value", "我画你猜": "I draw, you guess", "狗": "Dog", diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index dcfeba92..689b1f97 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -431,16 +431,48 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS: }) except: print(trimmed_format_exc()) -if "qwen" in AVAIL_LLM_MODELS: +if "qwen-local" in AVAIL_LLM_MODELS: + try: + from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui + from .bridge_qwen_local import predict as qwen_local_ui + model_info.update({ + "qwen-local": { + "fn_with_ui": qwen_local_ui, + "fn_without_ui": qwen_local_noui, + "endpoint": None, + "max_token": 4096, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + } + }) + except: + print(trimmed_format_exc()) +if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai try: from .bridge_qwen import predict_no_ui_long_connection as qwen_noui from .bridge_qwen import predict as qwen_ui model_info.update({ - "qwen": { + "qwen-turbo": { "fn_with_ui": qwen_ui, "fn_without_ui": qwen_noui, "endpoint": None, - "max_token": 4096, + "max_token": 6144, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen-plus": { + "fn_with_ui": qwen_ui, + "fn_without_ui": qwen_noui, + "endpoint": None, + "max_token": 30720, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "qwen-max": { + "fn_with_ui": qwen_ui, + "fn_without_ui": qwen_noui, + "endpoint": None, + "max_token": 28672, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, } diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py index 940c41d5..583def8b 100644 --- a/request_llms/bridge_qwen.py +++ b/request_llms/bridge_qwen.py @@ -1,59 +1,66 @@ -model_name = "Qwen" -cmd_to_install = "`pip install -r request_llms/requirements_qwen.txt`" +import time +import os +from toolbox import update_ui, get_conf, update_ui_lastest_msg +from toolbox import check_packages, report_exception -from toolbox import ProxyNetworkActivate, get_conf -from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns +model_name = 'Qwen' + +def validate_key(): + DASHSCOPE_API_KEY = get_conf("DASHSCOPE_API_KEY") + if DASHSCOPE_API_KEY == '': return False + return True + +if not validate_key(): + raise RuntimeError('请配置DASHSCOPE_API_KEY') +os.environ['DASHSCOPE_API_KEY'] = get_conf("DASHSCOPE_API_KEY") +def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False): + """ + ⭐多线程方法 + 函数的说明请见 request_llms/bridge_all.py + """ + watch_dog_patience = 5 + response = "" -# ------------------------------------------------------------------------------------------------------------------------ -# 🔌💻 Local Model -# ------------------------------------------------------------------------------------------------------------------------ -class GetQwenLMHandle(LocalLLMHandle): + from .com_qwenapi import QwenRequestInstance + sri = QwenRequestInstance() + for response in sri.generate(inputs, llm_kwargs, history, sys_prompt): + 
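+        # Note: observe_window is a small mutable list shared with the caller thread:
+        # slot 0 exports the partial reply, slot 1 carries the caller's latest
+        # heartbeat timestamp; if that heartbeat goes stale for more than
+        # watch_dog_patience seconds, the checks below abort this worker.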
if len(observe_window) >= 1: + observe_window[0] = response + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。") + return response - def load_model_info(self): - # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 - self.model_name = model_name - self.cmd_to_install = cmd_to_install +def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): + """ + ⭐单线程方法 + 函数的说明请见 request_llms/bridge_all.py + """ + chatbot.append((inputs, "")) + yield from update_ui(chatbot=chatbot, history=history) - def load_model_and_tokenizer(self): - # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 - # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig - from transformers import AutoModelForCausalLM, AutoTokenizer - from transformers.generation import GenerationConfig - with ProxyNetworkActivate('Download_LLM'): - model_id = get_conf('QWEN_MODEL_SELECTION') - self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True) - # use fp16 - model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval() - model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参 - self._model = model + # 尝试导入依赖,如果缺少依赖,则给出安装建议 + try: + check_packages(["dashscope"]) + except: + yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。", + chatbot=chatbot, history=history, delay=0) + return - return self._model, self._tokenizer + if additional_fn is not None: + from core_functional import handle_core_functionality + inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) - def llm_stream_generator(self, **kwargs): - # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 - def adaptor(kwargs): - query = kwargs['query'] - max_length = kwargs['max_length'] - top_p = kwargs['top_p'] - temperature = kwargs['temperature'] - history = kwargs['history'] - return query, max_length, top_p, temperature, history + # 开始接收回复 + from .com_qwenapi import QwenRequestInstance + sri = QwenRequestInstance() + for response in sri.generate(inputs, llm_kwargs, history, system_prompt): + chatbot[-1] = (inputs, response) + yield from update_ui(chatbot=chatbot, history=history) - query, max_length, top_p, temperature, history = adaptor(kwargs) - - for response in self._model.chat_stream(self._tokenizer, query, history=history): - yield response - - def try_to_import_special_deps(self, **kwargs): - # import something that will raise error if the user does not install requirement_*.txt - # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行 - import importlib - importlib.import_module('modelscope') - - -# ------------------------------------------------------------------------------------------------------------------------ -# 🔌💻 GPT-Academic Interface -# ------------------------------------------------------------------------------------------------------------------------ -predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name) \ No newline at end of file + # 总结输出 + if response == f"[Local Message] 等待{model_name}响应中 ...": + response = f"[Local Message] {model_name}响应异常 ..." 
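+    # Record the finished turn in the conversation history; the chat display
+    # itself was already refreshed by the yields above.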
+ history.extend([inputs, response]) + yield from update_ui(chatbot=chatbot, history=history) \ No newline at end of file diff --git a/request_llms/bridge_qwen_local.py b/request_llms/bridge_qwen_local.py new file mode 100644 index 00000000..4a0fa69a --- /dev/null +++ b/request_llms/bridge_qwen_local.py @@ -0,0 +1,59 @@ +model_name = "Qwen_local" +cmd_to_install = "`pip install -r request_llms/requirements_qwen_local.txt`" + +from toolbox import ProxyNetworkActivate, get_conf +from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns + + + +# ------------------------------------------------------------------------------------------------------------------------ +# 🔌💻 Local Model +# ------------------------------------------------------------------------------------------------------------------------ +class GetQwenLMHandle(LocalLLMHandle): + + def load_model_info(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + self.model_name = model_name + self.cmd_to_install = cmd_to_install + + def load_model_and_tokenizer(self): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig + from transformers import AutoModelForCausalLM, AutoTokenizer + from transformers.generation import GenerationConfig + with ProxyNetworkActivate('Download_LLM'): + model_id = get_conf('QWEN_LOCAL_MODEL_SELECTION') + self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True) + # use fp16 + model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval() + model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True) # 可指定不同的生成长度、top_p等相关超参 + self._model = model + + return self._model, self._tokenizer + + def llm_stream_generator(self, **kwargs): + # 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行 + def adaptor(kwargs): + query = kwargs['query'] + max_length = kwargs['max_length'] + top_p = kwargs['top_p'] + temperature = kwargs['temperature'] + history = kwargs['history'] + return query, max_length, top_p, temperature, history + + query, max_length, top_p, temperature, history = adaptor(kwargs) + + for response in self._model.chat_stream(self._tokenizer, query, history=history): + yield response + + def try_to_import_special_deps(self, **kwargs): + # import something that will raise error if the user does not install requirement_*.txt + # 🏃‍♂️🏃‍♂️🏃‍♂️ 主进程执行 + import importlib + importlib.import_module('modelscope') + + +# ------------------------------------------------------------------------------------------------------------------------ +# 🔌💻 GPT-Academic Interface +# ------------------------------------------------------------------------------------------------------------------------ +predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name) \ No newline at end of file diff --git a/request_llms/com_qwenapi.py b/request_llms/com_qwenapi.py new file mode 100644 index 00000000..63ebdea2 --- /dev/null +++ b/request_llms/com_qwenapi.py @@ -0,0 +1,85 @@ +from http import HTTPStatus +from toolbox import get_conf +import threading +import logging + +timeout_bot_msg = '[Local Message] Request timeout. Network error.' 
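+# A minimal standalone sketch of the streaming call that QwenRequestInstance
+# (below) wraps, assuming a valid DASHSCOPE_API_KEY in the environment:
+#     from dashscope import Generation
+#     rsp = Generation.call(model=Generation.Models.qwen_turbo,
+#                           messages=[{"role": "user", "content": "你好"}],
+#                           result_format='message', stream=True,
+#                           incremental_output=True)
+#     for chunk in rsp:
+#         print(chunk.output.choices[0].message.content)
+# With incremental_output=True every chunk carries only the newly generated
+# delta, which is why generate() accumulates chunks into self.result_buf.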
+ +class QwenRequestInstance(): + def __init__(self): + + self.time_to_yield_event = threading.Event() + self.time_to_exit_event = threading.Event() + + self.result_buf = "" + + def generate(self, inputs, llm_kwargs, history, system_prompt): + # import _thread as thread + from dashscope import Generation + QWEN_MODEL = { + 'qwen-turbo': Generation.Models.qwen_turbo, + 'qwen-plus': Generation.Models.qwen_plus, + 'qwen-max': Generation.Models.qwen_max, + }[llm_kwargs['llm_model']] + top_p = llm_kwargs.get('top_p', 0.8) + if top_p == 0: top_p += 1e-5 + if top_p == 1: top_p -= 1e-5 + + self.result_buf = "" + responses = Generation.call( + model=QWEN_MODEL, + messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt), + top_p=top_p, + temperature=llm_kwargs.get('temperature', 1.0), + result_format='message', + stream=True, + incremental_output=True + ) + + for response in responses: + if response.status_code == HTTPStatus.OK: + if response.output.choices[0].finish_reason == 'stop': + yield self.result_buf + break + elif response.output.choices[0].finish_reason == 'length': + self.result_buf += "[Local Message] 生成长度过长,后续输出被截断" + yield self.result_buf + break + else: + self.result_buf += response.output.choices[0].message.content + yield self.result_buf + else: + self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}" + yield self.result_buf + break + logging.info(f'[raw_input] {inputs}') + logging.info(f'[response] {self.result_buf}') + return self.result_buf + + +def generate_message_payload(inputs, llm_kwargs, history, system_prompt): + conversation_cnt = len(history) // 2 + if system_prompt == '': system_prompt = 'Hello!' + messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}] + if conversation_cnt: + for index in range(0, 2*conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index+1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": + continue + if what_gpt_answer["content"] == timeout_bot_msg: + continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]['content'] = what_gpt_answer['content'] + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = inputs + messages.append(what_i_ask_now) + return messages diff --git a/request_llms/requirements_qwen.txt b/request_llms/requirements_qwen.txt index ea65dee7..5899464f 100644 --- a/request_llms/requirements_qwen.txt +++ b/request_llms/requirements_qwen.txt @@ -1,4 +1 @@ -modelscope -transformers_stream_generator -auto-gptq -optimum \ No newline at end of file +dashscope \ No newline at end of file diff --git a/request_llms/requirements_qwen_local.txt b/request_llms/requirements_qwen_local.txt new file mode 100644 index 00000000..ea65dee7 --- /dev/null +++ b/request_llms/requirements_qwen_local.txt @@ -0,0 +1,4 @@ +modelscope +transformers_stream_generator +auto-gptq +optimum \ No newline at end of file diff --git a/tests/test_llms.py b/tests/test_llms.py index bdb622b7..347c6b99 100644 --- a/tests/test_llms.py +++ b/tests/test_llms.py @@ -18,7 +18,7 @@ if __name__ == "__main__": # from request_llms.bridge_internlm import predict_no_ui_long_connection # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection # from 
request_llms.bridge_qwen_7B import predict_no_ui_long_connection - from request_llms.bridge_qwen import predict_no_ui_long_connection + from request_llms.bridge_qwen_local import predict_no_ui_long_connection # from request_llms.bridge_spark import predict_no_ui_long_connection # from request_llms.bridge_zhipu import predict_no_ui_long_connection # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection From 68a49d3758782772afb912b327acd504fe0f1e99 Mon Sep 17 00:00:00 2001 From: leike0813 Date: Wed, 20 Dec 2023 07:44:53 +0800 Subject: [PATCH 70/88] Add 2 plugins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This effectively splits the 批量总结PDF文档 plugin into two parts, so that a cheap model handles the rough work and only the critical final summary is delegated to GPT-4, cutting usage cost. 批量总结PDF文档_初步: produces a preliminary summary of each PDF, writing one md document per PDF. 批量总结Markdown文档_进阶: condenses all md documents into a single highly distilled md document; the output of 批量总结PDF文档_初步 can be fed to it directly as input. --- crazy_functional.py | 28 ++++ crazy_functions/批量总结Markdown文档_进阶.py | 127 ++++++++++++++++++ crazy_functions/批量总结PDF文档_初步.py | 131 +++++++++++++++++++ 3 files changed, 286 insertions(+) create mode 100644 crazy_functions/批量总结Markdown文档_进阶.py create mode 100644 crazy_functions/批量总结PDF文档_初步.py diff --git a/crazy_functional.py b/crazy_functional.py index 4cc63040..3275f79f 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -603,7 +603,35 @@ def get_crazy_functions(): except: print(trimmed_format_exc()) print('Load function plugin failed') + try: + from crazy_functions.批量总结PDF文档_初步 import 批量总结PDF文档_初步 + function_plugins.update({ + "批量总结PDF文档_初步": { + "Group": "学术", + "Color": "stop", + "AsButton": False, + "Info": "批量总结PDF文档的内容(仅做初步提炼) | 输入参数为路径", + "Function": HotReload(批量总结PDF文档_初步) + } + }) + except: + print(trimmed_format_exc()) + print('Load function plugin failed') + try: + from crazy_functions.批量总结Markdown文档_进阶 import 批量总结Markdown文档_进阶 + function_plugins.update({ + "批量总结Markdown文档_进阶": { + "Group": "学术", + "Color": "stop", + "AsButton": False, + "Info": "批量总结Markdown文档的内容(在初步提炼的基础上进一步总结) | 输入参数为路径", + "Function": HotReload(批量总结Markdown文档_进阶) + } + }) + except: + print(trimmed_format_exc()) + print('Load function plugin failed') # try: # from crazy_functions.chatglm微调工具 import 微调数据集生成 # function_plugins.update({ diff --git a/crazy_functions/批量总结Markdown文档_进阶.py b/crazy_functions/批量总结Markdown文档_进阶.py new file mode 100644 index 00000000..cdbff7a5 --- /dev/null +++ b/crazy_functions/批量总结Markdown文档_进阶.py @@ -0,0 +1,127 @@ +import logging, os +from toolbox import update_ui, promote_file_to_downloadzone, gen_time_str, get_log_folder +from toolbox import CatchException, report_exception, trimmed_format_exc +from toolbox import write_history_to_file, promote_file_to_downloadzone +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency +from .crazy_utils import input_clipping + + +def 总结Markdown(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): + file_write_buffer = [] + SUMMARY_WORD_LIMIT = 800 + meta_inputs_array = [] + meta_inputs_show_user_array = [] + meta_sys_prompt_array = [] + inputs_array = [] + inputs_show_user_array = [] + sys_prompt_array = [] + file_name_array = [] + for idx, file_name in enumerate(file_manifest): + print('begin analysis on:', file_name) + file_name_array.append(f'# {idx}.{os.path.basename(file_name)}') + + with open(file_name, 'r', encoding='utf-8', errors='replace') as f: + file_content = f.read() + + _ = file_content.split('## metadata') + if len(_) >= 2: + file_meta =
_[-2] + file_content = _[-1] + else: + file_meta = file_name + + meta_inputs_array.append( + "我需要你从一段文本中识别并提取出这篇文章的1.标题、2.作者、3.作者单位、4.关键词。" + "其中,1.标题和4.关键词需要给出中文和英文的双语结果,2.作者和3.作者单位按原文语言给出。" + "以下是需要你识别的文本: " + file_meta + ) + meta_inputs_show_user_array.append( + '开始分析元数据:' + file_name + ) + meta_sys_prompt_array.append("As an academic professional, you need to extract basic informations of the paper from its metadata") + + inputs_array.append( + "我需要你根据我提供的文本总结一份Markdown文档,分为四个部分:1.研究背景,2.文章主要内容,3.主要创新点,4.结论。" + + f"各部分的题目采用二级标题前缀(## ),内容可适当的分为若干条,总字数不超过{SUMMARY_WORD_LIMIT}个中文字符." + + "以下是需要你处理的文本: " + file_content) + inputs_show_user_array.append('开始总结:' + file_name) + sys_prompt_array.append(f"As an academic professional, you need to summarize the text with less than {SUMMARY_WORD_LIMIT} Chinese characters") + + gpt_meta_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( + inputs_array=meta_inputs_array, + inputs_show_user_array=meta_inputs_show_user_array, + llm_kwargs=llm_kwargs, + chatbot=chatbot, + history_array=[[""] for _ in range(len(inputs_array))], + sys_prompt_array=meta_sys_prompt_array, + # max_workers=5, # OpenAI所允许的最大并行过载 + scroller_max_len=80 + ) + + gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( + inputs_array=inputs_array, + inputs_show_user_array=inputs_show_user_array, + llm_kwargs=llm_kwargs, + chatbot=chatbot, + history_array=[[""] for _ in range(len(inputs_array))], + sys_prompt_array=sys_prompt_array, + # max_workers=5, # OpenAI所允许的最大并行过载 + scroller_max_len=80 + ) + try: + for idx, (gpt_say_meta, gpt_say) in enumerate(zip(gpt_meta_response_collection[1::2], gpt_response_collection[1::2])): + file_write_buffer.append(file_name_array[idx]) + file_write_buffer.append("## 元数据\n\n" + gpt_say_meta) + file_write_buffer.append(gpt_say) + except: + logging.error(trimmed_format_exc()) + + res = write_history_to_file(file_write_buffer, file_basename="result.md", auto_caption=False) + promote_file_to_downloadzone(res, chatbot=chatbot) + yield from update_ui(chatbot=chatbot, history=gpt_response_collection) # 刷新界面 + + +@CatchException +def 批量总结Markdown文档_进阶(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + import glob, os + + # 基本信息:功能、贡献者 + chatbot.append([ + "函数插件功能?", + "批量总结Markdown文档。函数插件贡献者: ValeriaWong,Eralien,Joshua Reed"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # 尝试导入依赖,如果缺少依赖,则给出安装建议 + try: + import fitz + except: + report_exception(chatbot, history, + a = f"解析项目: {txt}", + b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + # 清空历史,以免输入溢出 + history = [] + + # 检测输入参数,如没有给定输入参数,直接退出 + if os.path.exists(txt): + project_folder = txt + else: + if txt == "": txt = '空空如也的输入栏' + report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + # 搜索需要处理的文件清单 + file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.md', recursive=True)] + + # 如果没找到任何文件 + if len(file_manifest) == 0: + report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.md文件: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + # 开始正式执行任务 + yield from 总结Markdown(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt) diff --git a/crazy_functions/批量总结PDF文档_初步.py 
b/crazy_functions/批量总结PDF文档_初步.py new file mode 100644 index 00000000..5628fa15 --- /dev/null +++ b/crazy_functions/批量总结PDF文档_初步.py @@ -0,0 +1,131 @@ +import zipfile +import os +from toolbox import update_ui, promote_file_to_downloadzone, gen_time_str, get_log_folder +from toolbox import CatchException, report_exception +from toolbox import write_history_to_file, promote_file_to_downloadzone +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from .crazy_utils import read_and_clean_pdf_text +from .crazy_utils import input_clipping +pj = os.path.join + + +def move_file_to_zip(file_path, zip_file): + zip_file.write(file_path, os.path.basename(file_path)) + os.remove(file_path) + + +def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): + zip_file_path = pj(get_log_folder(), 'result.zip') + with zipfile.ZipFile(zip_file_path, 'w') as zip_file: + for file_name in file_manifest: + file_write_buffer = [] + print('begin analysis on:', file_name) + ############################## <第 0 步,切割PDF> ################################## + # 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割) + # 的长度必须小于 2500 个 Token + file_content, page_one = read_and_clean_pdf_text(file_name) # (尝试)按照章节切割PDF + file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + + TOKEN_LIMIT_PER_FRAGMENT = 2500 + + from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + from request_llms.bridge_all import model_info + enc = model_info["gpt-3.5-turbo"]['tokenizer'] + def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) + paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( + txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT) + page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( + txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4) + # 为了更好的效果,我们剥离Introduction之后的部分(如果有) + paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0] + + ############################## <第 1 步,从摘要中提取高价值信息,放到history中> ################################## + final_results = [] + final_results.append("## metadata\n\n" + paper_meta + "\n\n## metadata") + + ############################## <第 2 步,迭代地历遍整个文章,提取精炼信息> ################################## + i_say_show_user = f'首先你在中文语境下通读整篇论文。'; gpt_say = "[Local Message] 收到。" # 用户提示 + chatbot.append([i_say_show_user, gpt_say]); yield from update_ui(chatbot=chatbot, history=[]) # 更新UI + + iteration_results = [] + last_iteration_result = paper_meta # 初始值是摘要 + MAX_WORD_TOTAL = 4096 * 0.7 + n_fragment = len(paper_fragments) + if n_fragment >= 20: print('文章极长,不能达到预期效果') + for i in range(n_fragment): + NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment + i_say = f"Read this section, recapitulate the content of this section in Chinese with less than {NUM_OF_WORD} Chinese characters: {paper_fragments[i]}" + i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} Chinese characters: {paper_fragments[i][:200]}" + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, # i_say=真正给chatgpt的提问, i_say_show_user=给用户看的提问 + llm_kwargs, chatbot, + history=["The main idea of the previous section is?", last_iteration_result], # 迭代上一次的结果 + sys_prompt="Extract the 
main idea of this section with Chinese." # 提示 + ) + iteration_results.append(gpt_say) + last_iteration_result = gpt_say + + ############################## <第 3 步,整理history,提取总结> ################################## + final_results.extend(iteration_results) + file_write_buffer.extend(final_results) + + ############################## <第 4 步,设置一个token上限> ################################## + _, final_results = input_clipping("", final_results, max_token_limit=3200) + yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了 + + res = write_history_to_file( + file_write_buffer, + file_basename=os.path.splitext(os.path.basename(file_name))[0] + '.md', + auto_caption=False + ) + if len(file_manifest) == 1: + promote_file_to_downloadzone(res, chatbot=chatbot) + return + move_file_to_zip(res, zip_file) + + promote_file_to_downloadzone(zip_file_path, chatbot=chatbot) + + +@CatchException +def 批量总结PDF文档_初步(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + import glob, os + + # 基本信息:功能、贡献者 + chatbot.append([ + "函数插件功能?", + "批量总结PDF文档。函数插件贡献者: ValeriaWong,Eralien,Joshua Reed"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # 尝试导入依赖,如果缺少依赖,则给出安装建议 + try: + import fitz + except: + report_exception(chatbot, history, + a = f"解析项目: {txt}", + b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + # 清空历史,以免输入溢出 + history = [] + + # 检测输入参数,如没有给定输入参数,直接退出 + if os.path.exists(txt): + project_folder = txt + else: + if txt == "": txt = '空空如也的输入栏' + report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + # 搜索需要处理的文件清单 + file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)] + + # 如果没找到任何文件 + if len(file_manifest) == 0: + report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex或.pdf文件: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + # 开始正式执行任务 + yield from 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt) From c60a7452bfcaa22588b9fe5bdae3b7fc94b8927b Mon Sep 17 00:00:00 2001 From: leike0813 Date: Wed, 20 Dec 2023 08:57:27 +0800 Subject: [PATCH 71/88] Improve NOUGAT pdf plugin Add an API version of NOUGAT plugin Add advanced argument support to NOUGAT plugin Adapt new text breakdown function bugfix --- config.py | 4 + crazy_functional.py | 13 ++- crazy_functions/crazy_utils.py | 101 +++++++++++++++++++++- crazy_functions/批量总结PDF文档_初步.py | 11 +-- crazy_functions/批量翻译PDF文档_NOUGAT.py | 90 ++++++++++++++++++- 5 files changed, 206 insertions(+), 13 deletions(-) diff --git a/config.py b/config.py index 17dac34a..dc8ef9b1 100644 --- a/config.py +++ b/config.py @@ -217,6 +217,10 @@ GROBID_URLS = [ ] +# NOUGAT_API主机地址 +NOUGAT_URLS = ["http://localhost:8503"] # 此处填写NOUGAT_API的主机地址 + + # 是否允许通过自然语言描述修改本页的配置,该功能具有一定的危险性,默认关闭 ALLOW_RESET_CONFIG = False diff --git a/crazy_functional.py b/crazy_functional.py index 3275f79f..d6a9dd93 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -549,13 +549,24 @@ def get_crazy_functions(): print('Load function plugin failed') try: - from crazy_functions.批量翻译PDF文档_NOUGAT import 批量翻译PDF文档 + from crazy_functions.批量翻译PDF文档_NOUGAT import 批量翻译PDF文档, 批量翻译PDF文档_API function_plugins.update({ "精准翻译PDF文档(NOUGAT)": { "Group": "学术", "Color": "stop", "AsButton": False, + "AdvancedArgs": True, # 
调用时,唤起高级参数输入区(默认False) + "ArgsReminder": "在这里输入自定义参数, 支持的参数有: --batchsize BATCHSIZE, --model MODEL_TAG, --recompute, --full-precision, --no-markdown --no-skipping, --pages PAGES/-p PAGES", # 高级参数输入区的显示提示 "Function": HotReload(批量翻译PDF文档) + }, + "精准翻译PDF文档(NOUGAT_API)": { + "Group": "学术", + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) + "ArgsReminder": "在这里输入自定义参数, 支持的参数有: --batchsize BATCHSIZE, --recompute, --no-markdown --no-skipping, --pages PAGES/-p PAGES (官方版本的API仅支持--pages参数)", + # 高级参数输入区的显示提示 + "Function": HotReload(批量翻译PDF文档_API) } }) except: diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 731da1ac..a583e254 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -545,7 +545,20 @@ def get_files_from_everything(txt, type): # type='.md' @Singleton class nougat_interface(): def __init__(self): + def model_check(model_tag): + if model_tag in ['0.1.0-small', '0.1.0-base']: return model_tag + return '0.1.0-small' + + import argparse self.threadLock = threading.Lock() + self.arg_parser = argparse.ArgumentParser() + self.arg_parser.add_argument('--batchsize', type=int) + self.arg_parser.add_argument('--model', type=model_check) + self.arg_parser.add_argument('--recompute', action='store_true') + self.arg_parser.add_argument('--full-precision', action='store_true') + self.arg_parser.add_argument('--no-markdown', action='store_true') + self.arg_parser.add_argument('--no-skipping', action='store_true') + self.arg_parser.add_argument('--pages', type=str) def nougat_with_timeout(self, command, cwd, timeout=3600): import subprocess @@ -563,7 +576,7 @@ class nougat_interface(): return True - def NOUGAT_parse_pdf(self, fp, chatbot, history): + def NOUGAT_parse_pdf(self, fp, chatbot, history, advanced_cfg=''): from toolbox import update_ui_lastest_msg yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...", @@ -576,7 +589,10 @@ class nougat_interface(): yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... 
(提示:首次运行需要花费较长时间下载NOUGAT参数)", chatbot=chatbot, history=history, delay=0) - self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600) + self.nougat_with_timeout( + f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}" {self.parse_argument(advanced_cfg)}', + os.getcwd(), timeout=3600 + ) res = glob.glob(os.path.join(dst,'*.mmd')) if len(res) == 0: self.threadLock.release() @@ -585,6 +601,87 @@ class nougat_interface(): return res[0] + def NOUGAT_API_parse_pdf(self, fp, chatbot, history, nougat_url, advanced_cfg=''): + from toolbox import update_ui_lastest_msg + + yield from update_ui_lastest_msg("正在解析论文, 请稍候。", + chatbot=chatbot, history=history, delay=0) + + import requests + from toolbox import get_log_folder, gen_time_str + dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str()) + os.makedirs(dst) + + ret = requests.post( + f'{nougat_url}/predict{self.parse_api_argument(advanced_cfg)}', + files={"file": open(fp, "rb")} + ) + if ret.status_code != 200: + raise RuntimeError("Nougat解析论文失败。") + + with open(os.path.join(dst, os.path.basename(fp) + '.mmd'), 'w', encoding='utf8') as f: + f.write(ret.json()) + return os.path.join(dst, os.path.basename(fp) + '.mmd') + + + def parse_argument(self, argument_string): + args, _ = self.arg_parser.parse_known_args(argument_string.split()) + reduce_args = [] + for k, v in args.__dict__.items(): + if (v is not None) and (v is not False): + reduce_args.append('--' + k.replace('_', '-')) + if not isinstance(v, bool) and v is not None: + reduce_args.append(str(v)) + + return ' '.join(reduce_args) + + + def parse_api_argument(self, argument_string): + def parse_pages(pages_string): + if pages_string.count(',') > 0: + pages_list = pages_string.split(',') + page_start = pages_list[0].split('-')[0] if '-' in pages_list[0] else pages_list[0] + page_end = pages_list[-1].split('-')[-1] if '-' in pages_list[-1] else pages_list[-1] + else: + if '-' in pages_string: + page_start = pages_string.split('-')[0] + page_end = pages_string.split('-')[-1] + else: + page_start = page_end = int(pages_string) + + return page_start, page_end + + args, _ = self.arg_parser.parse_known_args(argument_string.split()) + reduce_args = [] + for k, v in args.__dict__.items(): + arg_pair = '' + if (v is not None) and (v is not False): + if k == 'pages': + page_start, page_end = parse_pages(v) + arg_pair = f'start={page_start}&stop={page_end}' + elif k not in ['model', 'full_precision']: + arg_pair = f'{k}={int(v)}' + if arg_pair: + reduce_args.append(arg_pair) + + return '?'
+ '&'.join(reduce_args) + + @staticmethod + def get_avail_nougat_url(): + import random + import requests + NOUGAT_URLS = get_conf('NOUGAT_URLS') + if len(NOUGAT_URLS) == 0: return None + try: + _nougat_url = random.choice(NOUGAT_URLS) # 随机负载均衡 + if _nougat_url.endswith('/'): _nougat_url = _nougat_url.rstrip('/') + ret = requests.get(_nougat_url + '/') + if ret.status_code == 200: + return _nougat_url + else: + return None + except: + return None def try_install_deps(deps, reload_m=[]): diff --git a/crazy_functions/批量总结PDF文档_初步.py b/crazy_functions/批量总结PDF文档_初步.py index 5628fa15..52fd47b7 100644 --- a/crazy_functions/批量总结PDF文档_初步.py +++ b/crazy_functions/批量总结PDF文档_初步.py @@ -29,14 +29,9 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, TOKEN_LIMIT_PER_FRAGMENT = 2500 - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - from request_llms.bridge_all import model_info - enc = model_info["gpt-3.5-turbo"]['tokenizer'] - def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) - paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT) - page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model']) + page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=str(page_one), limit=TOKEN_LIMIT_PER_FRAGMENT // 4, llm_model=llm_kwargs['llm_model']) # 为了更好的效果,我们剥离Introduction之后的部分(如果有) paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0] diff --git a/crazy_functions/批量翻译PDF文档_NOUGAT.py b/crazy_functions/批量翻译PDF文档_NOUGAT.py index 97170d0e..3b841a1d 100644 --- a/crazy_functions/批量翻译PDF文档_NOUGAT.py +++ b/crazy_functions/批量翻译PDF文档_NOUGAT.py @@ -54,7 +54,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst # 基本信息:功能、贡献者 chatbot.append([ "函数插件功能?", - "批量翻译PDF文档。函数插件贡献者: Binary-Husky"]) + "批量翻译PDF文档。函数插件贡献者: Binary-Husky,Joshua Reed"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 清空历史,以免输入溢出 @@ -104,11 +104,13 @@ def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwa DST_LANG = "中文" from crazy_functions.crazy_utils import nougat_interface from crazy_functions.pdf_fns.report_gen_html import construct_html + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + advanced_cfg = plugin_kwargs.get("advanced_arg", '') nougat_handle = nougat_interface() for index, fp in enumerate(file_manifest): if fp.endswith('pdf'): chatbot.append(["当前进度:", f"正在解析论文,请稍候。(第一次运行时,需要花费较长时间下载NOUGAT参数)"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history) + fpp = yield from nougat_handle.NOUGAT_parse_pdf(fp, chatbot, history, advanced_cfg=advanced_cfg) promote_file_to_downloadzone(fpp, rename_file=os.path.basename(fpp)+'.nougat.mmd', chatbot=chatbot) else: chatbot.append(["当前论文无需解析:", fp]); yield from update_ui( chatbot=chatbot, history=history) @@ -123,3 +125,87 @@ def 解析PDF_基于NOUGAT(file_manifest, project_folder, llm_kwargs, plugin_kwa yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 
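+# Worked example of how an advanced-argument string is mapped by the
+# nougat_interface helpers above (following their argparse declaration order):
+#     parse_argument("--pages 1-5 --batchsize 4 --recompute")
+#         -> '--batchsize 4 --recompute --pages 1-5'      (CLI form)
+#     parse_api_argument("--pages 1-5 --batchsize 4 --recompute")
+#         -> '?batchsize=4&recompute=1&start=1&stop=5'    (API query form)
+# The API form deliberately drops --model and --full-precision, which only
+# make sense for the local CLI invocation.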
+@CatchException +def 批量翻译PDF文档_API(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + + disable_auto_promotion(chatbot) + # 基本信息:功能、贡献者 + chatbot.append([ + "函数插件功能?", + "使用NOUGAT_API批量翻译PDF文档。函数插件贡献者: Binary-Husky,Joshua Reed。\n" + + "官方版本API仅支持页码范围选择,若要支持更多参数,请移步https://github.com/leike0813/nougat", + ]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # 清空历史,以免输入溢出 + history = [] + + from .crazy_utils import get_files_from_everything + success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf') + if len(file_manifest) > 0: + # 尝试导入依赖,如果缺少依赖,则给出安装建议 + try: + import tiktoken + except: + report_exception(chatbot, history, + a=f"解析项目: {txt}", + b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + success_mmd, file_manifest_mmd, _ = get_files_from_everything(txt, type='.mmd') + success = success or success_mmd + file_manifest += file_manifest_mmd + chatbot.append(["文件列表:", ", ".join([e.split('/')[-1] for e in file_manifest])]); + yield from update_ui(chatbot=chatbot, history=history) + # 检测输入参数,如没有给定输入参数,直接退出 + if not success: + if txt == "": txt = '空空如也的输入栏' + + # 如果没找到任何文件 + if len(file_manifest) == 0: + report_exception(chatbot, history, + a=f"解析项目: {txt}", b=f"找不到任何.pdf拓展名的文件: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + # 开始正式执行任务 + yield from 解析PDF_基于NOUGAT_API(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt) + + +def 解析PDF_基于NOUGAT_API(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): + import copy + import tiktoken + TOKEN_LIMIT_PER_FRAGMENT = 1024 + generated_conclusion_files = [] + generated_html_files = [] + DST_LANG = "中文" + from crazy_functions.crazy_utils import nougat_interface + from crazy_functions.pdf_fns.report_gen_html import construct_html + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + advanced_cfg = plugin_kwargs.get("advanced_arg", '') + nougat_handle = nougat_interface() + chatbot.append(["当前进度:", f"正在检查NOUGAT服务可用性..."]); + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + nougat_url = nougat_handle.get_avail_nougat_url() + if nougat_url is None: + report_exception(chatbot, history, + a=f"检查结果:", b="NOUGAT服务不可用,请检查config中的NOUGAT_URL") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + for index, fp in enumerate(file_manifest): + if fp.endswith('pdf'): + chatbot.append(["当前进度:", f"正在解析论文,请稍候。"]); yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + fpp = yield from nougat_handle.NOUGAT_API_parse_pdf(fp, chatbot, history, nougat_url, advanced_cfg=advanced_cfg) + promote_file_to_downloadzone(fpp, rename_file=os.path.basename(fpp)+'.nougat.mmd', chatbot=chatbot) + else: + chatbot.append(["当前论文无需解析:", fp]); yield from update_ui(chatbot=chatbot, history=history) + fpp = fp + with open(fpp, 'r', encoding='utf8') as f: + article_content = f.readlines() + article_dict = markdown_to_dict(article_content) + logging.info(article_dict) + yield from translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG) + + chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files))) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 \ No newline at end of file From 
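(For reference, the API variant above first probes for a reachable NOUGAT server; a hedged sketch of that availability check, mirroring get_avail_nougat_url in crazy_functions/crazy_utils.py:)

```python
# Minimal availability probe (assumes a server listed in config.py's NOUGAT_URLS):
import requests

NOUGAT_URLS = ["http://localhost:8503"]
for url in NOUGAT_URLS:
    try:
        if requests.get(url.rstrip('/') + '/').status_code == 200:
            print("available:", url)
            break
    except requests.RequestException:
        pass
```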
a0bfa7ba1c6e990bfc32772774093c4ab2a926a2 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 19 Dec 2023 19:30:44 +0800 Subject: [PATCH 72/88] improve long text breakdown performance --- crazy_functions/Latex全文润色.py | 4 +- crazy_functions/Latex全文翻译.py | 4 +- crazy_functions/crazy_utils.py | 90 --------------- crazy_functions/ipc_fns/mp.py | 37 ++++++ crazy_functions/latex_fns/latex_actions.py | 6 +- crazy_functions/pdf_fns/breakdown_txt.py | 125 +++++++++++++++++++++ crazy_functions/pdf_fns/parse_pdf.py | 4 +- crazy_functions/总结word文档.py | 8 +- crazy_functions/批量Markdown翻译.py | 4 +- crazy_functions/批量总结PDF文档.py | 11 +- crazy_functions/批量翻译PDF文档_多线程.py | 11 +- crazy_functions/理解PDF文档内容.py | 13 +-- crazy_functions/解析JupyterNotebook.py | 12 +- 13 files changed, 186 insertions(+), 143 deletions(-) create mode 100644 crazy_functions/ipc_fns/mp.py create mode 100644 crazy_functions/pdf_fns/breakdown_txt.py diff --git a/crazy_functions/Latex全文润色.py b/crazy_functions/Latex全文润色.py index 0bc7d401..b736fe89 100644 --- a/crazy_functions/Latex全文润色.py +++ b/crazy_functions/Latex全文润色.py @@ -26,8 +26,8 @@ class PaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) diff --git a/crazy_functions/Latex全文翻译.py b/crazy_functions/Latex全文翻译.py index 846bd80d..49470c86 100644 --- a/crazy_functions/Latex全文翻译.py +++ b/crazy_functions/Latex全文翻译.py @@ -26,8 +26,8 @@ class PaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 9778053a..731da1ac 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -312,95 +312,6 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( return gpt_response_collection -def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit): - def cut(txt_tocut, must_break_at_empty_line): # 递归 - if get_token_fn(txt_tocut) <= limit: - return [txt_tocut] - else: - lines = txt_tocut.split('\n') - estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines) - estimated_line_cut = int(estimated_line_cut) - for cnt in reversed(range(estimated_line_cut)): - if must_break_at_empty_line: - if lines[cnt] != "": - continue - print(cnt) - prev = "\n".join(lines[:cnt]) - post = "\n".join(lines[cnt:]) - if get_token_fn(prev) < limit: - break - if cnt == 0: - raise RuntimeError("存在一行极长的文本!") - # print(len(post)) - # 列表递归接龙 - result = [prev] - result.extend(cut(post, must_break_at_empty_line)) - return result - try: - return cut(txt,
must_break_at_empty_line=True) - except RuntimeError: - return cut(txt, must_break_at_empty_line=False) - - -def force_breakdown(txt, limit, get_token_fn): - """ - 当无法用标点、空行分割时,我们用最暴力的方法切割 - """ - for i in reversed(range(len(txt))): - if get_token_fn(txt[:i]) < limit: - return txt[:i], txt[i:] - return "Tiktoken未知错误", "Tiktoken未知错误" - -def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit): - # 递归 - def cut(txt_tocut, must_break_at_empty_line, break_anyway=False): - if get_token_fn(txt_tocut) <= limit: - return [txt_tocut] - else: - lines = txt_tocut.split('\n') - estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines) - estimated_line_cut = int(estimated_line_cut) - cnt = 0 - for cnt in reversed(range(estimated_line_cut)): - if must_break_at_empty_line: - if lines[cnt] != "": - continue - prev = "\n".join(lines[:cnt]) - post = "\n".join(lines[cnt:]) - if get_token_fn(prev) < limit: - break - if cnt == 0: - if break_anyway: - prev, post = force_breakdown(txt_tocut, limit, get_token_fn) - else: - raise RuntimeError(f"存在一行极长的文本!{txt_tocut}") - # print(len(post)) - # 列表递归接龙 - result = [prev] - result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway)) - return result - try: - # 第1次尝试,将双空行(\n\n)作为切分点 - return cut(txt, must_break_at_empty_line=True) - except RuntimeError: - try: - # 第2次尝试,将单空行(\n)作为切分点 - return cut(txt, must_break_at_empty_line=False) - except RuntimeError: - try: - # 第3次尝试,将英文句号(.)作为切分点 - res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在 - return [r.replace('。\n', '.') for r in res] - except RuntimeError as e: - try: - # 第4次尝试,将中文句号(。)作为切分点 - res = cut(txt.replace('。', '。。\n'), must_break_at_empty_line=False) - return [r.replace('。。\n', '。') for r in res] - except RuntimeError as e: - # 第5次尝试,没办法了,随便切一下敷衍吧 - return cut(txt, must_break_at_empty_line=False, break_anyway=True) - - def read_and_clean_pdf_text(fp): """ @@ -631,7 +542,6 @@ def get_files_from_everything(txt, type): # type='.md' - @Singleton class nougat_interface(): def __init__(self): diff --git a/crazy_functions/ipc_fns/mp.py b/crazy_functions/ipc_fns/mp.py new file mode 100644 index 00000000..575d47cc --- /dev/null +++ b/crazy_functions/ipc_fns/mp.py @@ -0,0 +1,37 @@ +import platform +import pickle +import multiprocessing + +def run_in_subprocess_wrapper_func(v_args): + func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args) + import sys + try: + result = func(*args, **kwargs) + return_dict['result'] = result + except Exception as e: + exc_info = sys.exc_info() + exception_dict['exception'] = exc_info + +def run_in_subprocess_with_timeout(func, timeout=60): + if platform.system() == 'Linux': + def wrapper(*args, **kwargs): + return_dict = multiprocessing.Manager().dict() + exception_dict = multiprocessing.Manager().dict() + v_args = pickle.dumps((func, args, kwargs, return_dict, exception_dict)) + process = multiprocessing.Process(target=run_in_subprocess_wrapper_func, args=(v_args,)) + process.start() + process.join(timeout) + if process.is_alive(): + process.terminate() + raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务') + process.close() + if 'exception' in exception_dict: + # ooops, the subprocess ran into an exception + exc_info = exception_dict['exception'] + raise exc_info[1].with_traceback(exc_info[2]) + if 'result' in return_dict.keys(): + # If the subprocess ran successfully, return the result + return return_dict['result'] + return wrapper + else: + return func \ No newline at end of file diff --git 
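(The new crazy_functions/ipc_fns/mp.py above runs a function inside a disposable worker process with a hard timeout; a minimal usage sketch, using a hypothetical slow function:)

```python
# Hypothetical illustration of run_in_subprocess_with_timeout's contract:
from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout

def count_words(text):  # stand-in for an expensive text operation
    return len(text.split())

safe_count = run_in_subprocess_with_timeout(count_words, timeout=5)
print(safe_count("a b c"))  # -> 3; raises TimeoutError if the call hangs
# Note: the subprocess path is only taken on Linux; on other platforms the
# original function is returned unchanged and runs inline, without a timeout.
```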
a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py index b80c01d8..8772f5e1 100644 --- a/crazy_functions/latex_fns/latex_actions.py +++ b/crazy_functions/latex_fns/latex_actions.py @@ -175,7 +175,6 @@ class LatexPaperFileGroup(): self.sp_file_contents = [] self.sp_file_index = [] self.sp_file_tag = [] - # count_token from request_llms.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] @@ -192,13 +191,12 @@ class LatexPaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex") - print('Segmentation: done') def merge_result(self): self.file_result = ["" for _ in range(len(self.file_paths))] diff --git a/crazy_functions/pdf_fns/breakdown_txt.py b/crazy_functions/pdf_fns/breakdown_txt.py new file mode 100644 index 00000000..1db86964 --- /dev/null +++ b/crazy_functions/pdf_fns/breakdown_txt.py @@ -0,0 +1,125 @@ +from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout + +def force_breakdown(txt, limit, get_token_fn): + """ 当无法用标点、空行分割时,我们用最暴力的方法切割 + """ + for i in reversed(range(len(txt))): + if get_token_fn(txt[:i]) < limit: + return txt[:i], txt[i:] + return "Tiktoken未知错误", "Tiktoken未知错误" + + +def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage): + """ 为了加速计算,我们采样一个特殊的手段。当 remain_txt_to_cut > `_max` 时, 我们把 _max 后的文字转存至 remain_txt_to_cut_storage + 当 remain_txt_to_cut < `_min` 时,我们再把 remain_txt_to_cut_storage 中的部分文字取出 + """ + _min = int(5e4) + _max = int(1e5) + # print(len(remain_txt_to_cut), len(remain_txt_to_cut_storage)) + if len(remain_txt_to_cut) < _min and len(remain_txt_to_cut_storage) > 0: + remain_txt_to_cut = remain_txt_to_cut + remain_txt_to_cut_storage + remain_txt_to_cut_storage = "" + if len(remain_txt_to_cut) > _max: + remain_txt_to_cut_storage = remain_txt_to_cut[_max:] + remain_txt_to_cut_storage + remain_txt_to_cut = remain_txt_to_cut[:_max] + return remain_txt_to_cut, remain_txt_to_cut_storage + + +def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False): + """ 文本切分 + """ + res = [] + total_len = len(txt_tocut) + fin_len = 0 + remain_txt_to_cut = txt_tocut + remain_txt_to_cut_storage = "" + # 为了加速计算,我们采样一个特殊的手段。当 remain_txt_to_cut > `_max` 时, 我们把 _max 后的文字转存至 remain_txt_to_cut_storage + remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage) + + while True: + if get_token_fn(remain_txt_to_cut) <= limit: + # 如果剩余文本的token数小于限制,那么就不用切了 + res.append(remain_txt_to_cut); fin_len+=len(remain_txt_to_cut) + break + else: + # 如果剩余文本的token数大于限制,那么就切 + lines = remain_txt_to_cut.split('\n') + + # 估计一个切分点 + estimated_line_cut = limit / get_token_fn(remain_txt_to_cut) * len(lines) + estimated_line_cut = int(estimated_line_cut) + + # 开始查找合适切分点的偏移(cnt) + cnt = 0 + for cnt in reversed(range(estimated_line_cut)): + if must_break_at_empty_line: + # 首先尝试用双空行(\n\n)作为切分点 + if lines[cnt] != "": + continue + prev = "\n".join(lines[:cnt]) + post = 
"\n".join(lines[cnt:]) + if get_token_fn(prev) < limit: + break + + if cnt == 0: + # 如果没有找到合适的切分点 + if break_anyway: + # 是否允许暴力切分 + prev, post = force_breakdown(txt_tocut, limit, get_token_fn) + else: + # 不允许直接报错 + raise RuntimeError(f"存在一行极长的文本!{txt_tocut}") + + # 追加列表 + res.append(prev); fin_len+=len(prev) + # 准备下一次迭代 + remain_txt_to_cut = post + remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage) + process = fin_len/total_len + print(f'\r正在文本切分 {int(process*100)}%', end='') + if len(remain_txt_to_cut.strip()) == 0: + break + return res + + +def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"): + """ 使用多种方式尝试切分文本,以满足 token 限制 + """ + from request_llms.bridge_all import model_info + enc = model_info[llm_model]['tokenizer'] + def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=())) + try: + # 第1次尝试,将双空行(\n\n)作为切分点 + return cut(limit, get_token_fn, txt, must_break_at_empty_line=True) + except RuntimeError: + try: + # 第2次尝试,将单空行(\n)作为切分点 + return cut(limit, get_token_fn, txt, must_break_at_empty_line=False) + except RuntimeError: + try: + # 第3次尝试,将英文句号(.)作为切分点 + res = cut(limit, get_token_fn, txt.replace('.', '。\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在 + return [r.replace('。\n', '.') for r in res] + except RuntimeError as e: + try: + # 第4次尝试,将中文句号(。)作为切分点 + res = cut(limit, get_token_fn, txt.replace('。', '。。\n'), must_break_at_empty_line=False) + return [r.replace('。。\n', '。') for r in res] + except RuntimeError as e: + # 第5次尝试,没办法了,随便切一下吧 + return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True) + +breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(breakdown_text_to_satisfy_token_limit_, timeout=60) + +if __name__ == '__main__': + from crazy_functions.crazy_utils import read_and_clean_pdf_text + file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf") + + from request_llms.bridge_all import model_info + for i in range(5): + file_content += file_content + + print(len(file_content)) + TOKEN_LIMIT_PER_FRAGMENT = 2500 + res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT) + diff --git a/crazy_functions/pdf_fns/parse_pdf.py b/crazy_functions/pdf_fns/parse_pdf.py index 51f8811f..fa27de51 100644 --- a/crazy_functions/pdf_fns/parse_pdf.py +++ b/crazy_functions/pdf_fns/parse_pdf.py @@ -74,7 +74,7 @@ def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chat def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG): from crazy_functions.pdf_fns.report_gen_html import construct_html - from crazy_functions.crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency @@ -116,7 +116,7 @@ def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_fi # find a smooth token limit to achieve even seperation count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT)) token_limit_smooth = raw_token_num // count + count - return breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn=get_token_num, limit=token_limit_smooth) + return breakdown_text_to_satisfy_token_limit(txt, 
limit=token_limit_smooth, llm_model=llm_kwargs['llm_model']) for section in article_dict.get('sections'): if len(section['text']) == 0: continue diff --git a/crazy_functions/总结word文档.py b/crazy_functions/总结word文档.py index b3923071..6dfe217f 100644 --- a/crazy_functions/总结word文档.py +++ b/crazy_functions/总结word文档.py @@ -31,15 +31,11 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot print(file_content) # private_upload里面的文件名在解压zip后容易出现乱码(rar和7z格式正常),故可以只分析文章内容,不输入文件名 - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit from request_llms.bridge_all import model_info max_token = model_info[llm_kwargs['llm_model']]['max_token'] TOKEN_LIMIT_PER_FRAGMENT = max_token * 3 // 4 - paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=file_content, - get_token_fn=model_info[llm_kwargs['llm_model']]['token_cnt'], - limit=TOKEN_LIMIT_PER_FRAGMENT - ) + paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model']) this_paper_history = [] for i, paper_frag in enumerate(paper_fragments): i_say = f'请对下面的文章片段用中文做概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{paper_frag}```' diff --git a/crazy_functions/批量Markdown翻译.py b/crazy_functions/批量Markdown翻译.py index 12b4ef09..8665d6df 100644 --- a/crazy_functions/批量Markdown翻译.py +++ b/crazy_functions/批量Markdown翻译.py @@ -28,8 +28,8 @@ class PaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) diff --git a/crazy_functions/批量总结PDF文档.py b/crazy_functions/批量总结PDF文档.py index 7fc3e415..e289c47b 100644 --- a/crazy_functions/批量总结PDF文档.py +++ b/crazy_functions/批量总结PDF文档.py @@ -20,14 +20,9 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, TOKEN_LIMIT_PER_FRAGMENT = 2500 - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - from request_llms.bridge_all import model_info - enc = model_info["gpt-3.5-turbo"]['tokenizer'] - def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) - paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT) - page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf( - txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model']) + page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=str(page_one), limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model']) # 为了更好的效果,我们剥离Introduction之后的部分(如果有) paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0] diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py 
diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py
index 73cf5920..a1f0f312 100644
--- a/crazy_functions/批量翻译PDF文档_多线程.py
+++ b/crazy_functions/批量翻译PDF文档_多线程.py
@@ -91,14 +91,9 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
         page_one = str(page_one).encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars
 
         # 递归地切割PDF文件
-        from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-        from request_llms.bridge_all import model_info
-        enc = model_info["gpt-3.5-turbo"]['tokenizer']
-        def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
-        paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-            txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
-        page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-            txt=page_one, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
+        from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+        paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
+        page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=page_one, limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model'])
 
         # 为了更好的效果,我们剥离Introduction之后的部分(如果有)
         paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]

diff --git a/crazy_functions/理解PDF文档内容.py b/crazy_functions/理解PDF文档内容.py
index ef967889..439d78ea 100644
--- a/crazy_functions/理解PDF文档内容.py
+++ b/crazy_functions/理解PDF文档内容.py
@@ -18,14 +18,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
 
     TOKEN_LIMIT_PER_FRAGMENT = 2500
 
-    from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-    from request_llms.bridge_all import model_info
-    enc = model_info["gpt-3.5-turbo"]['tokenizer']
-    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
-    paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-        txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
-    page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-        txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
+    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+    paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
+    page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=str(page_one), limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model'])
 
     # 为了更好的效果,我们剥离Introduction之后的部分(如果有)
     paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]
@@ -45,7 +40,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     for i in range(n_fragment):
         NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
         i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i]}"
-        i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i][:200]}"
+        i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i][:200]} ...."
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, # i_say=真正给chatgpt的提问, i_say_show_user=给用户看的提问 llm_kwargs, chatbot, history=["The main idea of the previous section is?", last_iteration_result], # 迭代上一次的结果 diff --git a/crazy_functions/解析JupyterNotebook.py b/crazy_functions/解析JupyterNotebook.py index eeccadf7..3c2b5783 100644 --- a/crazy_functions/解析JupyterNotebook.py +++ b/crazy_functions/解析JupyterNotebook.py @@ -12,13 +12,6 @@ class PaperFileGroup(): self.sp_file_index = [] self.sp_file_tag = [] - # count_token - from request_llms.bridge_all import model_info - enc = model_info["gpt-3.5-turbo"]['tokenizer'] - def get_token_num(txt): return len( - enc.encode(txt, disallowed_special=())) - self.get_token_num = get_token_num - def run_file_split(self, max_token_limit=1900): """ 将长文本分离开来 @@ -29,9 +22,8 @@ class PaperFileGroup(): self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: - from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf - segments = breakdown_txt_to_satisfy_token_limit_for_pdf( - file_content, self.get_token_num, max_token_limit) + from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit + segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) From f7588d477685e8efc16a6bc5e8712b305fe583be Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 19 Dec 2023 19:43:03 +0800 Subject: [PATCH 73/88] avoid adding the same file multiple times to the chatbot's files_to_promote list --- crazy_functions/pdf_fns/breakdown_txt.py | 2 +- crazy_functions/总结word文档.py | 1 - toolbox.py | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crazy_functions/pdf_fns/breakdown_txt.py b/crazy_functions/pdf_fns/breakdown_txt.py index 1db86964..a9614814 100644 --- a/crazy_functions/pdf_fns/breakdown_txt.py +++ b/crazy_functions/pdf_fns/breakdown_txt.py @@ -76,7 +76,7 @@ def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=F remain_txt_to_cut = post remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage) process = fin_len/total_len - print(f'\r正在文本切分 {int(process*100)}%', end='') + print(f'正在文本切分 {int(process*100)}%') if len(remain_txt_to_cut.strip()) == 0: break return res diff --git a/crazy_functions/总结word文档.py b/crazy_functions/总结word文档.py index 6dfe217f..01ee1e6b 100644 --- a/crazy_functions/总结word文档.py +++ b/crazy_functions/总结word文档.py @@ -29,7 +29,6 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot except: raise RuntimeError('请先将.doc文档转换为.docx文档。') - print(file_content) # private_upload里面的文件名在解压zip后容易出现乱码(rar和7z格式正常),故可以只分析文章内容,不输入文件名 from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit from request_llms.bridge_all import model_info diff --git a/toolbox.py b/toolbox.py index bb4ec667..e44d61e5 100644 --- a/toolbox.py +++ b/toolbox.py @@ -583,7 +583,8 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None): if chatbot is not None: if 'files_to_promote' in chatbot._cookies: current = chatbot._cookies['files_to_promote'] else: current = [] - chatbot._cookies.update({'files_to_promote': [new_path] + current}) + if new_path not in current: # 避免把同一个文件添加多次 + chatbot._cookies.update({'files_to_promote': [new_path] + current}) return new_path From 
2b90302851d8d1eaa1c96f2ddbd00ab0d48d9c8c Mon Sep 17 00:00:00 2001 From: Keldos Date: Thu, 21 Dec 2023 10:24:11 +0800 Subject: [PATCH 74/88] =?UTF-8?q?feat:=20drag=20file=20to=20chatbot=20to?= =?UTF-8?q?=20upload=20=E6=8B=96=E5=8A=A8=E4=BB=A5=E4=B8=8A=E4=BC=A0?= =?UTF-8?q?=E6=96=87=E4=BB=B6=20(#1396)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: 拖动以上传文件 * 上传文件过程中转圈圈 * fix: 解决仅在第一次上传时才有上传动画的问题 --------- Co-authored-by: 505030475 --- main.py | 2 + themes/common.js | 191 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 145 insertions(+), 48 deletions(-) diff --git a/main.py b/main.py index 578dd7ca..4858cd77 100644 --- a/main.py +++ b/main.py @@ -292,7 +292,9 @@ def main(): cancel_handles.append(click_handle) # 文件上传区,接收文件后与chatbot的互动 file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]) + file_upload.upload(None, None, None, _js=r"()=>{toast_push('上传完毕, 请等待文件清单展现后继续操作 ...'); cancel_loading_status();}") file_upload_2.upload(on_file_uploaded, [file_upload_2, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]) + file_upload_2.upload(None, None, None, _js=r"()=>{toast_push('上传完毕, 请等待文件清单展现后继续操作 ...'); cancel_loading_status();}") # 函数插件-固定按钮区 for k in plugins: if not plugins[k].get("AsButton", True): continue diff --git a/themes/common.js b/themes/common.js index afa87141..e569178e 100644 --- a/themes/common.js +++ b/themes/common.js @@ -3,7 +3,7 @@ function gradioApp() { const elems = document.getElementsByTagName('gradio-app'); const elem = elems.length == 0 ? document : elems[0]; if (elem !== document) { - elem.getElementById = function(id) { + elem.getElementById = function (id) { return document.getElementById(id); }; } @@ -12,31 +12,31 @@ function gradioApp() { function setCookie(name, value, days) { var expires = ""; - + if (days) { - var date = new Date(); - date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000)); - expires = "; expires=" + date.toUTCString(); + var date = new Date(); + date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000)); + expires = "; expires=" + date.toUTCString(); } - + document.cookie = name + "=" + value + expires + "; path=/"; } function getCookie(name) { var decodedCookie = decodeURIComponent(document.cookie); var cookies = decodedCookie.split(';'); - + for (var i = 0; i < cookies.length; i++) { - var cookie = cookies[i].trim(); - - if (cookie.indexOf(name + "=") === 0) { - return cookie.substring(name.length + 1, cookie.length); - } + var cookie = cookies[i].trim(); + + if (cookie.indexOf(name + "=") === 0) { + return cookie.substring(name.length + 1, cookie.length); + } } - + return null; - } - +} + function addCopyButton(botElement) { // https://github.com/GaiZhenbiao/ChuanhuChatGPT/tree/main/web_assets/javascript // Copy bot button @@ -49,7 +49,7 @@ function addCopyButton(botElement) { // messageBtnColumnElement.remove(); return; } - + var copyButton = document.createElement('button'); copyButton.classList.add('copy-bot-btn'); copyButton.setAttribute('aria-label', 'Copy'); @@ -98,40 +98,38 @@ function chatbotContentChanged(attempt = 1, force = false) { } } -function chatbotAutoHeight(){ +function chatbotAutoHeight() { // 自动调整高度 - function update_height(){ + function update_height() { var { panel_height_target, chatbot_height, chatbot } = get_elements(true); - if (panel_height_target!=chatbot_height) - { + if (panel_height_target != chatbot_height) { var pixelString = 
panel_height_target.toString() + 'px'; - chatbot.style.maxHeight = pixelString; chatbot.style.height = pixelString; + chatbot.style.maxHeight = pixelString; chatbot.style.height = pixelString; } } - function update_height_slow(){ + function update_height_slow() { var { panel_height_target, chatbot_height, chatbot } = get_elements(); - if (panel_height_target!=chatbot_height) - { - new_panel_height = (panel_height_target - chatbot_height)*0.5 + chatbot_height; - if (Math.abs(new_panel_height - panel_height_target) < 10){ + if (panel_height_target != chatbot_height) { + new_panel_height = (panel_height_target - chatbot_height) * 0.5 + chatbot_height; + if (Math.abs(new_panel_height - panel_height_target) < 10) { new_panel_height = panel_height_target; } // console.log(chatbot_height, panel_height_target, new_panel_height); var pixelString = new_panel_height.toString() + 'px'; - chatbot.style.maxHeight = pixelString; chatbot.style.height = pixelString; + chatbot.style.maxHeight = pixelString; chatbot.style.height = pixelString; } } monitoring_input_box() update_height(); - setInterval(function() { + setInterval(function () { update_height_slow() }, 50); // 每100毫秒执行一次 } -function get_elements(consider_state_panel=false) { +function get_elements(consider_state_panel = false) { var chatbot = document.querySelector('#gpt-chatbot > div.wrap.svelte-18telvq'); if (!chatbot) { chatbot = document.querySelector('#gpt-chatbot'); @@ -142,13 +140,13 @@ function get_elements(consider_state_panel=false) { // const panel4 = document.querySelector('#interact-panel').getBoundingClientRect(); const panel5 = document.querySelector('#input-panel2').getBoundingClientRect(); const panel_active = document.querySelector('#state-panel').getBoundingClientRect(); - if (consider_state_panel || panel_active.height < 25){ + if (consider_state_panel || panel_active.height < 25) { document.state_panel_height = panel_active.height; } // 25 是chatbot的label高度, 16 是右侧的gap - var panel_height_target = panel1.height + panel2.height + panel3.height + 0 + 0 - 25 + 16*2; + var panel_height_target = panel1.height + panel2.height + panel3.height + 0 + 0 - 25 + 16 * 2; // 禁止动态的state-panel高度影响 - panel_height_target = panel_height_target + (document.state_panel_height-panel_active.height) + panel_height_target = panel_height_target + (document.state_panel_height - panel_active.height) var panel_height_target = parseInt(panel_height_target); var chatbot_height = chatbot.style.height; var chatbot_height = parseInt(chatbot_height); @@ -173,7 +171,7 @@ function add_func_paste(input) { } if (paste_files.length > 0) { // 按照文件列表执行批量上传逻辑 - await paste_upload_files(paste_files); + await upload_files(paste_files); paste_files = [] } @@ -182,8 +180,42 @@ function add_func_paste(input) { } } +function add_func_drag(elem) { + if (elem) { + const dragEvents = ["dragover", "dragenter"]; + const leaveEvents = ["dragleave", "dragend", "drop"]; -async function paste_upload_files(files) { + const onDrag = function (e) { + e.preventDefault(); + e.stopPropagation(); + if (elem_upload_float.querySelector("input[type=file]")) { + toast_push('释放以上传文件', 50) + } else { + toast_push('⚠️请先删除上传区中的历史文件,再尝试上传。', 50) + } + }; + + const onLeave = function (e) { + e.preventDefault(); + e.stopPropagation(); + }; + + dragEvents.forEach(event => { + elem.addEventListener(event, onDrag); + }); + + leaveEvents.forEach(event => { + elem.addEventListener(event, onLeave); + }); + + elem.addEventListener("drop", async function (e) { + const files = e.dataTransfer.files; + await 
upload_files(files); + }); + } +} + +async function upload_files(files) { const uploadInputElement = elem_upload_float.querySelector("input[type=file]"); let totalSizeMb = 0 if (files && files.length > 0) { @@ -195,19 +227,20 @@ async function paste_upload_files(files) { } // 检查文件总大小是否超过20MB if (totalSizeMb > 20) { - toast_push('⚠️文件夹大于20MB 🚀上传文件中', 2000) + toast_push('⚠️文件夹大于 20MB 🚀上传文件中', 3000) // return; // 如果超过了指定大小, 可以不进行后续上传操作 } - // 监听change事件, 原生Gradio可以实现 + // 监听change事件, 原生Gradio可以实现 // uploadInputElement.addEventListener('change', function(){replace_input_string()}); let event = new Event("change"); - Object.defineProperty(event, "target", {value: uploadInputElement, enumerable: true}); - Object.defineProperty(event, "currentTarget", {value: uploadInputElement, enumerable: true}); - Object.defineProperty(uploadInputElement, "files", {value: files, enumerable: true}); + Object.defineProperty(event, "target", { value: uploadInputElement, enumerable: true }); + Object.defineProperty(event, "currentTarget", { value: uploadInputElement, enumerable: true }); + Object.defineProperty(uploadInputElement, "files", { value: files, enumerable: true }); uploadInputElement.dispatchEvent(event); + // toast_push('🎉上传文件成功', 2000) } else { - toast_push('⚠️请先删除上传区中的历史文件,再尝试粘贴。', 2000) + toast_push('⚠️请先删除上传区中的历史文件,再尝试上传。', 3000) } } } @@ -231,23 +264,85 @@ var elem_upload = null; var elem_upload_float = null; var elem_input_main = null; var elem_input_float = null; +var gptChatbot = null; +function begin_loading_status() { + // Create the loader div and add styling + var loader = document.createElement('div'); + loader.id = 'Js_File_Loading'; + loader.style.position = "absolute"; + loader.style.top = "50%"; + loader.style.left = "50%"; + loader.style.width = "60px"; + loader.style.height = "60px"; + loader.style.border = "16px solid #f3f3f3"; + loader.style.borderTop = "16px solid #3498db"; + loader.style.borderRadius = "50%"; + loader.style.animation = "spin 2s linear infinite"; + loader.style.transform = "translate(-50%, -50%)"; + document.body.appendChild(loader); // Add the loader to the body + // Set the CSS animation keyframes + var styleSheet = document.createElement('style'); + // styleSheet.type = 'text/css'; + styleSheet.id = 'Js_File_Loading_Style' + styleSheet.innerText = ` + @keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } + }`; + document.head.appendChild(styleSheet); +} +function cancel_loading_status() { + var loadingElement = document.getElementById('Js_File_Loading'); + if (loadingElement) { + document.body.removeChild(loadingElement); // remove the loader from the body + } + var loadingStyle = document.getElementById('Js_File_Loading_Style'); + if (loadingStyle) { + document.head.removeChild(loadingStyle); + } + let clearButton = document.querySelectorAll('div[id*="elem_upload"] button[aria-label="Clear"]'); + for (let button of clearButton) { + button.addEventListener('click', function () { + setTimeout(function () { + register_upload_event(); + }, 50); + }); + } +} +function register_upload_event() { + elem_upload_float = document.getElementById('elem_upload_float') + const upload_component = elem_upload_float.querySelector("input[type=file]"); + if (upload_component) { + upload_component.addEventListener('change', function (event) { + toast_push('正在上传中,请稍等。', 2000); + begin_loading_status(); + }); + } +} function monitoring_input_box() { + register_upload_event(); + elem_upload = document.getElementById('elem_upload') elem_upload_float 
= document.getElementById('elem_upload_float') elem_input_main = document.getElementById('user_input_main') elem_input_float = document.getElementById('user_input_float') + if (elem_input_main) { if (elem_input_main.querySelector("textarea")) { add_func_paste(elem_input_main.querySelector("textarea")) } } if (elem_input_float) { - if (elem_input_float.querySelector("textarea")){ + if (elem_input_float.querySelector("textarea")) { add_func_paste(elem_input_float.querySelector("textarea")) } } + gptChatbot = document.getElementById('gpt-chatbot') + if (gptChatbot) { + add_func_drag(gptChatbot) + } } @@ -259,13 +354,13 @@ window.addEventListener("DOMContentLoaded", function () { function audio_fn_init() { let audio_component = document.getElementById('elem_audio'); - if (audio_component){ + if (audio_component) { let buttonElement = audio_component.querySelector('button'); let specificElement = audio_component.querySelector('.hide.sr-only'); specificElement.remove(); buttonElement.childNodes[1].nodeValue = '启动麦克风'; - buttonElement.addEventListener('click', function(event) { + buttonElement.addEventListener('click', function (event) { event.stopPropagation(); toast_push('您启动了麦克风!下一步请点击“实时语音对话”启动语音对话。'); }); @@ -273,14 +368,14 @@ function audio_fn_init() { // 查找语音插件按钮 let buttons = document.querySelectorAll('button'); let audio_button = null; - for(let button of buttons){ - if (button.textContent.includes('语音')){ + for (let button of buttons) { + if (button.textContent.includes('语音')) { audio_button = button; break; } } - if (audio_button){ - audio_button.addEventListener('click', function() { + if (audio_button) { + audio_button.addEventListener('click', function () { toast_push('您点击了“实时语音对话”启动语音对话。'); }); let parent_element = audio_component.parentElement; // 将buttonElement移动到audio_button的内部 @@ -300,5 +395,5 @@ function GptAcademicJavaScriptInit(LAYOUT = "LEFT-RIGHT") { chatbotContentChanged(1); }); chatbotObserver.observe(chatbotIndicator, { attributes: true, childList: true, subtree: true }); - if (LAYOUT === "LEFT-RIGHT") {chatbotAutoHeight();} + if (LAYOUT === "LEFT-RIGHT") { chatbotAutoHeight(); } } \ No newline at end of file From 43568b83e169ba57fc5c97b8cfe3790e2de4e958 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 21 Dec 2023 14:39:58 +0800 Subject: [PATCH 75/88] improve file upload notification --- main.py | 6 +-- themes/common.js | 126 ++++++++++++++++++++++++++++++++++------------- 2 files changed, 95 insertions(+), 37 deletions(-) diff --git a/main.py b/main.py index 4858cd77..daf00447 100644 --- a/main.py +++ b/main.py @@ -291,10 +291,8 @@ def main(): click_handle = btn.click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(btn.value)], outputs=output_combo) cancel_handles.append(click_handle) # 文件上传区,接收文件后与chatbot的互动 - file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]) - file_upload.upload(None, None, None, _js=r"()=>{toast_push('上传完毕, 请等待文件清单展现后继续操作 ...'); cancel_loading_status();}") - file_upload_2.upload(on_file_uploaded, [file_upload_2, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]) - file_upload_2.upload(None, None, None, _js=r"()=>{toast_push('上传完毕, 请等待文件清单展现后继续操作 ...'); cancel_loading_status();}") + file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]).then(None, None, None, _js=r"()=>{toast_push('上传完毕 ...'); cancel_loading_status();}") + 
file_upload_2.upload(on_file_uploaded, [file_upload_2, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2, cookies]).then(None, None, None, _js=r"()=>{toast_push('上传完毕 ...'); cancel_loading_status();}") # 函数插件-固定按钮区 for k in plugins: if not plugins[k].get("AsButton", True): continue diff --git a/themes/common.js b/themes/common.js index e569178e..bbe0eb49 100644 --- a/themes/common.js +++ b/themes/common.js @@ -1,3 +1,7 @@ +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +// 第 1 部分: 工具函数 +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + function gradioApp() { // https://github.com/GaiZhenbiao/ChuanhuChatGPT/tree/main/web_assets/javascript const elems = document.getElementsByTagName('gradio-app'); @@ -37,6 +41,51 @@ function getCookie(name) { return null; } +let toastCount = 0; +function toast_push(msg, duration) { + duration = isNaN(duration) ? 3000 : duration; + const existingToasts = document.querySelectorAll('.toast'); + existingToasts.forEach(toast => { + toast.style.top = `${parseInt(toast.style.top, 10) - 70}px`; + }); + const m = document.createElement('div'); + m.innerHTML = msg; + m.classList.add('toast'); + m.style.cssText = `font-size: var(--text-md) !important; color: rgb(255, 255, 255); background-color: rgba(0, 0, 0, 0.6); padding: 10px 15px; border-radius: 4px; position: fixed; top: ${50 + toastCount * 70}%; left: 50%; transform: translateX(-50%); width: auto; text-align: center; transition: top 0.3s;`; + document.body.appendChild(m); + setTimeout(function () { + m.style.opacity = '0'; + setTimeout(function () { + document.body.removeChild(m); + toastCount--; + }, 500); + }, duration); + toastCount++; +} + +function toast_up(msg) { + var m = document.getElementById('toast_up'); + if (m) { + document.body.removeChild(m); // remove the loader from the body + } + m = document.createElement('div'); + m.id = 'toast_up'; + m.innerHTML = msg; + m.style.cssText = "font-size: var(--text-md) !important; color: rgb(255, 255, 255); background-color: rgba(0, 0, 100, 0.6); padding: 10px 15px; margin: 0 0 0 -60px; border-radius: 4px; position: fixed; top: 50%; left: 50%; width: auto; text-align: center;"; + document.body.appendChild(m); +} +function toast_down() { + var m = document.getElementById('toast_up'); + if (m) { + document.body.removeChild(m); // remove the loader from the body + } +} + + +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +// 第 2 部分: 复制按钮 +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + function addCopyButton(botElement) { // https://github.com/GaiZhenbiao/ChuanhuChatGPT/tree/main/web_assets/javascript // Copy bot button @@ -98,6 +147,12 @@ function chatbotContentChanged(attempt = 1, force = false) { } } + + +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +// 第 3 部分: chatbot动态高度调整 +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + function chatbotAutoHeight() { // 自动调整高度 function update_height() { @@ -127,8 +182,6 @@ function chatbotAutoHeight() { }, 50); // 每100毫秒执行一次 } - - function get_elements(consider_state_panel = false) { var chatbot = document.querySelector('#gpt-chatbot > div.wrap.svelte-18telvq'); if (!chatbot) { @@ -154,6 +207,18 @@ function get_elements(consider_state_panel = false) { } + +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +// 第 4 部分: 粘贴、拖拽文件上传 +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + +var elem_upload = null; +var elem_upload_float = null; +var elem_input_main = null; +var elem_input_float = 
null; +var elem_chatbot = null; +var exist_file_msg = '⚠️请先删除上传区(左上方)中的历史文件,再尝试上传。' + function add_func_paste(input) { let paste_files = []; if (input) { @@ -182,20 +247,21 @@ function add_func_paste(input) { function add_func_drag(elem) { if (elem) { - const dragEvents = ["dragover", "dragenter"]; + const dragEvents = ["dragover"]; const leaveEvents = ["dragleave", "dragend", "drop"]; const onDrag = function (e) { e.preventDefault(); e.stopPropagation(); if (elem_upload_float.querySelector("input[type=file]")) { - toast_push('释放以上传文件', 50) + toast_up('⚠️释放以上传文件') } else { - toast_push('⚠️请先删除上传区中的历史文件,再尝试上传。', 50) + toast_up(exist_file_msg) } }; const onLeave = function (e) { + toast_down(); e.preventDefault(); e.stopPropagation(); }; @@ -237,35 +303,11 @@ async function upload_files(files) { Object.defineProperty(event, "currentTarget", { value: uploadInputElement, enumerable: true }); Object.defineProperty(uploadInputElement, "files", { value: files, enumerable: true }); uploadInputElement.dispatchEvent(event); - - // toast_push('🎉上传文件成功', 2000) } else { - toast_push('⚠️请先删除上传区中的历史文件,再尝试上传。', 3000) + toast_push(exist_file_msg, 3000) } } } -//提示信息 封装 -function toast_push(msg, duration) { - duration = isNaN(duration) ? 3000 : duration; - const m = document.createElement('div'); - m.innerHTML = msg; - m.style.cssText = "font-size: var(--text-md) !important; color: rgb(255, 255, 255);background-color: rgba(0, 0, 0, 0.6);padding: 10px 15px;margin: 0 0 0 -60px;border-radius: 4px;position: fixed; top: 50%;left: 50%;width: auto; text-align: center;"; - document.body.appendChild(m); - setTimeout(function () { - var d = 0.5; - m.style.opacity = '0'; - setTimeout(function () { - document.body.removeChild(m) - }, d * 1000); - }, duration); -} - -var elem_upload = null; -var elem_upload_float = null; -var elem_input_main = null; -var elem_input_float = null; -var gptChatbot = null; - function begin_loading_status() { // Create the loader div and add styling @@ -293,6 +335,7 @@ function begin_loading_status() { }`; document.head.appendChild(styleSheet); } + function cancel_loading_status() { var loadingElement = document.getElementById('Js_File_Loading'); if (loadingElement) { @@ -311,6 +354,7 @@ function cancel_loading_status() { }); } } + function register_upload_event() { elem_upload_float = document.getElementById('elem_upload_float') const upload_component = elem_upload_float.querySelector("input[type=file]"); @@ -321,6 +365,7 @@ function register_upload_event() { }); } } + function monitoring_input_box() { register_upload_event(); @@ -328,6 +373,7 @@ function monitoring_input_box() { elem_upload_float = document.getElementById('elem_upload_float') elem_input_main = document.getElementById('user_input_main') elem_input_float = document.getElementById('user_input_float') + elem_chatbot = document.getElementById('gpt-chatbot') if (elem_input_main) { if (elem_input_main.querySelector("textarea")) { @@ -339,9 +385,8 @@ function monitoring_input_box() { add_func_paste(elem_input_float.querySelector("textarea")) } } - gptChatbot = document.getElementById('gpt-chatbot') - if (gptChatbot) { - add_func_drag(gptChatbot) + if (elem_chatbot) { + add_func_drag(elem_chatbot) } } @@ -352,6 +397,14 @@ window.addEventListener("DOMContentLoaded", function () { gradioApp().addEventListener("render", monitoring_input_box); }); + + + + +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +// 第 5 部分: 音频按钮样式变化 +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + function audio_fn_init() { let 
audio_component = document.getElementById('elem_audio'); if (audio_component) { @@ -388,6 +441,13 @@ function audio_fn_init() { } } + + + +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +// 第 6 部分: JS初始化函数 +// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + function GptAcademicJavaScriptInit(LAYOUT = "LEFT-RIGHT") { audio_fn_init(); chatbotIndicator = gradioApp().querySelector('#gpt-chatbot > div.wrap'); From bb431db7d3402d0903f7f0971acfaa445190fe69 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 21 Dec 2023 14:44:35 +0800 Subject: [PATCH 76/88] upgrade to version 3.64 --- version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version b/version index 680e6e2f..62e6aa5c 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.63, + "version": 3.64, "show_feature": true, - "new_feature": "支持将图片粘贴到输入区 <-> 修复若干隐蔽的内存BUG <-> 修复多用户冲突问题 <-> 接入Deepseek Coder <-> AutoGen多智能体插件测试版" + "new_feature": "支持直接拖拽文件到上传区 <-> 支持将图片粘贴到输入区 <-> 修复若干隐蔽的内存BUG <-> 修复多用户冲突问题 <-> 接入Deepseek Coder <-> AutoGen多智能体插件测试版" } From 867ddd355edddc7046cf744217663b5c71d93c52 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 22 Dec 2023 21:59:18 +0800 Subject: [PATCH 77/88] adjust green theme layout --- themes/green.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/themes/green.css b/themes/green.css index dd109d53..870af482 100644 --- a/themes/green.css +++ b/themes/green.css @@ -256,13 +256,13 @@ textarea.svelte-1pie7s6 { max-height: 95% !important; overflow-y: auto !important; }*/ -.app.svelte-1mya07g.svelte-1mya07g { +/* .app.svelte-1mya07g.svelte-1mya07g { max-width: 100%; position: relative; padding: var(--size-4); width: 100%; height: 100%; -} +} */ .gradio-container-3-32-2 h1 { font-weight: 700 !important; From e3e9921f6bd1d6ccf3445754b36a435662e3c6dc Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sat, 23 Dec 2023 17:46:25 +0800 Subject: [PATCH 78/88] correct the misuse of spark image understanding --- crazy_functions/crazy_utils.py | 2 ++ request_llms/bridge_spark.py | 4 ++-- request_llms/com_sparkapi.py | 12 +++++++----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 731da1ac..4d3b1953 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -139,6 +139,8 @@ def can_multi_process(llm): if llm.startswith('gpt-'): return True if llm.startswith('api2d-'): return True if llm.startswith('azure-'): return True + if llm.startswith('spark'): return True + if llm.startswith('zhipuai'): return True return False def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( diff --git a/request_llms/bridge_spark.py b/request_llms/bridge_spark.py index 6ba39ee7..1fe31ce9 100644 --- a/request_llms/bridge_spark.py +++ b/request_llms/bridge_spark.py @@ -26,7 +26,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", from .com_sparkapi import SparkRequestInstance sri = SparkRequestInstance() - for response in sri.generate(inputs, llm_kwargs, history, sys_prompt): + for response in sri.generate(inputs, llm_kwargs, history, sys_prompt, use_image_api=False): if len(observe_window) >= 1: observe_window[0] = response if len(observe_window) >= 2: @@ -52,7 +52,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp # 开始接收回复 from .com_sparkapi import SparkRequestInstance sri = SparkRequestInstance() - for response in sri.generate(inputs, llm_kwargs, 
history, system_prompt):
+    for response in sri.generate(inputs, llm_kwargs, history, system_prompt, use_image_api=True):
         chatbot[-1] = (inputs, response)
         yield from update_ui(chatbot=chatbot, history=history)
 
diff --git a/request_llms/com_sparkapi.py b/request_llms/com_sparkapi.py
index 1221de11..3f667c18 100644
--- a/request_llms/com_sparkapi.py
+++ b/request_llms/com_sparkapi.py
@@ -72,12 +72,12 @@ class SparkRequestInstance():
         self.result_buf = ""
 
-    def generate(self, inputs, llm_kwargs, history, system_prompt):
+    def generate(self, inputs, llm_kwargs, history, system_prompt, use_image_api=False):
         llm_kwargs = llm_kwargs
         history = history
         system_prompt = system_prompt
         import _thread as thread
-        thread.start_new_thread(self.create_blocking_request, (inputs, llm_kwargs, history, system_prompt))
+        thread.start_new_thread(self.create_blocking_request, (inputs, llm_kwargs, history, system_prompt, use_image_api))
         while True:
             self.time_to_yield_event.wait(timeout=1)
             if self.time_to_yield_event.is_set():
@@ -86,7 +86,7 @@ class SparkRequestInstance():
         return self.result_buf
 
-    def create_blocking_request(self, inputs, llm_kwargs, history, system_prompt):
+    def create_blocking_request(self, inputs, llm_kwargs, history, system_prompt, use_image_api):
         if llm_kwargs['llm_model'] == 'sparkv2':
             gpt_url = self.gpt_url_v2
         elif llm_kwargs['llm_model'] == 'sparkv3':
@@ -94,10 +94,12 @@ class SparkRequestInstance():
         else:
             gpt_url = self.gpt_url
         file_manifest = []
-        if llm_kwargs.get('most_recent_uploaded'):
+        if use_image_api and llm_kwargs.get('most_recent_uploaded'):
             if llm_kwargs['most_recent_uploaded'].get('path'):
                 file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path'])
-                gpt_url = self.gpt_url_img
+                if len(file_manifest) > 0:
+                    print('正在使用讯飞图片理解API')
+                    gpt_url = self.gpt_url_img
         wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url)
         websocket.enableTrace(False)
         wsUrl = wsParam.create_url()
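[Editor's note] PATCH 78 fixes a subtle misfire: previously, any recent upload folder silently rerouted a Spark request to the image-understanding endpoint, even for plain text chats. The fix gates the switch on both the new `use_image_api` flag and a non-empty picture manifest. Below is a minimal sketch of the corrected routing. `get_pictures_list` is reimplemented here as a plausible stand-in for the toolbox helper (the glob patterns are assumptions), and the endpoint URLs are placeholders rather than the real Spark addresses:

```python
import base64
import glob
import os

def get_pictures_list(path):
    # Assumed stand-in for the toolbox helper imported above: collect common
    # image formats under the most recent upload folder, recursively.
    files = []
    for ext in ("jpg", "jpeg", "png"):
        files += glob.glob(os.path.join(path, "**", f"*.{ext}"), recursive=True)
    return files

def encode_image(image_path):
    # base64-encode one picture so it can travel inside a JSON message body
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

def pick_endpoint(llm_kwargs, use_image_api, text_url, image_url):
    """Mirrors create_blocking_request above: divert to the image endpoint
    only when the caller allows it AND pictures actually exist."""
    file_manifest = []
    gpt_url = text_url
    if use_image_api and llm_kwargs.get('most_recent_uploaded'):
        path = llm_kwargs['most_recent_uploaded'].get('path')
        if path:
            file_manifest = get_pictures_list(path)
        if len(file_manifest) > 0:
            gpt_url = image_url
    return gpt_url, [encode_image(p) for p in file_manifest]

# Example: no upload recorded in the cookies, so the text endpoint is kept.
url, images = pick_endpoint({}, True, "wss://example-chat", "wss://example-image")
print(url, len(images))
```

Note the design choice the patch makes: the UI entry point (`predict`) passes `use_image_api=True`, while the multi-threaded API (`predict_no_ui_long_connection`) passes `False`, so background batch jobs can never be hijacked by a stale upload folder.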
From 7606f5c1302044228bc3937ab72d73975439be75 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Sat, 23 Dec 2023 20:55:58 +0800
Subject: [PATCH 79/88] name fix

---
 request_llms/bridge_qwen_local.py | 2 +-
 toolbox.py                        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/request_llms/bridge_qwen_local.py b/request_llms/bridge_qwen_local.py
index 4a0fa69a..e6c2dd5c 100644
--- a/request_llms/bridge_qwen_local.py
+++ b/request_llms/bridge_qwen_local.py
@@ -1,4 +1,4 @@
-model_name = "Qwen_local"
+model_name = "Qwen_Local"
 cmd_to_install = "`pip install -r request_llms/requirements_qwen_local.txt`"
 
 from toolbox import ProxyNetworkActivate, get_conf

diff --git a/toolbox.py b/toolbox.py
index e44d61e5..154b54ca 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -1129,7 +1129,7 @@ def get_user(chatbotwithcookies):
 
 class ProxyNetworkActivate():
     """
-    这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理
+    这段代码定义了一个名为ProxyNetworkActivate的空上下文管理器, 用于给一小段代码上代理
     """
     def __init__(self, task=None) -> None:
         self.task = task

From 0f250305b4c98c80a2b347c6deb18f723defbb13 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Sat, 23 Dec 2023 20:59:32 +0800
Subject: [PATCH 80/88] add urllib3 version limit

---
 request_llms/requirements_qwen.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/request_llms/requirements_qwen.txt b/request_llms/requirements_qwen.txt
index ea65dee7..de6bf3c9 100644
--- a/request_llms/requirements_qwen.txt
+++ b/request_llms/requirements_qwen.txt
@@ -1,4 +1,5 @@
 modelscope
 transformers_stream_generator
 auto-gptq
-optimum
\ No newline at end of file
+optimum
+urllib3<2
\ No newline at end of file

From 2c7bba5c63c1121e2b6723f6b3d7c5a78bf952b1 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Sat, 23 Dec 2023 21:35:42 +0800
Subject: [PATCH 81/88] change dash scope api key check behavior

---
 config.py                   |  2 +-
 request_llms/bridge_qwen.py | 16 ++++++----------
 request_llms/com_qwenapi.py | 13 +++++++++++--
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/config.py b/config.py
index 17dac34a..861bbed9 100644
--- a/config.py
+++ b/config.py
@@ -108,7 +108,7 @@ QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
 
 # 接入通义千问在线大模型 https://dashscope.console.aliyun.com/
-DASHSCOPE_API_KEY = "此处填阿里灵积云API秘钥" # 阿里灵积云API_KEY
+DASHSCOPE_API_KEY = "" # 阿里灵积云API_KEY
 
 # 百度千帆(LLM_MODEL="qianfan")

diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py
index 583def8b..18877b94 100644
--- a/request_llms/bridge_qwen.py
+++ b/request_llms/bridge_qwen.py
@@ -5,16 +5,6 @@ from toolbox import check_packages, report_exception
 
 model_name = 'Qwen'
 
-def validate_key():
-    DASHSCOPE_API_KEY = get_conf("DASHSCOPE_API_KEY")
-    if DASHSCOPE_API_KEY == '': return False
-    return True
-
-if not validate_key():
-    raise RuntimeError('请配置DASHSCOPE_API_KEY')
-os.environ['DASHSCOPE_API_KEY'] = get_conf("DASHSCOPE_API_KEY")
-
-
 def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
     """
         ⭐多线程方法
@@ -48,6 +38,12 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                                        chatbot=chatbot, history=history, delay=0)
         return
 
+    # 检查DASHSCOPE_API_KEY
+    if get_conf("DASHSCOPE_API_KEY") == "":
+        yield from update_ui_lastest_msg(f"请配置 DASHSCOPE_API_KEY。",
+                                         chatbot=chatbot, history=history, delay=0)
+        return
+
     if additional_fn is not None:
         from core_functional import handle_core_functionality
         inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

diff --git a/request_llms/com_qwenapi.py b/request_llms/com_qwenapi.py
index 63ebdea2..5807600d 100644
--- a/request_llms/com_qwenapi.py
+++ b/request_llms/com_qwenapi.py
@@ -7,12 +7,21 @@ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
 
 class QwenRequestInstance():
     def __init__(self):
-
+        import dashscope
         self.time_to_yield_event = threading.Event()
         self.time_to_exit_event = threading.Event()
-
         self.result_buf = ""
 
+        def validate_key():
+            DASHSCOPE_API_KEY = get_conf("DASHSCOPE_API_KEY")
+            if DASHSCOPE_API_KEY == '': return False
+            return True
+
+        if not validate_key():
+            raise RuntimeError('请配置 DASHSCOPE_API_KEY')
+        dashscope.api_key = get_conf("DASHSCOPE_API_KEY")
+
+
     def generate(self, inputs, llm_kwargs, history, system_prompt):
         # import _thread as thread
         from dashscope import Generation

From ca85573ec1ca0441566680463f6f6b2efb67784d Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Sun, 24 Dec 2023 18:14:57 +0800
Subject: [PATCH 82/88] Update README.md

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 129e5567..63266cef 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 >
 > 2023.11.12: 某些依赖包尚不兼容python 3.12,推荐python 3.11。
 >
-> 2023.11.7: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目开源免费,近期发现有人蔑视开源协议并利用本项目违规圈钱,请提高警惕,谨防上当受骗。
+> 2023.11.7: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目开源免费,请提高警惕,谨防上当受骗。
@@ -111,7 +111,7 @@ Latex论文一键校对 | [插件] 仿Grammarly对Latex文章进行语法、拼
-- 多种大语言模型混合调用(ChatGLM + OpenAI-GPT3.5 + [API2D](https://api2d.com/)-GPT4)
+- 多种大语言模型混合调用(ChatGLM + OpenAI-GPT3.5 + GPT4)
@@ -370,8 +370,8 @@ GPT Academic开发者QQ群:`610599535` 1. `master` 分支: 主分支,稳定版 2. `frontier` 分支: 开发分支,测试版 -3. 如何接入其他大模型:[接入其他大模型](request_llms/README.md) - +3. 如何[接入其他大模型](request_llms/README.md) +4. 访问GPT-Academic的[在线服务并支持我们](https://github.com/binary-husky/gpt_academic/wiki/online) ### V:参考与学习 From 5c7499cada3da066ed389c5eace6995e4cc00906 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Mon, 25 Dec 2023 17:17:15 +0800 Subject: [PATCH 83/88] compat with some third party api --- request_llms/bridge_chatgpt.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/request_llms/bridge_chatgpt.py b/request_llms/bridge_chatgpt.py index 0bdebf0d..faad6cfd 100644 --- a/request_llms/bridge_chatgpt.py +++ b/request_llms/bridge_chatgpt.py @@ -102,20 +102,25 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", result = '' json_data = None while True: - try: chunk = next(stream_response).decode() + try: chunk = next(stream_response) except StopIteration: break except requests.exceptions.ConnectionError: - chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。 - if len(chunk)==0: continue - if not chunk.startswith('data:'): - error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode() + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk) + if len(chunk_decoded)==0: continue + if not chunk_decoded.startswith('data:'): + error_msg = get_full_error(chunk, stream_response).decode() if "reduce the length" in error_msg: raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg) else: raise RuntimeError("OpenAI拒绝了请求:" + error_msg) - if ('data: [DONE]' in chunk): break # api2d 正常完成 - json_data = json.loads(chunk.lstrip('data:'))['choices'][0] + if ('data: [DONE]' in chunk_decoded): break # api2d 正常完成 + # 提前读取一些信息 (用于判断异常) + if has_choices and not choice_valid: + # 一些垃圾第三方接口的出现这样的错误 + continue + json_data = chunkjson['choices'][0] delta = json_data["delta"] if len(delta) == 0: break if "role" in delta: continue From c625348ce1c943d2dc211ec8a94fdb9835e96449 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 25 Dec 2023 21:26:24 +0800 Subject: [PATCH 84/88] smarter chatbot height adjustment --- themes/common.js | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/themes/common.js b/themes/common.js index bbe0eb49..fdcac3f7 100644 --- a/themes/common.js +++ b/themes/common.js @@ -156,21 +156,21 @@ function chatbotContentChanged(attempt = 1, force = false) { function chatbotAutoHeight() { // 自动调整高度 function update_height() { - var { panel_height_target, chatbot_height, chatbot } = get_elements(true); - if (panel_height_target != chatbot_height) { - var pixelString = panel_height_target.toString() + 'px'; + var { height_target, chatbot_height, chatbot } = get_elements(true); + if (height_target != chatbot_height) { + var pixelString = height_target.toString() + 'px'; chatbot.style.maxHeight = pixelString; chatbot.style.height = pixelString; } } function update_height_slow() { - var { panel_height_target, chatbot_height, chatbot } = get_elements(); - if (panel_height_target != chatbot_height) { - new_panel_height = (panel_height_target - chatbot_height) * 0.5 + chatbot_height; - if (Math.abs(new_panel_height - panel_height_target) < 10) { - new_panel_height = panel_height_target; + var { height_target, chatbot_height, chatbot } = get_elements(); + if (height_target 
!= chatbot_height) { + new_panel_height = (height_target - chatbot_height) * 0.5 + chatbot_height; + if (Math.abs(new_panel_height - height_target) < 10) { + new_panel_height = height_target; } - // console.log(chatbot_height, panel_height_target, new_panel_height); + // console.log(chatbot_height, height_target, new_panel_height); var pixelString = new_panel_height.toString() + 'px'; chatbot.style.maxHeight = pixelString; chatbot.style.height = pixelString; } @@ -179,7 +179,7 @@ function chatbotAutoHeight() { update_height(); setInterval(function () { update_height_slow() - }, 50); // 每100毫秒执行一次 + }, 50); // 每50毫秒执行一次 } function get_elements(consider_state_panel = false) { @@ -187,23 +187,36 @@ function get_elements(consider_state_panel = false) { if (!chatbot) { chatbot = document.querySelector('#gpt-chatbot'); } + const input_panel = document.querySelector('#input-panel'); const panel1 = document.querySelector('#input-panel').getBoundingClientRect(); const panel2 = document.querySelector('#basic-panel').getBoundingClientRect() const panel3 = document.querySelector('#plugin-panel').getBoundingClientRect(); // const panel4 = document.querySelector('#interact-panel').getBoundingClientRect(); - const panel5 = document.querySelector('#input-panel2').getBoundingClientRect(); const panel_active = document.querySelector('#state-panel').getBoundingClientRect(); if (consider_state_panel || panel_active.height < 25) { document.state_panel_height = panel_active.height; } // 25 是chatbot的label高度, 16 是右侧的gap - var panel_height_target = panel1.height + panel2.height + panel3.height + 0 + 0 - 25 + 16 * 2; + var height_target = panel1.height + panel2.height + panel3.height + 0 + 0 - 25 + 16 * 2; // 禁止动态的state-panel高度影响 - panel_height_target = panel_height_target + (document.state_panel_height - panel_active.height) - var panel_height_target = parseInt(panel_height_target); + height_target = height_target + (document.state_panel_height - panel_active.height) + var height_target = parseInt(height_target); var chatbot_height = chatbot.style.height; + + const err_tor = 5; + if (Math.abs(panel1.left - chatbot.getBoundingClientRect().left) < err_tor){ + // 是否处于窄屏模式 + height_target = window.innerHeight * 0.6; + }else{ + // 调整高度 + const chatbot_height_exceed = 15; + const chatbot_height_exceed_m = 10; + if (panel3.bottom >= window.innerHeight - chatbot_height_exceed) { + height_target = window.innerHeight - chatbot.getBoundingClientRect().top - chatbot_height_exceed_m; + } + } var chatbot_height = parseInt(chatbot_height); - return { panel_height_target, chatbot_height, chatbot }; + return { height_target, chatbot_height, chatbot }; } From 160a6836677fad1db2d2974adcdefb7cf1ce7a88 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 25 Dec 2023 22:05:14 +0800 Subject: [PATCH 85/88] smart input panel swap --- themes/common.js | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/themes/common.js b/themes/common.js index 0c0d3e41..39678598 100644 --- a/themes/common.js +++ b/themes/common.js @@ -182,12 +182,30 @@ function chatbotAutoHeight() { }, 50); // 每50毫秒执行一次 } +swapped = false; +function swap_input_area() { + // Get the elements to be swapped + var element1 = document.querySelector("#input-panel"); + var element2 = document.querySelector("#basic-panel"); + + // Get the parent of the elements + var parent = element1.parentNode; + + // Get the next sibling of element2 + var nextSibling = element2.nextSibling; + + // Swap the elements + parent.insertBefore(element2, 
element1); + parent.insertBefore(element1, nextSibling); + if (swapped) {swapped = false;} + else {swapped = true;} +} + function get_elements(consider_state_panel = false) { var chatbot = document.querySelector('#gpt-chatbot > div.wrap.svelte-18telvq'); if (!chatbot) { chatbot = document.querySelector('#gpt-chatbot'); } - const input_panel = document.querySelector('#input-panel'); const panel1 = document.querySelector('#input-panel').getBoundingClientRect(); const panel2 = document.querySelector('#basic-panel').getBoundingClientRect() const panel3 = document.querySelector('#plugin-panel').getBoundingClientRect(); @@ -202,7 +220,15 @@ function get_elements(consider_state_panel = false) { height_target = height_target + (document.state_panel_height - panel_active.height) var height_target = parseInt(height_target); var chatbot_height = chatbot.style.height; - + console.log(panel1.top, panel2.top, panel3.top, panel_active.top, height_target, chatbot_height); + // 交换输入区位置,使得输入区始终可用 + if (!swapped){ + if (panel1.top!=0 && panel1.top < 0){ swap_input_area(); } + } + else if (swapped){ + if (panel2.top!=0 && panel2.top > 0){ swap_input_area(); } + } + // 调整高度 const err_tor = 5; if (Math.abs(panel1.left - chatbot.getBoundingClientRect().left) < err_tor){ // 是否处于窄屏模式 From 1bd3637d32f8439a4fb76394fb95e8f61932b624 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 25 Dec 2023 22:24:12 +0800 Subject: [PATCH 86/88] modify image gen plugin user interaction --- crazy_functional.py | 8 ++++---- crazy_functions/图片生成.py | 12 ++++++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/crazy_functional.py b/crazy_functional.py index 4cc63040..c323dc32 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -345,7 +345,7 @@ def get_crazy_functions(): "Color": "stop", "AsButton": False, "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) - "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示 + "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&gpt-4", # 高级参数输入区的显示提示 "Function": HotReload(同时问询_指定模型) }, }) @@ -356,7 +356,7 @@ def get_crazy_functions(): try: from crazy_functions.图片生成 import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2 function_plugins.update({ - "图片生成_DALLE2 (先切换模型到openai或api2d)": { + "图片生成_DALLE2 (先切换模型到gpt-*)": { "Group": "对话", "Color": "stop", "AsButton": False, @@ -367,7 +367,7 @@ def get_crazy_functions(): }, }) function_plugins.update({ - "图片生成_DALLE3 (先切换模型到openai或api2d)": { + "图片生成_DALLE3 (先切换模型到gpt-*)": { "Group": "对话", "Color": "stop", "AsButton": False, @@ -378,7 +378,7 @@ def get_crazy_functions(): }, }) function_plugins.update({ - "图片修改_DALLE2 (先切换模型到openai或api2d)": { + "图片修改_DALLE2 (先切换模型到gpt-*)": { "Group": "对话", "Color": "stop", "AsButton": False, diff --git a/crazy_functions/图片生成.py b/crazy_functions/图片生成.py index f32d1367..9751e164 100644 --- a/crazy_functions/图片生成.py +++ b/crazy_functions/图片生成.py @@ -104,7 +104,11 @@ def 图片生成_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys web_port 当前软件运行的端口号 """ history = [] # 清空历史,以免输入溢出 - chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*或者api2d-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 .....")) + if prompt.strip() == "": + chatbot.append((prompt, "[Local Message] 图像生成提示为空白,请在“输入区”输入图像生成提示。")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新 + return + chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 .....")) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 
由于请求gpt需要一段时间,我们先及时地做一次界面更新 if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") resolution = plugin_kwargs.get("advanced_arg", '1024x1024') @@ -121,7 +125,11 @@ def 图片生成_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys @CatchException def 图片生成_DALLE3(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): history = [] # 清空历史,以免输入溢出 - chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*或者api2d-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 .....")) + if prompt.strip() == "": + chatbot.append((prompt, "[Local Message] 图像生成提示为空白,请在“输入区”输入图像生成提示。")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新 + return + chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 .....")) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 由于请求gpt需要一段时间,我们先及时地做一次界面更新 if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") resolution_arg = plugin_kwargs.get("advanced_arg", '1024x1024-standard-vivid').lower() From 31304f481a1c4d3e07d92348cb698bfcb57bcc16 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 25 Dec 2023 22:57:09 +0800 Subject: [PATCH 87/88] remove console log --- themes/common.js | 1 - 1 file changed, 1 deletion(-) diff --git a/themes/common.js b/themes/common.js index 39678598..8743b3ca 100644 --- a/themes/common.js +++ b/themes/common.js @@ -220,7 +220,6 @@ function get_elements(consider_state_panel = false) { height_target = height_target + (document.state_panel_height - panel_active.height) var height_target = parseInt(height_target); var chatbot_height = chatbot.style.height; - console.log(panel1.top, panel2.top, panel3.top, panel_active.top, height_target, chatbot_height); // 交换输入区位置,使得输入区始终可用 if (!swapped){ if (panel1.top!=0 && panel1.top < 0){ swap_input_area(); } From 865c4ca99390b81d03ed350071e7cc260b4e8c20 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Tue, 26 Dec 2023 22:51:56 +0800 Subject: [PATCH 88/88] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 63266cef..dd91413c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ > > 2023.11.12: 某些依赖包尚不兼容python 3.12,推荐python 3.11。 > -> 2023.11.7: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目开源免费,请提高警惕,谨防上当受骗。 +> 2023.12.26: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目完全开源免费,您可通过订阅[在线服务](https://github.com/binary-husky/gpt_academic/wiki/online)的方式鼓励本项目的发展。
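[Editor's note] A closing illustration for the `compat with some third party api` commit (PATCH 83) above: the rewritten streaming loop decodes each chunk once, then skips chunks whose `choices` list is empty instead of crashing on `choices[0]` — a failure mode of some third-party OpenAI-compatible relays. Only the helper's six-value return signature is visible in the diff, so the body below is an assumed, simplified stand-in for the real `decode_chunk` in bridge_chatgpt.py:

```python
import json

def decode_chunk(chunk: bytes):
    # Hypothetical body; only the return signature (chunk_decoded, chunkjson,
    # has_choices, choice_valid, has_content, has_role) appears in the patch.
    chunk_decoded = chunk.decode(errors="ignore")
    chunkjson, has_choices, choice_valid, has_content, has_role = None, False, False, False, False
    payload = chunk_decoded[len("data:"):] if chunk_decoded.startswith("data:") else chunk_decoded
    try:
        chunkjson = json.loads(payload)
        has_choices = "choices" in chunkjson
        if has_choices:
            choice_valid = len(chunkjson["choices"]) > 0
        if choice_valid:
            delta = chunkjson["choices"][0].get("delta", {})
            has_content = "content" in delta
            has_role = "role" in delta
    except Exception:
        pass
    return chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role

# Usage mirroring the patched loop: a relay that answers {"choices": []}
# is skipped instead of raising IndexError on choices[0].
for raw in (b'data: {"choices": []}',
            b'data: {"choices": [{"delta": {"content": "hi"}}]}'):
    _, js, has_choices, choice_valid, has_content, _ = decode_chunk(raw)
    if has_choices and not choice_valid:
        continue  # tolerate the malformed third-party chunk
    if has_content:
        print(js["choices"][0]["delta"]["content"])
```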