From b0409b929b2814decd78e7d74650956d848ff546 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Wed, 19 Apr 2023 14:27:34 +0800
Subject: [PATCH] =?UTF-8?q?tiktoken=E5=81=9Alazyload=E5=A4=84=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 check_proxy.py                              |  9 ++++-
 crazy_functions/Latex全文润色.py            |  5 ++-
 crazy_functions/Latex全文翻译.py            |  5 ++-
 crazy_functions/crazy_utils.py              |  4 +--
 crazy_functions/代码重写为全英文_多线程.py  |  5 ++-
 crazy_functions/批量Markdown翻译.py         |  5 ++-
 crazy_functions/批量翻译PDF文档_多线程.py   |  4 +--
 crazy_functions/理解PDF文档内容.py          |  4 +--
 main.py                                     |  3 +-
 request_llm/bridge_all.py                   | 38 ++++++++++++++++------
 request_llm/bridge_chatglm.py               | 35 +++++++++++++++++---
 toolbox.py                                  |  1 -
 12 files changed, 83 insertions(+), 35 deletions(-)

diff --git a/check_proxy.py b/check_proxy.py
index 7fdd2b0c..28711a8c 100644
--- a/check_proxy.py
+++ b/check_proxy.py
@@ -103,7 +103,7 @@ def auto_update():
         import json
         proxies, = get_conf('proxies')
         response = requests.get(
-            "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=1)
+            "https://raw.githubusercontent.com/binary-husky/chatgpt_academic/master/version", proxies=proxies, timeout=5)
         remote_json_data = json.loads(response.text)
         remote_version = remote_json_data['version']
         if remote_json_data["show_feature"]:
@@ -133,6 +133,13 @@ def auto_update():
     except:
         print('自动更新程序:已禁用')
 
+def warm_up_modules():
+    print('正在执行一些模块的预热...')
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
+    enc.encode("模块预热", disallowed_special=())
+    enc = model_info["gpt-4"]['tokenizer']
+    enc.encode("模块预热", disallowed_special=())
 
 if __name__ == '__main__':
     import os
diff --git a/crazy_functions/Latex全文润色.py b/crazy_functions/Latex全文润色.py
index da03686f..c299e59d 100644
--- a/crazy_functions/Latex全文润色.py
+++ b/crazy_functions/Latex全文润色.py
@@ -11,9 +11,8 @@ class PaperFileGroup():
         self.sp_file_tag = []
 
         # count_token
-        import tiktoken
-        from toolbox import get_conf
-        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+        from request_llm.bridge_all import model_info
+        enc = model_info["gpt-3.5-turbo"]['tokenizer']
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
 
diff --git a/crazy_functions/Latex全文翻译.py b/crazy_functions/Latex全文翻译.py
index de25e620..efada619 100644
--- a/crazy_functions/Latex全文翻译.py
+++ b/crazy_functions/Latex全文翻译.py
@@ -11,9 +11,8 @@ class PaperFileGroup():
         self.sp_file_tag = []
 
         # count_token
-        import tiktoken
-        from toolbox import get_conf
-        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+        from request_llm.bridge_all import model_info
+        enc = model_info["gpt-3.5-turbo"]['tokenizer']
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
 
diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py
index 60850496..4e0eba49 100644
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -2,9 +2,9 @@ import traceback
 from toolbox import update_ui, get_conf
 
 def input_clipping(inputs, history, max_token_limit):
-    import tiktoken
     import numpy as np
-    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
 
     mode = 'input-and-history'
diff --git a/crazy_functions/代码重写为全英文_多线程.py b/crazy_functions/代码重写为全英文_多线程.py
index f8ecec9d..e57f80f1 100644
--- a/crazy_functions/代码重写为全英文_多线程.py
+++ b/crazy_functions/代码重写为全英文_多线程.py
@@ -59,9 +59,8 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
 
     # 第5步:Token限制下的截断与处理
     MAX_TOKEN = 3000
-    import tiktoken
-    from toolbox import get_conf
-    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
 
 
diff --git a/crazy_functions/批量Markdown翻译.py b/crazy_functions/批量Markdown翻译.py
index 823ca3fe..68d1b501 100644
--- a/crazy_functions/批量Markdown翻译.py
+++ b/crazy_functions/批量Markdown翻译.py
@@ -11,9 +11,8 @@ class PaperFileGroup():
         self.sp_file_tag = []
 
         # count_token
-        import tiktoken
-        from toolbox import get_conf
-        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+        from request_llm.bridge_all import model_info
+        enc = model_info["gpt-3.5-turbo"]['tokenizer']
         def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
         self.get_token_num = get_token_num
 
diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py
index 351be0ee..4adb9a46 100644
--- a/crazy_functions/批量翻译PDF文档_多线程.py
+++ b/crazy_functions/批量翻译PDF文档_多线程.py
@@ -68,8 +68,8 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
 
     # 递归地切割PDF文件
     from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-    from toolbox import get_conf
-    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
     paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
         txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
diff --git a/crazy_functions/理解PDF文档内容.py b/crazy_functions/理解PDF文档内容.py
index 05fbd49c..50508645 100644
--- a/crazy_functions/理解PDF文档内容.py
+++ b/crazy_functions/理解PDF文档内容.py
@@ -17,8 +17,8 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     TOKEN_LIMIT_PER_FRAGMENT = 2500
 
     from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-    from toolbox import get_conf
-    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+    from request_llm.bridge_all import model_info
+    enc = model_info["gpt-3.5-turbo"]['tokenizer']
     def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
     paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
         txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
diff --git a/main.py b/main.py
index c028e4aa..fafd38ce 100644
--- a/main.py
+++ b/main.py
@@ -40,7 +40,7 @@ def main():
     set_theme = adjust_theme()
 
     # 代理与自动更新
-    from check_proxy import check_proxy, auto_update
+    from check_proxy import check_proxy, auto_update, warm_up_modules
     proxy_info = check_proxy(proxies)
 
     gr_L1 = lambda: gr.Row().style()
@@ -180,6 +180,7 @@ def main():
             webbrowser.open_new_tab(f"http://localhost:{PORT}/?__dark-theme=true")
         threading.Thread(target=open, name="open-browser", daemon=True).start()
         threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start()
+        threading.Thread(target=warm_up_modules, name="warm-up", daemon=True).start()
 
     auto_opentab_delay()
     demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=PORT, auth=AUTHENTICATION, favicon_path="docs/logo.png")
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index ecc6b1e0..f416ad68 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -9,7 +9,7 @@
     2. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程
 """
 import tiktoken
-
+from functools import wraps, lru_cache
 from concurrent.futures import ThreadPoolExecutor
 
 from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
@@ -18,13 +18,31 @@ from .bridge_chatgpt import predict as chatgpt_ui
 from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
 from .bridge_chatglm import predict as chatglm_ui
 
-from .bridge_tgui import predict_no_ui_long_connection as tgui_noui
-from .bridge_tgui import predict as tgui_ui
+# from .bridge_tgui import predict_no_ui_long_connection as tgui_noui
+# from .bridge_tgui import predict as tgui_ui
 
 colors = ['#FF00FF', '#00FFFF', '#FF0000', '#990099', '#009999', '#990044']
 
-get_token_num_gpt35 = lambda txt: len(tiktoken.encoding_for_model("gpt-3.5-turbo").encode(txt, disallowed_special=()))
-get_token_num_gpt4 = lambda txt: len(tiktoken.encoding_for_model("gpt-4").encode(txt, disallowed_special=()))
+class LazyloadTiktoken(object):
+    def __init__(self, model):
+        self.model = model
+
+    @staticmethod
+    @lru_cache(maxsize=128)
+    def get_encoder(model):
+        print('正在加载tokenizer,如果是第一次运行,可能需要一点时间下载参数')
+        tmp = tiktoken.encoding_for_model(model)
+        print('加载tokenizer完毕')
+        return tmp
+
+    def encode(self, *args, **kwargs):
+        encoder = self.get_encoder(self.model)
+        return encoder.encode(*args, **kwargs)
+
+tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
+tokenizer_gpt4 = LazyloadTiktoken("gpt-4")
+get_token_num_gpt35 = lambda txt: len(tokenizer_gpt35.encode(txt, disallowed_special=()))
+get_token_num_gpt4 = lambda txt: len(tokenizer_gpt4.encode(txt, disallowed_special=()))
 
 model_info = {
     # openai
@@ -33,7 +51,7 @@ model_info = {
         "fn_without_ui": chatgpt_noui,
         "endpoint": "https://api.openai.com/v1/chat/completions",
         "max_token": 4096,
-        "tokenizer": tiktoken.encoding_for_model("gpt-3.5-turbo"),
+        "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
 
@@ -42,7 +60,7 @@ model_info = {
         "fn_without_ui": chatgpt_noui,
         "endpoint": "https://api.openai.com/v1/chat/completions",
         "max_token": 8192,
-        "tokenizer": tiktoken.encoding_for_model("gpt-4"),
+        "tokenizer": tokenizer_gpt4,
         "token_cnt": get_token_num_gpt4,
     },
 
@@ -52,7 +70,7 @@ model_info = {
         "fn_without_ui": chatgpt_noui,
         "endpoint": "https://openai.api2d.net/v1/chat/completions",
         "max_token": 4096,
-        "tokenizer": tiktoken.encoding_for_model("gpt-3.5-turbo"),
+        "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
 
@@ -61,7 +79,7 @@ model_info = {
         "fn_without_ui": chatgpt_noui,
         "endpoint": "https://openai.api2d.net/v1/chat/completions",
         "max_token": 8192,
-        "tokenizer": tiktoken.encoding_for_model("gpt-4"),
+        "tokenizer": tokenizer_gpt4,
         "token_cnt": get_token_num_gpt4,
     },
 
@@ -71,7 +89,7 @@ model_info = {
         "fn_without_ui": chatglm_noui,
         "endpoint": None,
         "max_token": 1024,
-        "tokenizer": tiktoken.encoding_for_model("gpt-3.5-turbo"),
+        "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
 
diff --git a/request_llm/bridge_chatglm.py b/request_llm/bridge_chatglm.py
index 819519b5..7af28356 100644
--- a/request_llm/bridge_chatglm.py
+++ b/request_llm/bridge_chatglm.py
@@ -5,6 +5,8 @@ import importlib
 from toolbox import update_ui, get_conf
 from multiprocessing import Process, Pipe
 
+load_message = "ChatGLM尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,ChatGLM消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
+
 #################################################################################
 class GetGLMHandle(Process):
     def __init__(self):
@@ -12,13 +14,26 @@ class GetGLMHandle(Process):
         self.parent, self.child = Pipe()
         self.chatglm_model = None
         self.chatglm_tokenizer = None
+        self.info = ""
+        self.success = True
+        self.check_dependency()
         self.start()
-        print('初始化')
 
+    def check_dependency(self):
+        try:
+            import sentencepiece
+            self.info = "依赖检测通过"
+            self.success = True
+        except:
+            self.info = "缺少ChatGLM的依赖,如果要使用ChatGLM,除了基础的pip依赖以外,您还需要运行`pip install -r request_llm/requirements_chatglm.txt`安装ChatGLM的依赖。"
+            self.success = False
+
     def ready(self):
         return self.chatglm_model is not None
 
     def run(self):
+        # 第一次运行,加载参数
+        retry = 0
         while True:
             try:
                 if self.chatglm_model is None:
@@ -33,7 +48,12 @@ class GetGLMHandle(Process):
                 else:
                     break
             except:
-                pass
+                retry += 1
+                if retry > 3:
+                    self.child.send('[Local Message] Call ChatGLM fail 不能正常加载ChatGLM的参数。')
+                    raise RuntimeError("不能正常加载ChatGLM的参数!")
+
+        # 进入任务等待状态
         while True:
             kwargs = self.child.recv()
             try:
@@ -64,7 +84,11 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
     global glm_handle
     if glm_handle is None:
         glm_handle = GetGLMHandle()
-        observe_window[0] = "ChatGLM尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,ChatGLM消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
+        observe_window[0] = load_message + "\n\n" + glm_handle.info
+        if not glm_handle.success:
+            error = glm_handle.info
+            glm_handle = None
+            raise RuntimeError(error)
 
     # chatglm 没有 sys_prompt 接口,因此把prompt加入 history
     history_feedin = []
@@ -93,8 +117,11 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
     global glm_handle
     if glm_handle is None:
         glm_handle = GetGLMHandle()
-        chatbot[-1] = (inputs, "ChatGLM尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,ChatGLM消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……")
+        chatbot[-1] = (inputs, load_message + "\n\n" + glm_handle.info)
         yield from update_ui(chatbot=chatbot, history=[])
+        if not glm_handle.success:
+            glm_handle = None
+            return
 
     if additional_fn is not None:
         import core_functional
diff --git a/toolbox.py b/toolbox.py
index 05fd368d..038d7be8 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -25,7 +25,6 @@ def ArgsGeneralWrapper(f):
     装饰器函数,用于重组输入参数,改变输入参数的顺序与结构。
     """
     def decorated(cookies, max_length, llm_model, txt, txt2, top_p, temperature, chatbot, history, system_prompt, *args):
-        from request_llm.bridge_all import model_info
         txt_passon = txt
         if txt == "" and txt2 != "": txt_passon = txt2
         # 引入一个有cookie的chatbot
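
Note (not part of the patch): below is a minimal standalone sketch of the lazy-load pattern that the request_llm/bridge_all.py hunk introduces. The class and method names mirror the patch; the bare tiktoken import and the __main__ usage lines are illustrative assumptions. Building the tokenizer is deferred to the first encode() call and cached with lru_cache, so importing bridge_all.py or constructing model_info no longer triggers a tokenizer download.

# Minimal sketch of the lazy tokenizer, assuming only that tiktoken is installed.
from functools import lru_cache
import tiktoken

class LazyloadTiktoken(object):
    def __init__(self, model):
        # Only the model name is stored; no tokenizer data is fetched yet.
        self.model = model

    @staticmethod
    @lru_cache(maxsize=128)
    def get_encoder(model):
        # Heavy step: may download BPE data on first use; cached afterwards.
        return tiktoken.encoding_for_model(model)

    def encode(self, *args, **kwargs):
        # The real encoder is built lazily, on the first encode() call.
        return self.get_encoder(self.model).encode(*args, **kwargs)

if __name__ == '__main__':
    tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")  # instant, nothing loaded yet
    n = len(tokenizer_gpt35.encode("模块预热", disallowed_special=()))  # encoder loads here
    print(n)

Because warm_up_modules() in check_proxy.py calls encode() once per tokenizer and main.py starts it in a daemon thread, the one-time loading cost is paid in the background after the UI is already up rather than at import time.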