diff --git a/config.py b/config.py index 46b65787..f63d4400 100644 --- a/config.py +++ b/config.py @@ -47,7 +47,8 @@ AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "gpt-4-turbo-preview", "gpt-4-vision-p # "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125" # "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2", # "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama", -# "yi-34b-chat-0205", "yi-34b-chat-200k" +# "deepseek-chat" ,"deepseek-coder", +# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview", # ] # --- --- --- --- # 此外,您还可以在接入one-api/vllm/ollama时, @@ -223,6 +224,8 @@ MOONSHOT_API_KEY = "" # 零一万物(Yi Model) API KEY YIMODEL_API_KEY = "" +# 深度求索(DeepSeek) API KEY,默认请求地址为"https://api.deepseek.com/v1/chat/completions" +DEEPSEEK_API_KEY = "" # Mathpix 拥有执行PDF的OCR功能,但是需要注册账号 MATHPIX_APPID = "" diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py index a014bd78..9a314479 100644 --- a/request_llms/bridge_all.py +++ b/request_llms/bridge_all.py @@ -37,6 +37,8 @@ from .bridge_zhipu import predict as zhipu_ui from .bridge_cohere import predict as cohere_ui from .bridge_cohere import predict_no_ui_long_connection as cohere_noui +from .oai_std_model_template import get_predict_function + colors = ['#FF00FF', '#00FFFF', '#FF0000', '#990099', '#009999', '#990044'] class LazyloadTiktoken(object): @@ -66,9 +68,10 @@ api2d_endpoint = "https://openai.api2d.net/v1/chat/completions" newbing_endpoint = "wss://sydney.bing.com/sydney/ChatHub" gemini_endpoint = "https://generativelanguage.googleapis.com/v1beta/models" claude_endpoint = "https://api.anthropic.com/v1/messages" -yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions" cohere_endpoint = "https://api.cohere.ai/v1/chat" ollama_endpoint = "http://localhost:11434/api/chat" +yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions" +deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions" if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/' azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15' @@ -86,9 +89,10 @@ if api2d_endpoint in API_URL_REDIRECT: api2d_endpoint = API_URL_REDIRECT[api2d_e if newbing_endpoint in API_URL_REDIRECT: newbing_endpoint = API_URL_REDIRECT[newbing_endpoint] if gemini_endpoint in API_URL_REDIRECT: gemini_endpoint = API_URL_REDIRECT[gemini_endpoint] if claude_endpoint in API_URL_REDIRECT: claude_endpoint = API_URL_REDIRECT[claude_endpoint] -if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint] if cohere_endpoint in API_URL_REDIRECT: cohere_endpoint = API_URL_REDIRECT[cohere_endpoint] if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollama_endpoint] +if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint] +if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint] # 获取tokenizer tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo") @@ -654,14 +658,22 @@ if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen- except: print(trimmed_format_exc()) # -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=- -if "yi-34b-chat-0205" in AVAIL_LLM_MODELS or "yi-34b-chat-200k" in AVAIL_LLM_MODELS: # zhipuai +yi_models = 
["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"] +if any(item in yi_models for item in AVAIL_LLM_MODELS): try: - from .bridge_yimodel import predict_no_ui_long_connection as yimodel_noui - from .bridge_yimodel import predict as yimodel_ui + yimodel_4k_noui, yimodel_4k_ui = get_predict_function( + api_key_conf_name="YIMODEL_API_KEY", max_output_token=600, disable_proxy=False + ) + yimodel_16k_noui, yimodel_16k_ui = get_predict_function( + api_key_conf_name="YIMODEL_API_KEY", max_output_token=4000, disable_proxy=False + ) + yimodel_200k_noui, yimodel_200k_ui = get_predict_function( + api_key_conf_name="YIMODEL_API_KEY", max_output_token=4096, disable_proxy=False + ) model_info.update({ "yi-34b-chat-0205": { - "fn_with_ui": yimodel_ui, - "fn_without_ui": yimodel_noui, + "fn_with_ui": yimodel_4k_ui, + "fn_without_ui": yimodel_4k_noui, "can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用 "endpoint": yimodel_endpoint, "max_token": 4000, @@ -669,14 +681,59 @@ if "yi-34b-chat-0205" in AVAIL_LLM_MODELS or "yi-34b-chat-200k" in AVAIL_LLM_MOD "token_cnt": get_token_num_gpt35, }, "yi-34b-chat-200k": { - "fn_with_ui": yimodel_ui, - "fn_without_ui": yimodel_noui, + "fn_with_ui": yimodel_200k_ui, + "fn_without_ui": yimodel_200k_noui, "can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用 "endpoint": yimodel_endpoint, "max_token": 200000, "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, + "yi-large": { + "fn_with_ui": yimodel_16k_ui, + "fn_without_ui": yimodel_16k_noui, + "can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用 + "endpoint": yimodel_endpoint, + "max_token": 16000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "yi-medium": { + "fn_with_ui": yimodel_16k_ui, + "fn_without_ui": yimodel_16k_noui, + "can_multi_thread": True, # 这个并发量稍微大一点 + "endpoint": yimodel_endpoint, + "max_token": 16000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "yi-spark": { + "fn_with_ui": yimodel_16k_ui, + "fn_without_ui": yimodel_16k_noui, + "can_multi_thread": True, # 这个并发量稍微大一点 + "endpoint": yimodel_endpoint, + "max_token": 16000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "yi-large-turbo": { + "fn_with_ui": yimodel_16k_ui, + "fn_without_ui": yimodel_16k_noui, + "can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用 + "endpoint": yimodel_endpoint, + "max_token": 16000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "yi-large-preview": { + "fn_with_ui": yimodel_16k_ui, + "fn_without_ui": yimodel_16k_noui, + "can_multi_thread": False, # 目前来说,默认情况下并发量极低,因此禁用 + "endpoint": yimodel_endpoint, + "max_token": 16000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, }) except: print(trimmed_format_exc()) @@ -789,8 +846,34 @@ if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder }) except: print(trimmed_format_exc()) - - +# -=-=-=-=-=-=- 幻方-深度求索大模型在线API -=-=-=-=-=-=- +if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS: + try: + deepseekapi_noui, deepseekapi_ui = get_predict_function( + APIKEY="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False + ) + model_info.update({ + "deepseek-chat":{ + "fn_with_ui": deepseekapi_ui, + "fn_without_ui": deepseekapi_noui, + "endpoint": deepseekapi_endpoint, + "can_multi_thread": True, + "max_token": 32000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "deepseek-coder":{ + "fn_with_ui": deepseekapi_ui, + "fn_without_ui": 
deepseekapi_noui, + "endpoint": deepseekapi_endpoint, + "can_multi_thread": True, + "max_token": 16000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + }) + except: + print(trimmed_format_exc()) # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]: # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"] diff --git a/request_llms/bridge_yimodel.py b/request_llms/bridge_yimodel.py deleted file mode 100644 index 6d65a56d..00000000 --- a/request_llms/bridge_yimodel.py +++ /dev/null @@ -1,283 +0,0 @@ -# 借鉴自同目录下的bridge_chatgpt.py - -""" - 该文件中主要包含三个函数 - - 不具备多线程能力的函数: - 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程 - - 具备多线程调用能力的函数 - 2. predict_no_ui_long_connection:支持多线程 -""" - -import json -import time -import gradio as gr -import logging -import traceback -import requests -import importlib -import random - -# config_private.py放自己的秘密如API和代理网址 -# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件 -from toolbox import get_conf, update_ui, trimmed_format_exc, is_the_upload_folder, read_one_api_model_name -proxies, TIMEOUT_SECONDS, MAX_RETRY, YIMODEL_API_KEY = \ - get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'YIMODEL_API_KEY') - -timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \ - '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。' - -def get_full_error(chunk, stream_response): - """ - 获取完整的从Openai返回的报错 - """ - while True: - try: - chunk += next(stream_response) - except: - break - return chunk - -def decode_chunk(chunk): - # 提前读取一些信息(用于判断异常) - chunk_decoded = chunk.decode() - chunkjson = None - is_last_chunk = False - try: - chunkjson = json.loads(chunk_decoded[6:]) - is_last_chunk = chunkjson.get("lastOne", False) - except: - pass - return chunk_decoded, chunkjson, is_last_chunk - -def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): - """ - 发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。 - inputs: - 是本次问询的输入 - sys_prompt: - 系统静默prompt - llm_kwargs: - chatGPT的内部调优参数 - history: - 是之前的对话列表 - observe_window = None: - 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 - """ - watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 - if inputs == "": inputs = "空空如也的输入栏" - headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True) - retry = 0 - while True: - try: - # make a POST request to the API endpoint, stream=False - from .bridge_all import model_info - endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] - response = requests.post(endpoint, headers=headers, proxies=proxies, - json=payload, stream=True, timeout=TIMEOUT_SECONDS); break - except requests.exceptions.ReadTimeout as e: - retry += 1 - traceback.print_exc() - if retry > MAX_RETRY: raise TimeoutError - if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') - - stream_response = response.iter_lines() - result = '' - is_head_of_the_stream = True - while True: - try: chunk = next(stream_response) - except StopIteration: - break - except requests.exceptions.ConnectionError: - chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 - chunk_decoded, chunkjson, is_last_chunk = decode_chunk(chunk) - if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r'"role":"assistant"' in chunk_decoded): - # 数据流的第一帧不携带content - is_head_of_the_stream = False; continue - if chunk: - try: - 
if is_last_chunk: - # 判定为数据流的结束,gpt_replying_buffer也写完了 - logging.info(f'[response] {result}') - break - result += chunkjson['choices'][0]["delta"]["content"] - if not console_slience: print(chunkjson['choices'][0]["delta"]["content"], end='') - if observe_window is not None: - # 观测窗,把已经获取的数据显示出去 - if len(observe_window) >= 1: - observe_window[0] += chunkjson['choices'][0]["delta"]["content"] - # 看门狗,如果超过期限没有喂狗,则终止 - if len(observe_window) >= 2: - if (time.time()-observe_window[1]) > watch_dog_patience: - raise RuntimeError("用户取消了程序。") - except Exception as e: - chunk = get_full_error(chunk, stream_response) - chunk_decoded = chunk.decode() - error_msg = chunk_decoded - print(error_msg) - raise RuntimeError("Json解析不合常规") - return result - - -def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): - """ - 发送至chatGPT,流式获取输出。 - 用于基础的对话功能。 - inputs 是本次问询的输入 - top_p, temperature是chatGPT的内部调优参数 - history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误) - chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 - additional_fn代表点击的哪个按钮,按钮见functional.py - """ - if len(YIMODEL_API_KEY) == 0: - raise RuntimeError("没有设置YIMODEL_API_KEY选项") - if inputs == "": inputs = "空空如也的输入栏" - user_input = inputs - if additional_fn is not None: - from core_functional import handle_core_functionality - inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) - - raw_input = inputs - logging.info(f'[raw_input] {raw_input}') - chatbot.append((inputs, "")) - yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 - - # check mis-behavior - if is_the_upload_folder(user_input): - chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。") - yield from update_ui(chatbot=chatbot, history=history, msg="正常") # 刷新界面 - time.sleep(2) - - headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream) - - from .bridge_all import model_info - endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] - - history.append(inputs); history.append("") - - retry = 0 - while True: - try: - # make a POST request to the API endpoint, stream=True - response = requests.post(endpoint, headers=headers, proxies=proxies, - json=payload, stream=True, timeout=TIMEOUT_SECONDS);break - except: - retry += 1 - chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg)) - retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else "" - yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面 - if retry > MAX_RETRY: raise TimeoutError - - gpt_replying_buffer = "" - - is_head_of_the_stream = True - if stream: - stream_response = response.iter_lines() - while True: - try: - chunk = next(stream_response) - except StopIteration: - break - except requests.exceptions.ConnectionError: - chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 - - # 提前读取一些信息 (用于判断异常) - chunk_decoded, chunkjson, is_last_chunk = decode_chunk(chunk) - - if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r'"role":"assistant"' in chunk_decoded): - # 数据流的第一帧不携带content - is_head_of_the_stream = False; continue - - if chunk: - try: - if is_last_chunk: - # 判定为数据流的结束,gpt_replying_buffer也写完了 - logging.info(f'[response] {gpt_replying_buffer}') - break - # 处理数据流的主体 - status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}" - gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"] - # 
如果这里抛出异常,一般是文本过长,详情见get_full_error的输出 - history[-1] = gpt_replying_buffer - chatbot[-1] = (history[-2], history[-1]) - yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面 - except Exception as e: - yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面 - chunk = get_full_error(chunk, stream_response) - chunk_decoded = chunk.decode() - error_msg = chunk_decoded - chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg) - yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 - print(error_msg) - return - -def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg): - from .bridge_all import model_info - if "bad_request" in error_msg: - chatbot[-1] = (chatbot[-1][0], "[Local Message] 已经超过了模型的最大上下文或是模型格式错误,请尝试削减单次输入的文本量。") - elif "authentication_error" in error_msg: - chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. 请确保API key有效。") - elif "not_found" in error_msg: - chatbot[-1] = (chatbot[-1][0], f"[Local Message] {llm_kwargs['llm_model']} 无效,请确保使用小写的模型名称。") - elif "rate_limit" in error_msg: - chatbot[-1] = (chatbot[-1][0], "[Local Message] 遇到了控制请求速率限制,请一分钟后重试。") - elif "system_busy" in error_msg: - chatbot[-1] = (chatbot[-1][0], "[Local Message] 系统繁忙,请一分钟后重试。") - else: - from toolbox import regular_txt_to_markdown - tb_str = '```\n' + trimmed_format_exc() + '```' - chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}") - return chatbot, history - -def generate_payload(inputs, llm_kwargs, history, system_prompt, stream): - """ - 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 - """ - api_key = f"Bearer {YIMODEL_API_KEY}" - - headers = { - "Content-Type": "application/json", - "Authorization": api_key - } - - conversation_cnt = len(history) // 2 - - messages = [{"role": "system", "content": system_prompt}] - if conversation_cnt: - for index in range(0, 2*conversation_cnt, 2): - what_i_have_asked = {} - what_i_have_asked["role"] = "user" - what_i_have_asked["content"] = history[index] - what_gpt_answer = {} - what_gpt_answer["role"] = "assistant" - what_gpt_answer["content"] = history[index+1] - if what_i_have_asked["content"] != "": - if what_gpt_answer["content"] == "": continue - if what_gpt_answer["content"] == timeout_bot_msg: continue - messages.append(what_i_have_asked) - messages.append(what_gpt_answer) - else: - messages[-1]['content'] = what_gpt_answer['content'] - - what_i_ask_now = {} - what_i_ask_now["role"] = "user" - what_i_ask_now["content"] = inputs - messages.append(what_i_ask_now) - model = llm_kwargs['llm_model'] - if llm_kwargs['llm_model'].startswith('one-api-'): - model = llm_kwargs['llm_model'][len('one-api-'):] - model, _ = read_one_api_model_name(model) - tokens = 600 if llm_kwargs['llm_model'] == 'yi-34b-chat-0205' else 4096 #yi-34b-chat-0205只有4k上下文... 
- payload = { - "model": model, - "messages": messages, - "temperature": llm_kwargs['temperature'], # 1.0, - "stream": stream, - "max_tokens": tokens - } - try: - print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........") - except: - print('输入中可能存在乱码。') - return headers,payload \ No newline at end of file diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py new file mode 100644 index 00000000..648dbe41 --- /dev/null +++ b/request_llms/oai_std_model_template.py @@ -0,0 +1,401 @@ +import json +import time +import logging +import traceback +import requests + +# config_private.py放自己的秘密如API和代理网址 +# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件 +from toolbox import ( + get_conf, + update_ui, + is_the_upload_folder, +) + +proxies, TIMEOUT_SECONDS, MAX_RETRY = get_conf( + "proxies", "TIMEOUT_SECONDS", "MAX_RETRY" +) + +timeout_bot_msg = ( + "[Local Message] Request timeout. Network error. Please check proxy settings in config.py." + + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。" +) + + +def get_full_error(chunk, stream_response): + """ + 尝试获取完整的错误信息 + """ + while True: + try: + chunk += next(stream_response) + except: + break + return chunk + + +def decode_chunk(chunk): + """ + 用于解读"content"和"finish_reason"的内容 + """ + chunk = chunk.decode() + respose = "" + finish_reason = "False" + try: + chunk = json.loads(chunk[6:]) + except: + finish_reason = "JSON_ERROR" + # 错误处理部分 + if "error" in chunk: + respose = "API_ERROR" + try: + chunk = json.loads(chunk) + finish_reason = chunk["error"]["code"] + except: + finish_reason = "API_ERROR" + return respose, finish_reason + + try: + respose = chunk["choices"][0]["delta"]["content"] + except: + pass + try: + finish_reason = chunk["choices"][0]["finish_reason"] + except: + pass + return respose, finish_reason + + +def generate_message(input, model, key, history, max_output_token, system_prompt, temperature): + """ + 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 + """ + api_key = f"Bearer {key}" + + headers = {"Content-Type": "application/json", "Authorization": api_key} + + conversation_cnt = len(history) // 2 + + messages = [{"role": "system", "content": system_prompt}] + if conversation_cnt: + for index in range(0, 2 * conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index + 1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": + continue + if what_gpt_answer["content"] == timeout_bot_msg: + continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]["content"] = what_gpt_answer["content"] + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = input + messages.append(what_i_ask_now) + playload = { + "model": model, + "messages": messages, + "temperature": temperature, + "stream": True, + "max_tokens": max_output_token, + } + try: + print(f" {model} : {conversation_cnt} : {input[:100]} ..........") + except: + print("输入中可能存在乱码。") + return headers, playload + + +def get_predict_function( + api_key_conf_name, + max_output_token, + disable_proxy = False + ): + """ + 为openai格式的API生成响应函数,其中传入参数: + api_key_conf_name: + `config.py`中此模型的APIKEY的名字,例如"YIMODEL_API_KEY" + max_output_token: + 每次请求的最大token数量,例如对于01万物的yi-34b-chat-200k,其最大请求数为4096 + ⚠️请不要与模型的最大token数量相混淆。 + disable_proxy: + 
是否使用代理,True为不使用,False为使用。 + """ + + APIKEY = get_conf(api_key_conf_name) + + def predict_no_ui_long_connection( + inputs, + llm_kwargs, + history=[], + sys_prompt="", + observe_window=None, + console_slience=False, + ): + """ + 发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。 + inputs: + 是本次问询的输入 + sys_prompt: + 系统静默prompt + llm_kwargs: + chatGPT的内部调优参数 + history: + 是之前的对话列表 + observe_window = None: + 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 + """ + watch_dog_patience = 5 # 看门狗的耐心,设置5秒不准咬人(咬的也不是人 + if len(APIKEY) == 0: + raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}") + if inputs == "": + inputs = "你好👋" + headers, playload = generate_message( + input=inputs, + model=llm_kwargs["llm_model"], + key=APIKEY, + history=history, + max_output_token=max_output_token, + system_prompt=sys_prompt, + temperature=llm_kwargs["temperature"], + ) + retry = 0 + while True: + try: + from .bridge_all import model_info + + endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"] + if not disable_proxy: + response = requests.post( + endpoint, + headers=headers, + proxies=proxies, + json=playload, + stream=True, + timeout=TIMEOUT_SECONDS, + ) + else: + response = requests.post( + endpoint, + headers=headers, + json=playload, + stream=True, + timeout=TIMEOUT_SECONDS, + ) + break + except: + retry += 1 + traceback.print_exc() + if retry > MAX_RETRY: + raise TimeoutError + if MAX_RETRY != 0: + print(f"请求超时,正在重试 ({retry}/{MAX_RETRY}) ……") + + stream_response = response.iter_lines() + result = "" + while True: + try: + chunk = next(stream_response) + except StopIteration: + break + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + response_text, finish_reason = decode_chunk(chunk) + # 返回的数据流第一次为空,继续等待 + if response_text == "" and finish_reason != "False": + continue + if response_text == "API_ERROR" and ( + finish_reason != "False" or finish_reason != "stop" + ): + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + print(chunk_decoded) + raise RuntimeError( + f"API异常,请检测终端输出。可能的原因是:{finish_reason}" + ) + if chunk: + try: + if finish_reason == "stop": + logging.info(f"[response] {result}") + break + result += response_text + if not console_slience: + print(response_text, end="") + if observe_window is not None: + # 观测窗,把已经获取的数据显示出去 + if len(observe_window) >= 1: + observe_window[0] += response_text + # 看门狗,如果超过期限没有喂狗,则终止 + if len(observe_window) >= 2: + if (time.time() - observe_window[1]) > watch_dog_patience: + raise RuntimeError("用户取消了程序。") + except Exception as e: + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + error_msg = chunk_decoded + print(error_msg) + raise RuntimeError("Json解析不合常规") + return result + + def predict( + inputs, + llm_kwargs, + plugin_kwargs, + chatbot, + history=[], + system_prompt="", + stream=True, + additional_fn=None, + ): + """ + 发送至chatGPT,流式获取输出。 + 用于基础的对话功能。 + inputs 是本次问询的输入 + top_p, temperature是chatGPT的内部调优参数 + history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误) + chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 + additional_fn代表点击的哪个按钮,按钮见functional.py + """ + if len(APIKEY) == 0: + raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}") + if inputs == "": + inputs = "你好👋" + if additional_fn is not None: + from core_functional import handle_core_functionality + + inputs, history = handle_core_functionality( + additional_fn, inputs, history, chatbot + ) + logging.info(f"[raw_input] {inputs}") + 
chatbot.append((inputs, "")) + yield from update_ui( + chatbot=chatbot, history=history, msg="等待响应" + ) # 刷新界面 + + # check mis-behavior + if is_the_upload_folder(inputs): + chatbot[-1] = ( + inputs, + f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。", + ) + yield from update_ui( + chatbot=chatbot, history=history, msg="正常" + ) # 刷新界面 + time.sleep(2) + + headers, playload = generate_message( + input=inputs, + model=llm_kwargs["llm_model"], + key=APIKEY, + history=history, + max_output_token=max_output_token, + system_prompt=system_prompt, + temperature=llm_kwargs["temperature"], + ) + + history.append(inputs) + history.append("") + retry = 0 + while True: + try: + from .bridge_all import model_info + + endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"] + if not disable_proxy: + response = requests.post( + endpoint, + headers=headers, + proxies=proxies, + json=playload, + stream=True, + timeout=TIMEOUT_SECONDS, + ) + else: + response = requests.post( + endpoint, + headers=headers, + json=playload, + stream=True, + timeout=TIMEOUT_SECONDS, + ) + break + except: + retry += 1 + chatbot[-1] = (chatbot[-1][0], timeout_bot_msg) + retry_msg = ( + f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else "" + ) + yield from update_ui( + chatbot=chatbot, history=history, msg="请求超时" + retry_msg + ) # 刷新界面 + if retry > MAX_RETRY: + raise TimeoutError + + gpt_replying_buffer = "" + + stream_response = response.iter_lines() + while True: + try: + chunk = next(stream_response) + except StopIteration: + break + except requests.exceptions.ConnectionError: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + response_text, finish_reason = decode_chunk(chunk) + # 返回的数据流第一次为空,继续等待 + if response_text == "" and finish_reason != "False": + continue + if chunk: + try: + if response_text == "API_ERROR" and ( + finish_reason != "False" or finish_reason != "stop" + ): + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + chatbot[-1] = ( + chatbot[-1][0], + "[Local Message] {finish_reason},获得以下报错信息:\n" + + chunk_decoded, + ) + yield from update_ui( + chatbot=chatbot, + history=history, + msg="API异常:" + chunk_decoded, + ) # 刷新界面 + print(chunk_decoded) + return + + if finish_reason == "stop": + logging.info(f"[response] {gpt_replying_buffer}") + break + status_text = f"finish_reason: {finish_reason}" + gpt_replying_buffer += response_text + # 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出 + history[-1] = gpt_replying_buffer + chatbot[-1] = (history[-2], history[-1]) + yield from update_ui( + chatbot=chatbot, history=history, msg=status_text + ) # 刷新界面 + except Exception as e: + yield from update_ui( + chatbot=chatbot, history=history, msg="Json解析不合常规" + ) # 刷新界面 + chunk = get_full_error(chunk, stream_response) + chunk_decoded = chunk.decode() + chatbot[-1] = ( + chatbot[-1][0], + "[Local Message] 解析错误,获得以下报错信息:\n" + chunk_decoded, + ) + yield from update_ui( + chatbot=chatbot, history=history, msg="Json异常" + chunk_decoded + ) # 刷新界面 + print(chunk_decoded) + return + + return predict_no_ui_long_connection, predict
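To actually route requests through the new entries, the model names and the key still have to be enabled in the user configuration, since bridge_all.py only registers a model when it appears in AVAIL_LLM_MODELS. A minimal sketch of the corresponding config_private.py override (the file that overrides config.py and stays out of git); the model list and the key value are placeholders:

# config_private.py (sketch, values are placeholders)
AVAIL_LLM_MODELS = ["gpt-4-1106-preview", "deepseek-chat", "deepseek-coder", "yi-large", "yi-medium"]
DEEPSEEK_API_KEY = "<your-deepseek-api-key>"  # default request address: https://api.deepseek.com/v1/chat/completions
YIMODEL_API_KEY = ""                          # only needed if one of the yi-* entries is kept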
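The new oai_std_model_template.get_predict_function factory can back other OpenAI-compatible chat endpoints as well, without adding another bridge_*.py. Below is a sketch of what such a registration inside request_llms/bridge_all.py could look like, using the keyword names from the factory's definition; the model name "my-provider-chat", the MYPROVIDER_API_KEY config entry, the endpoint URL and the token figures are illustrative assumptions, while model_info, tokenizer_gpt35, get_token_num_gpt35 and trimmed_format_exc are the objects already present in bridge_all.py:

# request_llms/bridge_all.py (sketch) -- "my-provider-chat" and MYPROVIDER_API_KEY are hypothetical
if "my-provider-chat" in AVAIL_LLM_MODELS:
    try:
        myprovider_noui, myprovider_ui = get_predict_function(
            api_key_conf_name="MYPROVIDER_API_KEY",  # name of the key entry in config.py, resolved via get_conf
            max_output_token=4096,                   # per-request completion cap, not the context window
            disable_proxy=False,                     # False: send requests through the configured proxies
        )
        model_info.update({
            "my-provider-chat": {
                "fn_with_ui": myprovider_ui,         # streaming handler for the WebUI
                "fn_without_ui": myprovider_noui,    # long-connection handler used by plugins
                "endpoint": "https://api.example.com/v1/chat/completions",  # placeholder endpoint
                "can_multi_thread": True,
                "max_token": 32000,                  # context length used for token budgeting
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            },
        })
    except:
        print(trimmed_format_exc())

The matching key constant (e.g. MYPROVIDER_API_KEY = "") would also need to be declared in config.py, mirroring the DEEPSEEK_API_KEY addition in this patch, so that get_conf can find it.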