From 82aac97980a610fa7847204b3ad8b3621bcf8b5d Mon Sep 17 00:00:00 2001
From: BZfei <33652938+zqfzqf@users.noreply.github.com>
Date: Tue, 25 Mar 2025 00:11:55 +0800
Subject: [PATCH] Alibaba Cloud Bailian (formerly DashScope): add support for the deepseek-r1 and deepseek-v3 models (#2182)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Alibaba Cloud Bailian (formerly DashScope): add support for the deepseek-r1 and deepseek-v3 models

* update reasoning display

---------

Co-authored-by: binary-husky
---
 config.py                      |  1 +
 request_llms/bridge_all.py     | 25 +++++++++++++++++++++++--
 request_llms/bridge_chatgpt.py |  8 ++++----
 request_llms/com_qwenapi.py    | 28 +++++++++++++++++++++++-----
 4 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/config.py b/config.py
index e668ac90..ac8d2fdf 100644
--- a/config.py
+++ b/config.py
@@ -45,6 +45,7 @@ AVAIL_LLM_MODELS = ["qwen-max", "o1-mini", "o1-mini-2024-09-12", "o1", "o1-2024-
                     "gemini-1.5-pro", "chatglm3", "chatglm4",
                     "deepseek-chat", "deepseek-coder", "deepseek-reasoner",
                     "volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
+                    "dashscope-deepseek-r1", "dashscope-deepseek-v3",
                     ]

 EMBEDDING_MODEL = "text-embedding-3-small"
diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index e911a765..a2bb3a01 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -813,8 +813,9 @@ if "qwen-local" in AVAIL_LLM_MODELS:
         })
     except:
         logger.error(trimmed_format_exc())
-# -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=-
-qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus"]
+
+# -=-=-=-=-=-=- 阿里云百炼(通义)-在线模型 -=-=-=-=-=-=-
+qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus","dashscope-deepseek-r1","dashscope-deepseek-v3"]
 if any(item in qwen_models for item in AVAIL_LLM_MODELS):
     try:
         from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
@@ -864,10 +865,30 @@ if any(item in qwen_models for item in AVAIL_LLM_MODELS):
                 "max_token": 30720,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
+            },
+            "dashscope-deepseek-r1": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "enable_reasoning": True,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 57344,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "dashscope-deepseek-v3": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 57344,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
             }
         })
     except:
         logger.error(trimmed_format_exc())
+
 # -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=-
 yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"]
 if any(item in yi_models for item in AVAIL_LLM_MODELS):
diff --git a/request_llms/bridge_chatgpt.py b/request_llms/bridge_chatgpt.py
index 7d5cbe64..b1eddb9e 100644
--- a/request_llms/bridge_chatgpt.py
+++ b/request_llms/bridge_chatgpt.py
@@ -368,12 +368,12 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                     log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
                     break # 对于符合规范的接口,这里可以break
                 else:
-                    continue # 对于不符合规范的狗屎接口,这里需要继续
+                    continue # 对于不符合规范的接口,这里需要继续
                 # 到这里,我们已经可以假定必须包含choice了
                 try:
                     status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
                 except:
logger.error(f"一些垃圾第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}") + logger.error(f"一些第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}") # 处理数据流的主体 if has_content: # 正常情况 @@ -382,9 +382,9 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith # 一些第三方接口的出现这样的错误,兼容一下吧 continue else: - # 至此已经超出了正常接口应该进入的范围,一些垃圾第三方接口会出现这样的错误 + # 至此已经超出了正常接口应该进入的范围,一些第三方接口会出现这样的错误 if chunkjson['choices'][0]["delta"].get("content", None) is None: - logger.error(f"一些垃圾第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}") + logger.error(f"一些第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}") continue gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"] diff --git a/request_llms/com_qwenapi.py b/request_llms/com_qwenapi.py index 70872e16..edbf9d7b 100644 --- a/request_llms/com_qwenapi.py +++ b/request_llms/com_qwenapi.py @@ -3,6 +3,7 @@ from toolbox import get_conf import threading timeout_bot_msg = '[Local Message] Request timeout. Network error.' +model_prefix_to_remove = 'dashscope-' class QwenRequestInstance(): def __init__(self): @@ -20,6 +21,13 @@ class QwenRequestInstance(): raise RuntimeError('请配置 DASHSCOPE_API_KEY') dashscope.api_key = get_conf("DASHSCOPE_API_KEY") + def format_reasoning(self, reasoning_content:str, main_content:str): + if reasoning_content: + reasoning_content_paragraphs = ''.join([f'

{line}

' for line in reasoning_content.split('\n')]) + formatted_reasoning_content = f'
{reasoning_content_paragraphs}
\n\n---\n\n' + return formatted_reasoning_content + main_content + else: + return main_content def generate(self, inputs, llm_kwargs, history, system_prompt): # import _thread as thread @@ -28,9 +36,13 @@ class QwenRequestInstance(): if top_p == 0: top_p += 1e-5 if top_p == 1: top_p -= 1e-5 + model_name = llm_kwargs['llm_model'] + if model_name.startswith(model_prefix_to_remove): model_name = model_name[len(model_prefix_to_remove):] + + self.reasoning_buf = "" self.result_buf = "" responses = Generation.call( - model=llm_kwargs['llm_model'], + model=model_name, messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt), top_p=top_p, temperature=llm_kwargs.get('temperature', 1.0), @@ -46,18 +58,24 @@ class QwenRequestInstance(): self.result_buf += response.output.choices[0].message.content except: pass - yield self.result_buf + yield self.format_reasoning(self.reasoning_buf, self.result_buf) break elif response.output.choices[0].finish_reason == 'length': self.result_buf += "[Local Message] 生成长度过长,后续输出被截断" - yield self.result_buf + yield self.format_reasoning(self.reasoning_buf, self.result_buf) break else: + try: + contain_reasoning = hasattr(response.output.choices[0].message, 'reasoning_content') + except: + contain_reasoning = False + if contain_reasoning: + self.reasoning_buf += response.output.choices[0].message.reasoning_content self.result_buf += response.output.choices[0].message.content - yield self.result_buf + yield self.format_reasoning(self.reasoning_buf, self.result_buf) else: self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}" - yield self.result_buf + yield self.format_reasoning(self.reasoning_buf, self.result_buf) break # 耗尽generator避免报错
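
For reference, a minimal standalone sketch (not part of the patch) of the two behaviours the diff introduces: stripping the "dashscope-" UI prefix before the model name is handed to DashScope, and prepending the model's reasoning trace to the final answer. The helper names here are illustrative, and the <p>/<div class="reasoning_process"> markup mirrors the reconstructed f-strings above and is an assumption.

# Hypothetical sketch; names and markup are illustrative, not taken from the repository.
MODEL_PREFIX_TO_REMOVE = 'dashscope-'

def resolve_dashscope_model(llm_model: str) -> str:
    # "dashscope-deepseek-r1" (name shown in the UI) -> "deepseek-r1" (name sent to the API)
    if llm_model.startswith(MODEL_PREFIX_TO_REMOVE):
        return llm_model[len(MODEL_PREFIX_TO_REMOVE):]
    return llm_model

def format_reasoning(reasoning_content: str, main_content: str) -> str:
    # Wrap each reasoning line in a <p> tag, place the whole trace in a styled <div>,
    # and separate it from the answer with a horizontal rule, as the patch does.
    if not reasoning_content:
        return main_content
    paragraphs = ''.join(f'<p style="margin: 1.25em 0;">{line}</p>'
                         for line in reasoning_content.split('\n'))
    return f'<div class="reasoning_process">{paragraphs}</div>\n\n---\n\n' + main_content

if __name__ == '__main__':
    assert resolve_dashscope_model('dashscope-deepseek-r1') == 'deepseek-r1'
    print(format_reasoning('step 1\nstep 2', 'final answer'))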