From 82aac97980a610fa7847204b3ad8b3621bcf8b5d Mon Sep 17 00:00:00 2001
From: BZfei <33652938+zqfzqf@users.noreply.github.com>
Date: Tue, 25 Mar 2025 00:11:55 +0800
Subject: [PATCH] Alibaba Cloud Bailian (formerly DashScope): add support for the deepseek-r1 and deepseek-v3 models (#2182)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Alibaba Cloud Bailian (formerly DashScope): add support for the deepseek-r1 and deepseek-v3 models

* update reasoning display

---------

Co-authored-by: binary-husky
---
 config.py                      |  1 +
 request_llms/bridge_all.py     | 25 +++++++++++++++++++++++--
 request_llms/bridge_chatgpt.py |  8 ++++----
 request_llms/com_qwenapi.py    | 28 +++++++++++++++++++++++-----
 4 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/config.py b/config.py
index e668ac90..ac8d2fdf 100644
--- a/config.py
+++ b/config.py
@@ -45,6 +45,7 @@ AVAIL_LLM_MODELS = ["qwen-max", "o1-mini", "o1-mini-2024-09-12", "o1", "o1-2024-
                     "gemini-1.5-pro", "chatglm3", "chatglm4",
                     "deepseek-chat", "deepseek-coder", "deepseek-reasoner",
                     "volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
+                    "dashscope-deepseek-r1", "dashscope-deepseek-v3",
                     ]

 EMBEDDING_MODEL = "text-embedding-3-small"
diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index e911a765..a2bb3a01 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -813,8 +813,9 @@ if "qwen-local" in AVAIL_LLM_MODELS:
         })
     except:
         logger.error(trimmed_format_exc())
-# -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=-
-qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus"]
+
+# -=-=-=-=-=-=- 阿里云百炼(通义)-在线模型 -=-=-=-=-=-=-
+qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus","dashscope-deepseek-r1","dashscope-deepseek-v3"]
 if any(item in qwen_models for item in AVAIL_LLM_MODELS):
     try:
         from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
@@ -864,10 +865,30 @@ if any(item in qwen_models for item in AVAIL_LLM_MODELS):
                 "max_token": 30720,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
+            },
+            "dashscope-deepseek-r1": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "enable_reasoning": True,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 57344,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "dashscope-deepseek-v3": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 57344,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
             }
         })
     except:
         logger.error(trimmed_format_exc())
+
 # -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=-
 yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"]
 if any(item in yi_models for item in AVAIL_LLM_MODELS):
diff --git a/request_llms/bridge_chatgpt.py b/request_llms/bridge_chatgpt.py
index 7d5cbe64..b1eddb9e 100644
--- a/request_llms/bridge_chatgpt.py
+++ b/request_llms/bridge_chatgpt.py
@@ -368,12 +368,12 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                     log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
                     break # 对于符合规范的接口,这里可以break
                 else:
-                    continue # 对于不符合规范的狗屎接口,这里需要继续
+                    continue # 对于不符合规范的接口,这里需要继续
                 # 到这里,我们已经可以假定必须包含choice了
                 try:
                     status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
                 except:
logger.error(f"一些垃圾第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}") + logger.error(f"一些第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}") # 处理数据流的主体 if has_content: # 正常情况 @@ -382,9 +382,9 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith # 一些第三方接口的出现这样的错误,兼容一下吧 continue else: - # 至此已经超出了正常接口应该进入的范围,一些垃圾第三方接口会出现这样的错误 + # 至此已经超出了正常接口应该进入的范围,一些第三方接口会出现这样的错误 if chunkjson['choices'][0]["delta"].get("content", None) is None: - logger.error(f"一些垃圾第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}") + logger.error(f"一些第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}") continue gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"] diff --git a/request_llms/com_qwenapi.py b/request_llms/com_qwenapi.py index 70872e16..edbf9d7b 100644 --- a/request_llms/com_qwenapi.py +++ b/request_llms/com_qwenapi.py @@ -3,6 +3,7 @@ from toolbox import get_conf import threading timeout_bot_msg = '[Local Message] Request timeout. Network error.' +model_prefix_to_remove = 'dashscope-' class QwenRequestInstance(): def __init__(self): @@ -20,6 +21,13 @@ class QwenRequestInstance(): raise RuntimeError('请配置 DASHSCOPE_API_KEY') dashscope.api_key = get_conf("DASHSCOPE_API_KEY") + def format_reasoning(self, reasoning_content:str, main_content:str): + if reasoning_content: + reasoning_content_paragraphs = ''.join([f'

{line}

' for line in reasoning_content.split('\n')]) + formatted_reasoning_content = f'
{reasoning_content_paragraphs}
\n\n---\n\n' + return formatted_reasoning_content + main_content + else: + return main_content def generate(self, inputs, llm_kwargs, history, system_prompt): # import _thread as thread @@ -28,9 +36,13 @@ class QwenRequestInstance(): if top_p == 0: top_p += 1e-5 if top_p == 1: top_p -= 1e-5 + model_name = llm_kwargs['llm_model'] + if model_name.startswith(model_prefix_to_remove): model_name = model_name[len(model_prefix_to_remove):] + + self.reasoning_buf = "" self.result_buf = "" responses = Generation.call( - model=llm_kwargs['llm_model'], + model=model_name, messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt), top_p=top_p, temperature=llm_kwargs.get('temperature', 1.0), @@ -46,18 +58,24 @@ class QwenRequestInstance(): self.result_buf += response.output.choices[0].message.content except: pass - yield self.result_buf + yield self.format_reasoning(self.reasoning_buf, self.result_buf) break elif response.output.choices[0].finish_reason == 'length': self.result_buf += "[Local Message] 生成长度过长,后续输出被截断" - yield self.result_buf + yield self.format_reasoning(self.reasoning_buf, self.result_buf) break else: + try: + contain_reasoning = hasattr(response.output.choices[0].message, 'reasoning_content') + except: + contain_reasoning = False + if contain_reasoning: + self.reasoning_buf += response.output.choices[0].message.reasoning_content self.result_buf += response.output.choices[0].message.content - yield self.result_buf + yield self.format_reasoning(self.reasoning_buf, self.result_buf) else: self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}" - yield self.result_buf + yield self.format_reasoning(self.reasoning_buf, self.result_buf) break # 耗尽generator避免报错
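
For reference, a minimal standalone sketch (not part of the patch) of the two behaviours the diff introduces: stripping the "dashscope-" UI prefix before the model name is handed to DashScope, and prepending the model's reasoning trace to the final answer. The helper names here are illustrative, and the <p>/<div class="reasoning_process"> markup mirrors the reconstructed f-strings above and is an assumption.

# Hypothetical sketch; names and markup are illustrative, not taken from the repository.
MODEL_PREFIX_TO_REMOVE = 'dashscope-'

def resolve_dashscope_model(llm_model: str) -> str:
    # "dashscope-deepseek-r1" (name shown in the UI) -> "deepseek-r1" (name sent to the API)
    if llm_model.startswith(MODEL_PREFIX_TO_REMOVE):
        return llm_model[len(MODEL_PREFIX_TO_REMOVE):]
    return llm_model

def format_reasoning(reasoning_content: str, main_content: str) -> str:
    # Wrap each reasoning line in a <p> tag, place the whole trace in a styled <div>,
    # and separate it from the answer with a horizontal rule, as the patch does.
    if not reasoning_content:
        return main_content
    paragraphs = ''.join(f'<p style="margin: 1.25em 0;">{line}</p>'
                         for line in reasoning_content.split('\n'))
    return f'<div class="reasoning_process">{paragraphs}</div>\n\n---\n\n' + main_content

if __name__ == '__main__':
    assert resolve_dashscope_model('dashscope-deepseek-r1') == 'deepseek-r1'
    print(format_reasoning('step 1\nstep 2', 'final answer'))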