Mirrored from https://github.com/binary-husky/gpt_academic.git
Synced 2025-12-06 06:26:47 +00:00
Alibaba Cloud Bailian (formerly Lingji/DashScope): add support for the deepseek-r1 and deepseek-v3 models (#2182)

* Alibaba Cloud Bailian (formerly Lingji/DashScope): add support for the deepseek-r1 and deepseek-v3 models
* update reasoning display

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
@@ -45,6 +45,7 @@ AVAIL_LLM_MODELS = ["qwen-max", "o1-mini", "o1-mini-2024-09-12", "o1", "o1-2024-
                     "gemini-1.5-pro", "chatglm3", "chatglm4",
                     "deepseek-chat", "deepseek-coder", "deepseek-reasoner",
                     "volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
+                    "dashscope-deepseek-r1", "dashscope-deepseek-v3",
                     ]

 EMBEDDING_MODEL = "text-embedding-3-small"
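As with the other provider-prefixed entries in this list, the two new identifiers only take effect if the user keeps them in AVAIL_LLM_MODELS. A minimal sketch of that gating in Python (the helper function and variable names here are illustrative, not part of the commit):

# Illustrative only: mirrors how availability is checked against AVAIL_LLM_MODELS.
AVAIL_LLM_MODELS = ["qwen-max", "dashscope-deepseek-r1", "dashscope-deepseek-v3"]
dashscope_deepseek_models = ["dashscope-deepseek-r1", "dashscope-deepseek-v3"]

def dashscope_deepseek_enabled(available_models):
    # True if at least one of the new DashScope-hosted DeepSeek models is configured.
    return any(model in dashscope_deepseek_models for model in available_models)

print(dashscope_deepseek_enabled(AVAIL_LLM_MODELS))  # True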
@@ -813,8 +813,9 @@ if "qwen-local" in AVAIL_LLM_MODELS:
         })
     except:
         logger.error(trimmed_format_exc())
-# -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=-
-qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus"]
+
+# -=-=-=-=-=-=- 阿里云百炼(通义)-在线模型 -=-=-=-=-=-=-
+qwen_models = ["qwen-max-latest", "qwen-max-2025-01-25","qwen-max","qwen-turbo","qwen-plus","dashscope-deepseek-r1","dashscope-deepseek-v3"]
 if any(item in qwen_models for item in AVAIL_LLM_MODELS):
     try:
         from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
@@ -864,10 +865,30 @@ if any(item in qwen_models for item in AVAIL_LLM_MODELS):
                 "max_token": 30720,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
+            },
+            "dashscope-deepseek-r1": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "enable_reasoning": True,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 57344,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            },
+            "dashscope-deepseek-v3": {
+                "fn_with_ui": qwen_ui,
+                "fn_without_ui": qwen_noui,
+                "can_multi_thread": True,
+                "endpoint": None,
+                "max_token": 57344,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
             }
         })
     except:
         logger.error(trimmed_format_exc())

 # -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=-
 yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"]
 if any(item in yi_models for item in AVAIL_LLM_MODELS):
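For readers unfamiliar with this registry, each model_info entry maps a model name to its UI and non-UI bridge functions plus limits; enable_reasoning is set only for deepseek-r1, presumably so the UI renders its reasoning stream separately. A self-contained sketch of how such an entry might be consumed (the describe helper and the lambda stand-ins are hypothetical, not from the repository):

# Hypothetical consumer of a registry entry shaped like the ones added above.
model_info = {
    "dashscope-deepseek-r1": {
        "fn_without_ui": lambda *args, **kwargs: "...",  # stand-in for qwen_noui
        "enable_reasoning": True,
        "can_multi_thread": True,
        "max_token": 57344,
    },
}

def describe(model_name):
    entry = model_info[model_name]
    reasoning = "renders reasoning" if entry.get("enable_reasoning") else "no reasoning pane"
    return f"{model_name}: max_token={entry['max_token']}, {reasoning}"

print(describe("dashscope-deepseek-r1"))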
@@ -368,12 +368,12 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                     log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
                     break # 对于符合规范的接口,这里可以break
                 else:
-                    continue # 对于不符合规范的狗屎接口,这里需要继续
+                    continue # 对于不符合规范的接口,这里需要继续
             # 到这里,我们已经可以假定必须包含choice了
             try:
                 status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
             except:
-                logger.error(f"一些垃圾第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}")
+                logger.error(f"一些第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}")
             # 处理数据流的主体
             if has_content:
                 # 正常情况
@@ -382,9 +382,9 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
                 # 一些第三方接口的出现这样的错误,兼容一下吧
                 continue
             else:
-                # 至此已经超出了正常接口应该进入的范围,一些垃圾第三方接口会出现这样的错误
+                # 至此已经超出了正常接口应该进入的范围,一些第三方接口会出现这样的错误
                 if chunkjson['choices'][0]["delta"].get("content", None) is None:
-                    logger.error(f"一些垃圾第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}")
+                    logger.error(f"一些第三方接口出现这样的错误,兼容一下吧: {chunk_decoded}")
                     continue
                 gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]

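Both hunks above are part of the same defensive pattern: streamed chunks from non-conforming third-party endpoints may lack a choices array or a delta content field, so the bridge logs the offending chunk and skips it instead of raising. A standalone sketch of that idea, assuming plain JSON chunk strings (this helper is illustrative, not the project's actual parser):

import json
from typing import Optional

def extract_delta_content(chunk_decoded: str) -> Optional[str]:
    # Returns the incremental text if the chunk follows the OpenAI-style schema,
    # otherwise None so the caller can `continue`, as the bridge does.
    try:
        chunkjson = json.loads(chunk_decoded)
        return chunkjson["choices"][0]["delta"].get("content")
    except Exception:
        return None

print(extract_delta_content('{"choices": [{"delta": {"content": "hi"}}]}'))  # hi
print(extract_delta_content('{"event": "ping"}'))                            # None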
@@ -3,6 +3,7 @@ from toolbox import get_conf
 import threading

 timeout_bot_msg = '[Local Message] Request timeout. Network error.'
+model_prefix_to_remove = 'dashscope-'

 class QwenRequestInstance():
     def __init__(self):
@@ -20,6 +21,13 @@ class QwenRequestInstance():
             raise RuntimeError('请配置 DASHSCOPE_API_KEY')
         dashscope.api_key = get_conf("DASHSCOPE_API_KEY")

+    def format_reasoning(self, reasoning_content:str, main_content:str):
+        if reasoning_content:
+            reasoning_content_paragraphs = ''.join([f'<p style="margin: 1.25em 0;">{line}</p>' for line in reasoning_content.split('\n')])
+            formatted_reasoning_content = f'<div class="reasoning_process">{reasoning_content_paragraphs}</div>\n\n---\n\n'
+            return formatted_reasoning_content + main_content
+        else:
+            return main_content

     def generate(self, inputs, llm_kwargs, history, system_prompt):
         # import _thread as thread
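The new format_reasoning helper wraps each line of reasoning text in a styled paragraph inside a <div class="reasoning_process"> block and separates it from the answer with a horizontal rule; with no reasoning text it returns the answer unchanged. A standalone copy for quick experimentation (behaviour mirrors the diff above; running it outside the class is only for illustration):

def format_reasoning(reasoning_content: str, main_content: str) -> str:
    # Mirrors the method added above, minus `self`.
    if reasoning_content:
        paragraphs = ''.join(f'<p style="margin: 1.25em 0;">{line}</p>'
                             for line in reasoning_content.split('\n'))
        return f'<div class="reasoning_process">{paragraphs}</div>\n\n---\n\n' + main_content
    return main_content

print(format_reasoning("step 1\nstep 2", "final answer"))
print(format_reasoning("", "final answer"))  # no reasoning: plain answer is returned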
@@ -28,9 +36,13 @@ class QwenRequestInstance():
         if top_p == 0: top_p += 1e-5
         if top_p == 1: top_p -= 1e-5

+        model_name = llm_kwargs['llm_model']
+        if model_name.startswith(model_prefix_to_remove): model_name = model_name[len(model_prefix_to_remove):]
+
+        self.reasoning_buf = ""
         self.result_buf = ""
         responses = Generation.call(
-            model=llm_kwargs['llm_model'],
+            model=model_name,
             messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
             top_p=top_p,
             temperature=llm_kwargs.get('temperature', 1.0),
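The dashscope- prefix exists only on the gpt_academic side, presumably to distinguish these DashScope-hosted variants from the deepseek-chat/deepseek-reasoner entries already in AVAIL_LLM_MODELS; it is stripped before the name is handed to Generation.call. A minimal sketch of that mapping (the helper name is illustrative):

model_prefix_to_remove = 'dashscope-'

def to_dashscope_model_name(llm_model: str) -> str:
    # "dashscope-deepseek-r1" -> "deepseek-r1"; other names pass through unchanged.
    if llm_model.startswith(model_prefix_to_remove):
        return llm_model[len(model_prefix_to_remove):]
    return llm_model

print(to_dashscope_model_name("dashscope-deepseek-r1"))  # deepseek-r1
print(to_dashscope_model_name("qwen-max"))               # qwen-max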
@@ -46,18 +58,24 @@ class QwenRequestInstance():
                         self.result_buf += response.output.choices[0].message.content
                     except:
                         pass
-                    yield self.result_buf
+                    yield self.format_reasoning(self.reasoning_buf, self.result_buf)
                     break
                 elif response.output.choices[0].finish_reason == 'length':
                     self.result_buf += "[Local Message] 生成长度过长,后续输出被截断"
-                    yield self.result_buf
+                    yield self.format_reasoning(self.reasoning_buf, self.result_buf)
                     break
                 else:
+                    try:
+                        contain_reasoning = hasattr(response.output.choices[0].message, 'reasoning_content')
+                    except:
+                        contain_reasoning = False
+                    if contain_reasoning:
+                        self.reasoning_buf += response.output.choices[0].message.reasoning_content
                     self.result_buf += response.output.choices[0].message.content
-                    yield self.result_buf
+                    yield self.format_reasoning(self.reasoning_buf, self.result_buf)
             else:
                 self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}"
-                yield self.result_buf
+                yield self.format_reasoning(self.reasoning_buf, self.result_buf)
                 break

         # 耗尽generator避免报错
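In the streaming loop above, reasoning deltas (reasoning_content) and answer deltas (content) accumulate into separate buffers, and every yield re-renders both so the UI can show the reasoning above the final answer. A self-contained sketch of that buffering using mocked response objects in place of the real dashscope SDK (the finish_reason control flow is simplified; only the accumulation idea is shown):

from types import SimpleNamespace

def fake_stream():
    # Stand-ins for streamed Generation.call responses; not real dashscope objects.
    yield SimpleNamespace(output=SimpleNamespace(choices=[SimpleNamespace(
        finish_reason='null',
        message=SimpleNamespace(content='', reasoning_content='weighing options...'))]))
    yield SimpleNamespace(output=SimpleNamespace(choices=[SimpleNamespace(
        finish_reason='stop',
        message=SimpleNamespace(content='The answer is 42.'))]))

reasoning_buf, result_buf = "", ""
for response in fake_stream():
    message = response.output.choices[0].message
    if hasattr(message, 'reasoning_content'):   # same check the bridge performs
        reasoning_buf += message.reasoning_content
    result_buf += message.content
    print((reasoning_buf, result_buf))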