From 72dbe856d29b20ecf1932c9dde1160a3278cb4e6 Mon Sep 17 00:00:00 2001
From: littleolaf <74704650+littleolaf@users.noreply.github.com>
Date: Tue, 4 Mar 2025 23:58:03 +0800
Subject: [PATCH] Add support for Volcengine online LLM models (#2165)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* use oai adaptive bridge function to handle vol engine

* add vol engine deepseek v3

---------

Co-authored-by: binary-husky
---
 config.py                              |  7 +-
 request_llms/bridge_all.py             | 62 ++++++++++++++++-
 request_llms/oai_std_model_template.py | 21 ++++--
 tests/test_llms.py                     | 95 +++++++++++++++-----------
 4 files changed, 140 insertions(+), 45 deletions(-)

diff --git a/config.py b/config.py
index 709d4455..e668ac90 100644
--- a/config.py
+++ b/config.py
@@ -43,7 +43,8 @@ AVAIL_LLM_MODELS = ["qwen-max", "o1-mini", "o1-mini-2024-09-12", "o1", "o1-2024-
                     "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
                     "gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
                     "gemini-1.5-pro", "chatglm3", "chatglm4",
-                    "deepseek-chat", "deepseek-coder", "deepseek-reasoner"
+                    "deepseek-chat", "deepseek-coder", "deepseek-reasoner",
+                    "volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
                     ]

 EMBEDDING_MODEL = "text-embedding-3-small"
@@ -267,6 +268,10 @@ MOONSHOT_API_KEY = ""
 YIMODEL_API_KEY = ""


+# 接入火山引擎的在线大模型,api-key获取地址 https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint
+ARK_API_KEY = "00000000-0000-0000-0000-000000000000" # 火山引擎 API KEY
+
+
 # 紫东太初大模型 https://ai-maas.wair.ac.cn
 TAICHU_API_KEY = ""

diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index 100f285e..e911a765 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -80,6 +80,7 @@ ollama_endpoint = "http://localhost:11434/api/chat"
 yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
 deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
 grok_model_endpoint = "https://api.x.ai/v1/chat/completions"
+volcengine_endpoint = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"

 if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
 azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -102,6 +103,7 @@ if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollam
 if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
 if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
 if grok_model_endpoint in API_URL_REDIRECT: grok_model_endpoint = API_URL_REDIRECT[grok_model_endpoint]
+if volcengine_endpoint in API_URL_REDIRECT: volcengine_endpoint = API_URL_REDIRECT[volcengine_endpoint]

 # 获取tokenizer
 tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
@@ -954,7 +956,7 @@ if any(item in grok_models for item in AVAIL_LLM_MODELS):
     try:
         grok_beta_128k_noui, grok_beta_128k_ui = get_predict_function(
             api_key_conf_name="GROK_API_KEY", max_output_token=8192, disable_proxy=False
-            )
+        )

         model_info.update({
             "grok-beta": {
@@ -1089,8 +1091,10 @@ if "deepseekcoder" in AVAIL_LLM_MODELS:    # deepseekcoder
         })
     except:
         logger.error(trimmed_format_exc())
+
 # -=-=-=-=-=-=- 幻方-深度求索大模型在线API -=-=-=-=-=-=-
-if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
"deepseek-reasoner" in AVAIL_LLM_MODELS: +claude_models = ["deepseek-chat", "deepseek-coder", "deepseek-reasoner"] +if any(item in claude_models for item in AVAIL_LLM_MODELS): try: deepseekapi_noui, deepseekapi_ui = get_predict_function( api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False @@ -1127,6 +1131,60 @@ if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS o }) except: logger.error(trimmed_format_exc()) + +# -=-=-=-=-=-=- 火山引擎 对齐支持 -=-=-=-=-=-=- +for model in [m for m in AVAIL_LLM_MODELS if m.startswith("volcengine-")]: + # 为了更灵活地接入volcengine多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["volcengine-deepseek-r1-250120(max_token=6666)"] + # 其中 + # "volcengine-" 是前缀(必要) + # "deepseek-r1-250120" 是模型名(必要) + # "(max_token=6666)" 是配置(非必要) + model_info_extend = model_info + model_info_extend.update({ + "deepseek-r1-250120": { + "max_token": 16384, + "enable_reasoning": True, + "can_multi_thread": True, + "endpoint": volcengine_endpoint, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "deepseek-v3-241226": { + "max_token": 16384, + "enable_reasoning": False, + "can_multi_thread": True, + "endpoint": volcengine_endpoint, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + }) + try: + origin_model_name, max_token_tmp = read_one_api_model_name(model) + # 如果是已知模型,则尝试获取其信息 + original_model_info = model_info_extend.get(origin_model_name.replace("volcengine-", "", 1), None) + except: + logger.error(f"volcengine模型 {model} 的 max_token 配置不是整数,请检查配置文件。") + continue + + volcengine_noui, volcengine_ui = get_predict_function(api_key_conf_name="ARK_API_KEY", max_output_token=8192, disable_proxy=True, model_remove_prefix = ["volcengine-"]) + + this_model_info = { + "fn_with_ui": volcengine_ui, + "fn_without_ui": volcengine_noui, + "endpoint": volcengine_endpoint, + "can_multi_thread": True, + "max_token": 64000, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + } + + # 同步已知模型的其他信息 + attribute = "has_multimodal_capacity" + if original_model_info is not None and original_model_info.get(attribute, None) is not None: this_model_info.update({attribute: original_model_info.get(attribute, None)}) + attribute = "enable_reasoning" + if original_model_info is not None and original_model_info.get(attribute, None) is not None: this_model_info.update({attribute: original_model_info.get(attribute, None)}) + model_info.update({model: this_model_info}) + # -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=- for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]: # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"] diff --git a/request_llms/oai_std_model_template.py b/request_llms/oai_std_model_template.py index d50e0d27..66f3a0d7 100644 --- a/request_llms/oai_std_model_template.py +++ b/request_llms/oai_std_model_template.py @@ -57,7 +57,7 @@ def decode_chunk(chunk): finish_reason = chunk["error"]["code"] except: finish_reason = "API_ERROR" - return response, reasoning_content, finish_reason + return response, reasoning_content, finish_reason, str(chunk) try: if chunk["choices"][0]["delta"]["content"] is not None: @@ -122,7 +122,8 @@ def generate_message(input, model, key, history, max_output_token, system_prompt def get_predict_function( api_key_conf_name, max_output_token, - disable_proxy = False + disable_proxy = False, + model_remove_prefix = [], ): """ 为openai格式的API生成响应函数,其中传入参数: @@ -137,6 +138,16 @@ def get_predict_function( APIKEY = 
+    def remove_prefix(model_name):
+        # 去除模型名字的前缀,输入 volcengine-deepseek-r1-250120 会返回 deepseek-r1-250120
+        if not model_remove_prefix:
+            return model_name
+        model_without_prefix = model_name
+        for prefix in model_remove_prefix:
+            if model_without_prefix.startswith(prefix):
+                model_without_prefix = model_without_prefix[len(prefix):]
+        return model_without_prefix
+
     def predict_no_ui_long_connection(
         inputs,
         llm_kwargs,
@@ -164,9 +175,11 @@ def get_predict_function(
             raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}")
         if inputs == "":
             inputs = "你好👋"
+
+
         headers, payload = generate_message(
             input=inputs,
-            model=llm_kwargs["llm_model"],
+            model=remove_prefix(llm_kwargs["llm_model"]),
             key=APIKEY,
             history=history,
             max_output_token=max_output_token,
@@ -302,7 +315,7 @@

         headers, payload = generate_message(
             input=inputs,
-            model=llm_kwargs["llm_model"],
+            model=remove_prefix(llm_kwargs["llm_model"]),
             key=APIKEY,
             history=history,
             max_output_token=max_output_token,
diff --git a/tests/test_llms.py b/tests/test_llms.py
index 20d25787..ccfe1d82 100644
--- a/tests/test_llms.py
+++ b/tests/test_llms.py
@@ -11,46 +11,65 @@ def validate_path():

 validate_path()  # validate path so you can run from base directory

+if __name__ == "__main__":
+    # from request_llms.bridge_taichu import predict_no_ui_long_connection
+    from request_llms.bridge_all import predict_no_ui_long_connection
+    # from request_llms.bridge_cohere import predict_no_ui_long_connection
+    # from request_llms.bridge_spark import predict_no_ui_long_connection
+    # from request_llms.bridge_zhipu import predict_no_ui_long_connection
+    # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
+    llm_kwargs = {
+        "llm_model": "volcengine-deepseek-v3-241226",
+        "max_length": 4096,
+        "top_p": 1,
+        "temperature": 1,
+    }

-if "在线模型":
-    if __name__ == "__main__":
-        from request_llms.bridge_taichu import predict_no_ui_long_connection
-        # from request_llms.bridge_cohere import predict_no_ui_long_connection
-        # from request_llms.bridge_spark import predict_no_ui_long_connection
-        # from request_llms.bridge_zhipu import predict_no_ui_long_connection
-        # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
-        llm_kwargs = {
-            "llm_model": "taichu",
-            "max_length": 4096,
-            "top_p": 1,
-            "temperature": 1,
-        }

+    result = predict_no_ui_long_connection(
+        inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
+    )
+    print("final result:", result)
+    print("final result:", result)
+# if "在线模型":
+#     if __name__ == "__main__":
+#         # from request_llms.bridge_taichu import predict_no_ui_long_connection
+#         from request_llms.bridge_volcengine import predict_no_ui_long_connection
+#         # from request_llms.bridge_cohere import predict_no_ui_long_connection
+#         # from request_llms.bridge_spark import predict_no_ui_long_connection
+#         # from request_llms.bridge_zhipu import predict_no_ui_long_connection
+#         # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
+#         llm_kwargs = {
+#             "llm_model": "ep-20250222011816-5cq8z",
+#             "max_length": 4096,
+#             "top_p": 1,
+#             "temperature": 1,
+#         }

-        result = predict_no_ui_long_connection(
-            inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
-        )
-        print("final result:", result)
-        print("final result:", result)
+# result = predict_no_ui_long_connection(
+#     inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
+# )
+# print("final result:", result)
+# print("final result:", result)

-if "本地模型":
"本地模型": - if __name__ == "__main__": - # from request_llms.bridge_newbingfree import predict_no_ui_long_connection - # from request_llms.bridge_moss import predict_no_ui_long_connection - # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection - # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection - # from request_llms.bridge_claude import predict_no_ui_long_connection - # from request_llms.bridge_internlm import predict_no_ui_long_connection - # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection - # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection - # from request_llms.bridge_qwen_local import predict_no_ui_long_connection - llm_kwargs = { - "max_length": 4096, - "top_p": 1, - "temperature": 1, - } - result = predict_no_ui_long_connection( - inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="" - ) - print("final result:", result) +# if "本地模型": +# if __name__ == "__main__": +# # from request_llms.bridge_newbingfree import predict_no_ui_long_connection +# # from request_llms.bridge_moss import predict_no_ui_long_connection +# # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection +# # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection +# # from request_llms.bridge_claude import predict_no_ui_long_connection +# # from request_llms.bridge_internlm import predict_no_ui_long_connection +# # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection +# # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection +# # from request_llms.bridge_qwen_local import predict_no_ui_long_connection +# llm_kwargs = { +# "max_length": 4096, +# "top_p": 1, +# "temperature": 1, +# } +# result = predict_no_ui_long_connection( +# inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="" +# ) +# print("final result:", result)