Mirrored from https://github.com/binary-husky/gpt_academic.git
Synced 2025-12-06 06:26:47 +00:00
Add support for the Volcengine online LLMs (#2165)
* use oai adaptive bridge function to handle vol engine
* add vol engine deepseek v3

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
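For context, enabling the new models happens on the user side in config.py; a minimal sketch based on the diff below (the UUID-style key is the placeholder value from the commit):

# config.py -- user-side settings touched by this commit
AVAIL_LLM_MODELS = [
    "gpt-4", "deepseek-chat",                            # existing entries
    "volcengine-deepseek-r1-250120",                     # new: Volcengine-hosted DeepSeek R1
    "volcengine-deepseek-v3-241226(max_token=6666)",     # optional per-model override, parsed below
]
ARK_API_KEY = "00000000-0000-0000-0000-000000000000"     # replace with a real Volcengine ARK key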
config.py:

@@ -43,7 +43,8 @@ AVAIL_LLM_MODELS = ["qwen-max", "o1-mini", "o1-mini-2024-09-12", "o1", "o1-2024-
                     "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
                     "gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
                     "gemini-1.5-pro", "chatglm3", "chatglm4",
-                    "deepseek-chat", "deepseek-coder", "deepseek-reasoner"
+                    "deepseek-chat", "deepseek-coder", "deepseek-reasoner",
+                    "volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
                     ]

 EMBEDDING_MODEL = "text-embedding-3-small"
@@ -267,6 +268,10 @@ MOONSHOT_API_KEY = ""
 YIMODEL_API_KEY = ""


+# Volcengine online LLMs; get an api-key at https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint
+ARK_API_KEY = "00000000-0000-0000-0000-000000000000"    # Volcengine API KEY
+
+
 # Zidong Taichu LLM https://ai-maas.wair.ac.cn
 TAICHU_API_KEY = ""
request_llms/bridge_all.py:

@@ -80,6 +80,7 @@ ollama_endpoint = "http://localhost:11434/api/chat"
 yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
 deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
 grok_model_endpoint = "https://api.x.ai/v1/chat/completions"
+volcengine_endpoint = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"

 if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
 azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -102,6 +103,7 @@ if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollam
 if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
 if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
 if grok_model_endpoint in API_URL_REDIRECT: grok_model_endpoint = API_URL_REDIRECT[grok_model_endpoint]
+if volcengine_endpoint in API_URL_REDIRECT: volcengine_endpoint = API_URL_REDIRECT[volcengine_endpoint]

 # get tokenizer
 tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
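Like the other endpoints, the new volcengine_endpoint participates in the API_URL_REDIRECT lookup, so requests can be rerouted through a self-hosted relay; a sketch with a placeholder relay URL:

# config.py -- optional redirect for the Volcengine endpoint (relay URL is hypothetical)
API_URL_REDIRECT = {
    "https://ark.cn-beijing.volces.com/api/v3/chat/completions":
        "https://my-relay.example.com/api/v3/chat/completions",
}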
@@ -1089,8 +1091,10 @@ if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder
         })
     except:
         logger.error(trimmed_format_exc())

 # -=-=-=-=-=-=- High-Flyer / DeepSeek online API -=-=-=-=-=-=-
-if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
+deepseek_models = ["deepseek-chat", "deepseek-coder", "deepseek-reasoner"]
+if any(item in deepseek_models for item in AVAIL_LLM_MODELS):
     try:
         deepseekapi_noui, deepseekapi_ui = get_predict_function(
             api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False
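The rewritten condition simply asks whether the configured model list intersects the DeepSeek list; a quick standalone check:

deepseek_models = ["deepseek-chat", "deepseek-coder", "deepseek-reasoner"]
AVAIL_LLM_MODELS = ["gpt-4", "deepseek-chat", "volcengine-deepseek-v3-241226"]

hit = any(item in deepseek_models for item in AVAIL_LLM_MODELS)   # form used in the hunk above
assert hit == bool(set(deepseek_models) & set(AVAIL_LLM_MODELS))  # equivalent set intersection
print(hit)  # True, because "deepseek-chat" is configured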
@@ -1127,6 +1131,60 @@ if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS o
         })
     except:
         logger.error(trimmed_format_exc())

+# -=-=-=-=-=-=- Volcengine alignment support -=-=-=-=-=-=-
+for model in [m for m in AVAIL_LLM_MODELS if m.startswith("volcengine-")]:
+    # This interface allows flexible access to the Volcengine multi-model console,
+    # e.g. AVAIL_LLM_MODELS = ["volcengine-deepseek-r1-250120(max_token=6666)"]
+    # where
+    #     "volcengine-" is the prefix (required)
+    #     "deepseek-r1-250120" is the model name (required)
+    #     "(max_token=6666)" is the configuration (optional)
+    model_info_extend = model_info.copy()  # copy so this lookup table does not pollute the global registry
+    model_info_extend.update({
+        "deepseek-r1-250120": {
+            "max_token": 16384,
+            "enable_reasoning": True,
+            "can_multi_thread": True,
+            "endpoint": volcengine_endpoint,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+        "deepseek-v3-241226": {
+            "max_token": 16384,
+            "enable_reasoning": False,
+            "can_multi_thread": True,
+            "endpoint": volcengine_endpoint,
+            "tokenizer": tokenizer_gpt35,
+            "token_cnt": get_token_num_gpt35,
+        },
+    })
+    try:
+        origin_model_name, max_token_tmp = read_one_api_model_name(model)
+        # if this is a known model, try to fetch its info
+        original_model_info = model_info_extend.get(origin_model_name.replace("volcengine-", "", 1), None)
+    except:
+        logger.error(f"volcengine模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
+        continue
+
+    volcengine_noui, volcengine_ui = get_predict_function(
+        api_key_conf_name="ARK_API_KEY", max_output_token=8192,
+        disable_proxy=True, model_remove_prefix=["volcengine-"],
+    )
+
+    this_model_info = {
+        "fn_with_ui": volcengine_ui,
+        "fn_without_ui": volcengine_noui,
+        "endpoint": volcengine_endpoint,
+        "can_multi_thread": True,
+        "max_token": 64000,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    }
+
+    # sync over other attributes of known models
+    attribute = "has_multimodal_capacity"
+    if original_model_info is not None and original_model_info.get(attribute, None) is not None:
+        this_model_info.update({attribute: original_model_info.get(attribute, None)})
+    attribute = "enable_reasoning"
+    if original_model_info is not None and original_model_info.get(attribute, None) is not None:
+        this_model_info.update({attribute: original_model_info.get(attribute, None)})
+    model_info.update({model: this_model_info})
+
 # -=-=-=-=-=-=- one-api alignment support -=-=-=-=-=-=-
 for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
     # This interface allows flexible access to the one-api multi-model console, e.g. AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
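For illustration, the "model(max_token=...)" convention the loop relies on can be parsed as below. This is a hypothetical re-implementation: the repository's actual helper is read_one_api_model_name, whose body is not shown in this diff.

import re

def parse_model_spec(spec):
    # split "volcengine-deepseek-r1-250120(max_token=6666)" into name and max_token
    m = re.match(r"^(?P<name>.+?)(?:\(max_token=(?P<mt>\d+)\))?$", spec)
    max_token = int(m.group("mt")) if m.group("mt") else None
    return m.group("name"), max_token

print(parse_model_spec("volcengine-deepseek-r1-250120(max_token=6666)"))
# -> ('volcengine-deepseek-r1-250120', 6666)
print(parse_model_spec("volcengine-deepseek-v3-241226"))
# -> ('volcengine-deepseek-v3-241226', None)

In the real helper a malformed max_token raises, which is why the loop above wraps the call in try/except and skips the offending entry.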
request_llms/oai_std_model_template.py:

@@ -57,7 +57,7 @@ def decode_chunk(chunk):
             finish_reason = chunk["error"]["code"]
         except:
             finish_reason = "API_ERROR"
-        return response, reasoning_content, finish_reason
+        return response, reasoning_content, finish_reason, str(chunk)

     try:
         if chunk["choices"][0]["delta"]["content"] is not None:
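decode_chunk now returns the raw chunk text as a fourth value, presumably so call sites can log the offending payload; a hypothetical caller-side sketch (the actual call sites are outside this hunk):

response, reasoning_content, finish_reason, decoded_chunk = decode_chunk(chunk)
if finish_reason == "API_ERROR":
    logger.error(f"abnormal chunk received: {decoded_chunk}")  # raw text makes failures debuggable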
@@ -122,7 +122,8 @@ def generate_message(input, model, key, history, max_output_token, system_prompt
 def get_predict_function(
     api_key_conf_name,
     max_output_token,
-    disable_proxy = False
+    disable_proxy = False,
+    model_remove_prefix = [],
 ):
     """
     Build response functions for an OpenAI-style API; parameters:
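The tests below import request_llms.bridge_volcengine, a file not shown in this diff. Given the factory call in bridge_all.py, its contents are plausibly little more than the following sketch (module path and variable names are inferred, not confirmed):

# request_llms/bridge_volcengine.py -- hypothetical contents
from request_llms.oai_std_model_template import get_predict_function

predict_no_ui_long_connection, predict = get_predict_function(
    api_key_conf_name="ARK_API_KEY",        # key introduced in config.py by this commit
    max_output_token=8192,
    disable_proxy=True,
    model_remove_prefix=["volcengine-"],    # strip the registry prefix before calling the API
)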
@@ -137,6 +138,16 @@ def get_predict_function(

     APIKEY = get_conf(api_key_conf_name)

+    def remove_prefix(model_name):
+        # strip the configured prefix from the model name:
+        # given "volcengine-deepseek-r1-250120", return "deepseek-r1-250120"
+        if not model_remove_prefix:
+            return model_name
+        model_without_prefix = model_name
+        for prefix in model_remove_prefix:
+            if model_without_prefix.startswith(prefix):
+                model_without_prefix = model_without_prefix[len(prefix):]
+        return model_without_prefix
+
     def predict_no_ui_long_connection(
         inputs,
         llm_kwargs,
@@ -164,9 +175,11 @@ def get_predict_function(
             raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}")
         if inputs == "":
             inputs = "你好👋"
+
+
         headers, payload = generate_message(
             input=inputs,
-            model=llm_kwargs["llm_model"],
+            model=remove_prefix(llm_kwargs["llm_model"]),
             key=APIKEY,
             history=history,
             max_output_token=max_output_token,
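Both request builders now route the model name through remove_prefix, since the Volcengine API expects the bare model id while gpt_academic's registry key keeps the "volcengine-" prefix. A standalone illustration of the helper's behavior (mirroring the definition added above):

model_remove_prefix = ["volcengine-"]   # as passed in from bridge_all.py

def remove_prefix(model_name):
    # strip each configured prefix in turn, as in the helper above
    if not model_remove_prefix:
        return model_name
    for prefix in model_remove_prefix:
        if model_name.startswith(prefix):
            model_name = model_name[len(prefix):]
    return model_name

assert remove_prefix("volcengine-deepseek-r1-250120") == "deepseek-r1-250120"
assert remove_prefix("gpt-4") == "gpt-4"   # non-matching names pass through unchanged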
@@ -302,7 +315,7 @@ def get_predict_function(

         headers, payload = generate_message(
             input=inputs,
-            model=llm_kwargs["llm_model"],
+            model=remove_prefix(llm_kwargs["llm_model"]),
             key=APIKEY,
             history=history,
             max_output_token=max_output_token,
tests/test_llms.py:

@@ -11,16 +11,15 @@ def validate_path():


 validate_path()  # validate path so you can run from base directory

-if "在线模型":
 if __name__ == "__main__":
-    from request_llms.bridge_taichu import predict_no_ui_long_connection
+    # from request_llms.bridge_taichu import predict_no_ui_long_connection
+    from request_llms.bridge_volcengine import predict_no_ui_long_connection
     # from request_llms.bridge_cohere import predict_no_ui_long_connection
     # from request_llms.bridge_spark import predict_no_ui_long_connection
     # from request_llms.bridge_zhipu import predict_no_ui_long_connection
     # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
     llm_kwargs = {
-        "llm_model": "taichu",
+        "llm_model": "volcengine",
         "max_length": 4096,
         "top_p": 1,
         "temperature": 1,
@@ -31,26 +30,46 @@ if "在线模型":
     )
     print("final result:", result)
     print("final result:", result)
+    # if "在线模型":
+    # if __name__ == "__main__":
+    #     # from request_llms.bridge_taichu import predict_no_ui_long_connection
+    #     from request_llms.bridge_volcengine import predict_no_ui_long_connection
+    #     # from request_llms.bridge_cohere import predict_no_ui_long_connection
+    #     # from request_llms.bridge_spark import predict_no_ui_long_connection
+    #     # from request_llms.bridge_zhipu import predict_no_ui_long_connection
+    #     # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
+    #     llm_kwargs = {
+    #         "llm_model": "ep-20250222011816-5cq8z",
+    #         "max_length": 4096,
+    #         "top_p": 1,
+    #         "temperature": 1,
+    #     }
+
+    #     result = predict_no_ui_long_connection(
+    #         inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
+    #     )
+    #     print("final result:", result)
+    #     print("final result:", result)
+
+
-if "本地模型":
-    if __name__ == "__main__":
-        # from request_llms.bridge_newbingfree import predict_no_ui_long_connection
-        # from request_llms.bridge_moss import predict_no_ui_long_connection
-        # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
-        # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
-        # from request_llms.bridge_claude import predict_no_ui_long_connection
-        # from request_llms.bridge_internlm import predict_no_ui_long_connection
-        # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
-        # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection
-        # from request_llms.bridge_qwen_local import predict_no_ui_long_connection
-        llm_kwargs = {
-            "max_length": 4096,
-            "top_p": 1,
-            "temperature": 1,
-        }
-        result = predict_no_ui_long_connection(
-            inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt=""
-        )
-        print("final result:", result)
+# if "本地模型":
+#     if __name__ == "__main__":
+#         # from request_llms.bridge_newbingfree import predict_no_ui_long_connection
+#         # from request_llms.bridge_moss import predict_no_ui_long_connection
+#         # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
+#         # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
+#         # from request_llms.bridge_claude import predict_no_ui_long_connection
+#         # from request_llms.bridge_internlm import predict_no_ui_long_connection
+#         # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
+#         # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection
+#         # from request_llms.bridge_qwen_local import predict_no_ui_long_connection
+#         llm_kwargs = {
+#             "max_length": 4096,
+#             "top_p": 1,
+#             "temperature": 1,
+#         }
+#         result = predict_no_ui_long_connection(
+#             inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt=""
+#         )
+#         print("final result:", result)
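To smoke-test the new bridge, the script is presumably run directly from the repository root (python tests/test_llms.py), which sends a single prompt through predict_no_ui_long_connection and prints the result.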