Integrate a new model

This commit is contained in:
binary-husky
2023-10-28 19:23:43 +08:00
Parent cf085565a7
Commit 127385b846
18 files changed, with 253 insertions and 40 deletions

View file

@@ -483,6 +483,22 @@ if "llama2" in AVAIL_LLM_MODELS: # llama2
         })
     except:
         print(trimmed_format_exc())
+if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai
+    try:
+        from .bridge_zhipu import predict_no_ui_long_connection as zhipu_noui
+        from .bridge_zhipu import predict as zhipu_ui
+        model_info.update({
+            "zhipuai": {
+                "fn_with_ui": zhipu_ui,
+                "fn_without_ui": zhipu_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
 # <-- 用于定义和切换多个azure模型 -->
 AZURE_CFG_ARRAY, = get_conf("AZURE_CFG_ARRAY")
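Note that this registration only takes effect when "zhipuai" appears in AVAIL_LLM_MODELS and the key read by the new bridge is configured. A minimal, hypothetical config excerpt (not part of this commit; the model id is a placeholder, not a value confirmed by the diff):

    # config.py / config_private.py -- hypothetical excerpt, not included in this commit
    AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "zhipuai"]   # "zhipuai" must be listed for the branch above to register it
    ZHIPUAI_API_KEY = "your-zhipuai-api-key"          # checked by validate_key() in the new bridge
    ZHIPUAI_MODEL = "chatglm_turbo"                   # placeholder; whichever model id the zhipuai SDK expects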

View file

@@ -155,13 +155,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         history_feedin.append([history[2*i], history[2*i+1]] )
     # 开始接收chatglm的回复
-    response = "[Local Message]: 等待ChatGLM响应中 ..."
+    response = "[Local Message] 等待ChatGLM响应中 ..."
     for response in glm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
         chatbot[-1] = (inputs, response)
         yield from update_ui(chatbot=chatbot, history=history)
     # 总结输出
-    if response == "[Local Message]: 等待ChatGLM响应中 ...":
-        response = "[Local Message]: ChatGLM响应异常 ..."
+    if response == "[Local Message] 等待ChatGLM响应中 ...":
+        response = "[Local Message] ChatGLM响应异常 ..."
     history.extend([inputs, response])
     yield from update_ui(chatbot=chatbot, history=history)

View file

@@ -195,13 +195,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         history_feedin.append([history[2*i], history[2*i+1]] )
     # 开始接收chatglmft的回复
-    response = "[Local Message]: 等待ChatGLMFT响应中 ..."
+    response = "[Local Message] 等待ChatGLMFT响应中 ..."
     for response in glmft_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
         chatbot[-1] = (inputs, response)
         yield from update_ui(chatbot=chatbot, history=history)
     # 总结输出
-    if response == "[Local Message]: 等待ChatGLMFT响应中 ...":
-        response = "[Local Message]: ChatGLMFT响应异常 ..."
+    if response == "[Local Message] 等待ChatGLMFT响应中 ...":
+        response = "[Local Message] ChatGLMFT响应异常 ..."
     history.extend([inputs, response])
     yield from update_ui(chatbot=chatbot, history=history)

View file

@@ -163,13 +163,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         history_feedin.append([history[2*i], history[2*i+1]] )
     # 开始接收jittorllms的回复
-    response = "[Local Message]: 等待jittorllms响应中 ..."
+    response = "[Local Message] 等待jittorllms响应中 ..."
     for response in llama_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
         chatbot[-1] = (inputs, response)
         yield from update_ui(chatbot=chatbot, history=history)
     # 总结输出
-    if response == "[Local Message]: 等待jittorllms响应中 ...":
-        response = "[Local Message]: jittorllms响应异常 ..."
+    if response == "[Local Message] 等待jittorllms响应中 ...":
+        response = "[Local Message] jittorllms响应异常 ..."
     history.extend([inputs, response])
     yield from update_ui(chatbot=chatbot, history=history)

View file

@@ -163,13 +163,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         history_feedin.append([history[2*i], history[2*i+1]] )
     # 开始接收jittorllms的回复
-    response = "[Local Message]: 等待jittorllms响应中 ..."
+    response = "[Local Message] 等待jittorllms响应中 ..."
     for response in pangu_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
         chatbot[-1] = (inputs, response)
         yield from update_ui(chatbot=chatbot, history=history)
     # 总结输出
-    if response == "[Local Message]: 等待jittorllms响应中 ...":
-        response = "[Local Message]: jittorllms响应异常 ..."
+    if response == "[Local Message] 等待jittorllms响应中 ...":
+        response = "[Local Message] jittorllms响应异常 ..."
     history.extend([inputs, response])
     yield from update_ui(chatbot=chatbot, history=history)

View file

@@ -163,13 +163,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         history_feedin.append([history[2*i], history[2*i+1]] )
     # 开始接收jittorllms的回复
-    response = "[Local Message]: 等待jittorllms响应中 ..."
+    response = "[Local Message] 等待jittorllms响应中 ..."
     for response in rwkv_glm_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
         chatbot[-1] = (inputs, response)
         yield from update_ui(chatbot=chatbot, history=history)
     # 总结输出
-    if response == "[Local Message]: 等待jittorllms响应中 ...":
-        response = "[Local Message]: jittorllms响应异常 ..."
+    if response == "[Local Message] 等待jittorllms响应中 ...":
+        response = "[Local Message] jittorllms响应异常 ..."
     history.extend([inputs, response])
     yield from update_ui(chatbot=chatbot, history=history)

View file

@@ -219,7 +219,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
             moss_handle = None
             return
     else:
-        response = "[Local Message]: 等待MOSS响应中 ..."
+        response = "[Local Message] 等待MOSS响应中 ..."
         chatbot[-1] = (inputs, response)
         yield from update_ui(chatbot=chatbot, history=history)

@@ -238,7 +238,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         yield from update_ui(chatbot=chatbot, history=history)
     # 总结输出
-    if response == "[Local Message]: 等待MOSS响应中 ...":
-        response = "[Local Message]: MOSS响应异常 ..."
+    if response == "[Local Message] 等待MOSS响应中 ...":
+        response = "[Local Message] MOSS响应异常 ..."
     history.extend([inputs, response.strip('<|MOSS|>: ')])
     yield from update_ui(chatbot=chatbot, history=history)

View file

@@ -199,7 +199,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
     watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
     response = ""
-    if len(observe_window) >= 1: observe_window[0] = "[Local Message]: 等待NewBing响应中 ..."
+    if len(observe_window) >= 1: observe_window[0] = "[Local Message] 等待NewBing响应中 ..."
     for response in newbingfree_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
         if len(observe_window) >= 1: observe_window[0] = preprocess_newbing_out_simple(response)
         if len(observe_window) >= 2:

@@ -212,7 +212,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         单线程方法
         函数的说明请见 request_llms/bridge_all.py
     """
-    chatbot.append((inputs, "[Local Message]: 等待NewBing响应中 ..."))
+    chatbot.append((inputs, "[Local Message] 等待NewBing响应中 ..."))
     global newbingfree_handle
     if (newbingfree_handle is None) or (not newbingfree_handle.success):

@@ -231,13 +231,13 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
     for i in range(len(history)//2):
         history_feedin.append([history[2*i], history[2*i+1]] )
-    chatbot[-1] = (inputs, "[Local Message]: 等待NewBing响应中 ...")
-    response = "[Local Message]: 等待NewBing响应中 ..."
+    chatbot[-1] = (inputs, "[Local Message] 等待NewBing响应中 ...")
+    response = "[Local Message] 等待NewBing响应中 ..."
     yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
     for response in newbingfree_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
         chatbot[-1] = (inputs, preprocess_newbing_out(response))
         yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
-    if response == "[Local Message]: 等待NewBing响应中 ...": response = "[Local Message]: NewBing响应异常,请刷新界面重试 ..."
+    if response == "[Local Message] 等待NewBing响应中 ...": response = "[Local Message] NewBing响应异常,请刷新界面重试 ..."
     history.extend([inputs, response])
     logging.info(f'[raw_input] {inputs}')
     logging.info(f'[response] {response}')

View file

@@ -158,8 +158,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         return
     # 总结输出
-    response = f"[Local Message]: {model_name}响应异常 ..."
-    if response == f"[Local Message]: 等待{model_name}响应中 ...":
-        response = f"[Local Message]: {model_name}响应异常 ..."
+    response = f"[Local Message] {model_name}响应异常 ..."
+    if response == f"[Local Message] 等待{model_name}响应中 ...":
+        response = f"[Local Message] {model_name}响应异常 ..."
     history.extend([inputs, response])
     yield from update_ui(chatbot=chatbot, history=history)

View file

@@ -42,7 +42,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
     yield from update_ui(chatbot=chatbot, history=history)
     if validate_key() is False:
-        yield from update_ui_lastest_msg(lastmsg="[Local Message]: 请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET", chatbot=chatbot, history=history, delay=0)
+        yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET", chatbot=chatbot, history=history, delay=0)
         return
     if additional_fn is not None:

@@ -57,7 +57,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         yield from update_ui(chatbot=chatbot, history=history)
     # 总结输出
-    if response == f"[Local Message]: 等待{model_name}响应中 ...":
-        response = f"[Local Message]: {model_name}响应异常 ..."
+    if response == f"[Local Message] 等待{model_name}响应中 ...":
+        response = f"[Local Message] {model_name}响应异常 ..."
     history.extend([inputs, response])
     yield from update_ui(chatbot=chatbot, history=history)

View file

@@ -222,7 +222,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
     watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
     response = ""
-    observe_window[0] = "[Local Message]: 等待Claude响应中 ..."
+    observe_window[0] = "[Local Message] 等待Claude响应中 ..."
     for response in claude_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
         observe_window[0] = preprocess_newbing_out_simple(response)
         if len(observe_window) >= 2:

@@ -236,7 +236,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         单线程方法
         函数的说明请见 request_llms/bridge_all.py
     """
-    chatbot.append((inputs, "[Local Message]: 等待Claude响应中 ..."))
+    chatbot.append((inputs, "[Local Message] 等待Claude响应中 ..."))
     global claude_handle
     if (claude_handle is None) or (not claude_handle.success):

@@ -255,14 +255,14 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
     for i in range(len(history)//2):
         history_feedin.append([history[2*i], history[2*i+1]])
-    chatbot[-1] = (inputs, "[Local Message]: 等待Claude响应中 ...")
-    response = "[Local Message]: 等待Claude响应中 ..."
+    chatbot[-1] = (inputs, "[Local Message] 等待Claude响应中 ...")
+    response = "[Local Message] 等待Claude响应中 ..."
     yield from update_ui(chatbot=chatbot, history=history, msg="Claude响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
     for response in claude_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt):
         chatbot[-1] = (inputs, preprocess_newbing_out(response))
         yield from update_ui(chatbot=chatbot, history=history, msg="Claude响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
-    if response == "[Local Message]: 等待Claude响应中 ...":
-        response = "[Local Message]: Claude响应异常,请刷新界面重试 ..."
+    if response == "[Local Message] 等待Claude响应中 ...":
+        response = "[Local Message] Claude响应异常,请刷新界面重试 ..."
     history.extend([inputs, response])
     logging.info(f'[raw_input] {inputs}')
     logging.info(f'[response] {response}')

View file

@@ -0,0 +1,59 @@
+import time
+from toolbox import update_ui, get_conf, update_ui_lastest_msg
+
+model_name = '智谱AI大模型'
+
+def validate_key():
+    ZHIPUAI_API_KEY, = get_conf("ZHIPUAI_API_KEY")
+    if ZHIPUAI_API_KEY == '': return False
+    return True
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
+    """
+        ⭐多线程方法
+        函数的说明请见 request_llms/bridge_all.py
+    """
+    watch_dog_patience = 5
+    response = ""
+
+    if validate_key() is False:
+        raise RuntimeError('请配置ZHIPUAI_API_KEY')
+
+    from .com_zhipuapi import ZhipuRequestInstance
+    sri = ZhipuRequestInstance()
+    for response in sri.generate(inputs, llm_kwargs, history, sys_prompt):
+        if len(observe_window) >= 1:
+            observe_window[0] = response
+        if len(observe_window) >= 2:
+            if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
+    return response
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+        ⭐单线程方法
+        函数的说明请见 request_llms/bridge_all.py
+    """
+    chatbot.append((inputs, ""))
+    yield from update_ui(chatbot=chatbot, history=history)
+
+    if validate_key() is False:
+        yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置ZHIPUAI_API_KEY", chatbot=chatbot, history=history, delay=0)
+        return
+
+    if additional_fn is not None:
+        from core_functional import handle_core_functionality
+        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
+
+    # 开始接收回复
+    from .com_zhipuapi import ZhipuRequestInstance
+    sri = ZhipuRequestInstance()
+    for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
+
+    # 总结输出
+    if response == f"[Local Message] 等待{model_name}响应中 ...":
+        response = f"[Local Message] {model_name}响应异常 ..."
+    history.extend([inputs, response])
+    yield from update_ui(chatbot=chatbot, history=history)
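A rough usage sketch of the new bridge's multi-threaded entry point, assuming it is called directly rather than dispatched through model_info in bridge_all.py, and that the repository root is on sys.path (the llm_kwargs keys are the ones consumed above; all values are placeholders):

    from request_llms.bridge_zhipu import predict_no_ui_long_connection

    llm_kwargs = {'top_p': 0.7, 'temperature': 0.9}   # only these two keys are read by the zhipuai request path
    answer = predict_no_ui_long_connection(
        inputs="用一句话介绍一下智谱AI",
        llm_kwargs=llm_kwargs,
        history=[],                      # flat list: [user, assistant, user, assistant, ...]
        sys_prompt="You are a helpful assistant.",
        observe_window=[],               # slot 0 mirrors the partial reply; optional slot 1 is a watchdog timestamp
    )
    print(answer)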

View file

@@ -0,0 +1,67 @@
+from toolbox import get_conf
+import threading
+import logging
+
+timeout_bot_msg = '[Local Message] Request timeout. Network error.'
+
+class ZhipuRequestInstance():
+    def __init__(self):
+        self.time_to_yield_event = threading.Event()
+        self.time_to_exit_event = threading.Event()
+        self.result_buf = ""
+
+    def generate(self, inputs, llm_kwargs, history, system_prompt):
+        # import _thread as thread
+        import zhipuai
+        ZHIPUAI_API_KEY, ZHIPUAI_MODEL = get_conf("ZHIPUAI_API_KEY", "ZHIPUAI_MODEL")
+        zhipuai.api_key = ZHIPUAI_API_KEY
+        self.result_buf = ""
+        response = zhipuai.model_api.sse_invoke(
+            model=ZHIPUAI_MODEL,
+            prompt=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
+            top_p=llm_kwargs['top_p'],
+            temperature=llm_kwargs['temperature'],
+        )
+        for event in response.events():
+            if event.event == "add":
+                self.result_buf += event.data
+                yield self.result_buf
+            elif event.event == "error" or event.event == "interrupted":
+                raise RuntimeError("Unknown error:" + event.data)
+            elif event.event == "finish":
+                yield self.result_buf
+                break
+            else:
+                raise RuntimeError("Unknown error:" + str(event))
+
+        logging.info(f'[raw_input] {inputs}')
+        logging.info(f'[response] {self.result_buf}')
+        return self.result_buf
+
+def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
+    conversation_cnt = len(history) // 2
+    messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}]
+    if conversation_cnt:
+        for index in range(0, 2*conversation_cnt, 2):
+            what_i_have_asked = {}
+            what_i_have_asked["role"] = "user"
+            what_i_have_asked["content"] = history[index]
+            what_gpt_answer = {}
+            what_gpt_answer["role"] = "assistant"
+            what_gpt_answer["content"] = history[index+1]
+            if what_i_have_asked["content"] != "":
+                if what_gpt_answer["content"] == "":
+                    continue
+                if what_gpt_answer["content"] == timeout_bot_msg:
+                    continue
+                messages.append(what_i_have_asked)
+                messages.append(what_gpt_answer)
+            else:
+                messages[-1]['content'] = what_gpt_answer['content']
+    what_i_ask_now = {}
+    what_i_ask_now["role"] = "user"
+    what_i_ask_now["content"] = inputs
+    messages.append(what_i_ask_now)
+    return messages
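As a worked example of the payload built by generate_message_payload: with one prior question/answer pair in history and a new question, the list handed to sse_invoke would look roughly like this (all contents illustrative):

    # generate_message_payload("那3+3呢?", llm_kwargs, history=["1+1等于几?", "等于2。"], system_prompt="你是数学助手") ->
    [
        {"role": "user", "content": "你是数学助手"},      # system prompt is injected as a user turn
        {"role": "assistant", "content": "Certainly!"},   # fixed acknowledgement inserted by the helper
        {"role": "user", "content": "1+1等于几?"},        # history[0]
        {"role": "assistant", "content": "等于2。"},       # history[1]
        {"role": "user", "content": "那3+3呢?"},          # the new inputs, appended last
    ]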

View file

@@ -166,14 +166,14 @@ def get_local_llm_predict_fns(LLMSingletonClass, model_name):
             history_feedin.append([history[2*i], history[2*i+1]] )
         # 开始接收回复
-        response = f"[Local Message]: 等待{model_name}响应中 ..."
+        response = f"[Local Message] 等待{model_name}响应中 ..."
         for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
             chatbot[-1] = (inputs, response)
             yield from update_ui(chatbot=chatbot, history=history)
         # 总结输出
-        if response == f"[Local Message]: 等待{model_name}响应中 ...":
-            response = f"[Local Message]: {model_name}响应异常 ..."
+        if response == f"[Local Message] 等待{model_name}响应中 ...":
+            response = f"[Local Message] {model_name}响应异常 ..."
         history.extend([inputs, response])
         yield from update_ui(chatbot=chatbot, history=history)
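Most of the hunks above are the same mechanical edit repeated across every bridge: the sentinel string changes from "[Local Message]: ..." to "[Local Message] ...", and the initial assignment and the post-stream comparison must change in lockstep, otherwise the fallback error message can never fire. A compact restatement of the shared pattern, with a hypothetical stream_chat generator standing in for the various handles:

    def finalize_stream(model_name, inputs, stream_chat, chatbot):
        # Sketch only: the initial sentinel and the final comparison must be the exact same string.
        wait_msg = f"[Local Message] 等待{model_name}响应中 ..."
        response = wait_msg
        for response in stream_chat(query=inputs):      # hypothetical generator; may yield nothing on failure
            chatbot[-1] = (inputs, response)
        if response == wait_msg:                         # never reassigned => the model produced no output
            response = f"[Local Message] {model_name}响应异常 ..."
        return response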