add support for Deepseek R1 model and display CoT (#2118)

* feat: add support for R1 model and display CoT * fix unpacking * feat: customized font & font size * auto hide tooltip when scroll down * tooltip glass transparent css * fix: Enhance API key validation in is_any_api_key function (#2113) * support qwen2.5-max! * update minor adjustment --------- Co-authored-by: binary-husky <qingxu.fu@outlook.com> Co-authored-by: Steven Moder <java20131114@gmail.com>
2025-12-05 22:16:49 +00:00 · 2025-02-04 16:02:02 +08:00
--- a/config.py
+++ b/config.py
@@ -13,6 +13,9 @@ API_KEY = "在此处填写APIKEY"    # 可同时填写多个API-KEY，用英文
 # [step 1-2]>> ( 接入通义 qwen-max ) 接入通义千问在线大模型，api-key获取地址 https://dashscope.console.aliyun.com/
 DASHSCOPE_API_KEY = "" # 阿里灵积云API_KEY

+# [step 1-3]>> ( 接入 deepseek-reasoner ) 深度求索(DeepSeek) API KEY，默认请求地址为"https://api.deepseek.com/v1/chat/completions"
+DEEPSEEK_API_KEY = ""
+
 # [step 2]>> 改为True应用代理，如果直接在海外服务器部署，此处不修改；如果使用本地或无地域限制的大模型时，此处也不需要修改
 USE_PROXY = False
 if USE_PROXY:
@@ -39,7 +42,8 @@ AVAIL_LLM_MODELS = ["qwen-max", "o1-mini", "o1-mini-2024-09-12", "o1", "o1-2024-
                    "gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
                    "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
                    "gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
-                    "gemini-1.5-pro", "chatglm3", "chatglm4"
+                    "gemini-1.5-pro", "chatglm3", "chatglm4",
+                    "deepseek-chat", "deepseek-coder", "deepseek-reasoner"
                    ]

 EMBEDDING_MODEL = "text-embedding-3-small"
@@ -261,9 +265,6 @@ MOONSHOT_API_KEY = ""
 # 零一万物(Yi Model) API KEY
 YIMODEL_API_KEY = ""

-# 深度求索(DeepSeek) API KEY，默认请求地址为"https://api.deepseek.com/v1/chat/completions"
-DEEPSEEK_API_KEY = ""
-

 # 紫东太初大模型 https://ai-maas.wair.ac.cn
 TAICHU_API_KEY = ""
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -1090,18 +1090,18 @@ if "deepseekcoder" in AVAIL_LLM_MODELS:   # deepseekcoder
    except:
        logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- 幻方-深度求索大模型在线API -=-=-=-=-=-=-
-if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
+if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
    try:
        deepseekapi_noui, deepseekapi_ui = get_predict_function(
            api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False
-            )
+        )
        model_info.update({
            "deepseek-chat":{
                "fn_with_ui": deepseekapi_ui,
                "fn_without_ui": deepseekapi_noui,
                "endpoint": deepseekapi_endpoint,
                "can_multi_thread": True,
-                "max_token": 32000,
+                "max_token": 64000,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            },
@@ -1114,6 +1114,16 @@ if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            },
+            "deepseek-reasoner":{
+                "fn_with_ui": deepseekapi_ui,
+                "fn_without_ui": deepseekapi_noui,
+                "endpoint": deepseekapi_endpoint,
+                "can_multi_thread": True,
+                "max_token": 64000,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+                "enable_reasoning": True
+            },
        })
    except:
        logger.error(trimmed_format_exc())
--- a/request_llms/oai_std_model_template.py
+++ b/request_llms/oai_std_model_template.py
@@ -36,10 +36,11 @@ def get_full_error(chunk, stream_response):

 def decode_chunk(chunk):
    """
-    用于解读"content"和"finish_reason"的内容
+    用于解读"content"和"finish_reason"的内容（如果支持思维链也会返回"reasoning_content"内容）
    """
    chunk = chunk.decode()
    respose = ""
+    reasoning_content = ""
    finish_reason = "False"
    try:
        chunk = json.loads(chunk[6:])
@@ -57,14 +58,20 @@ def decode_chunk(chunk):
        return respose, finish_reason

    try:
-        respose = chunk["choices"][0]["delta"]["content"]
+        if chunk["choices"][0]["delta"]["content"] is not None:
+            respose = chunk["choices"][0]["delta"]["content"]
+    except:
+        pass
+    try:
+        if chunk["choices"][0]["delta"]["reasoning_content"] is not None:
+            reasoning_content = chunk["choices"][0]["delta"]["reasoning_content"]
    except:
        pass
    try:
        finish_reason = chunk["choices"][0]["finish_reason"]
    except:
        pass
-    return respose, finish_reason
+    return respose, reasoning_content, finish_reason


 def generate_message(input, model, key, history, max_output_token, system_prompt, temperature):
@@ -149,6 +156,7 @@ def get_predict_function(
        observe_window = None：
            用于负责跨越线程传递已经输出的部分，大部分时候仅仅为了fancy的视觉效果，留空即可。observe_window[0]：观测窗。observe_window[1]：看门狗
        """
+        from .bridge_all import model_info
        watch_dog_patience = 5  # 看门狗的耐心，设置5秒不准咬人(咬的也不是人
        if len(APIKEY) == 0:
            raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}")
@@ -163,29 +171,21 @@ def get_predict_function(
            system_prompt=sys_prompt,
            temperature=llm_kwargs["temperature"],
        )
+
+        reasoning = model_info[llm_kwargs['llm_model']].get('enable_reasoning', False)
+
        retry = 0
        while True:
            try:
-                from .bridge_all import model_info
-
                endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"]
-                if not disable_proxy:
-                    response = requests.post(
-                        endpoint,
-                        headers=headers,
-                        proxies=proxies,
-                        json=playload,
-                        stream=True,
-                        timeout=TIMEOUT_SECONDS,
-                    )
-                else:
-                    response = requests.post(
-                        endpoint,
-                        headers=headers,
-                        json=playload,
-                        stream=True,
-                        timeout=TIMEOUT_SECONDS,
-                    )
+                response = requests.post(
+                    endpoint,
+                    headers=headers,
+                    proxies=None if disable_proxy else proxies,
+                    json=playload,
+                    stream=True,
+                    timeout=TIMEOUT_SECONDS,
+                )
                break
            except:
                retry += 1
@@ -194,10 +194,13 @@ def get_predict_function(
                    raise TimeoutError
                if MAX_RETRY != 0:
                    logger.error(f"请求超时，正在重试 ({retry}/{MAX_RETRY}) ……")
-
-        stream_response = response.iter_lines()
+        
        result = ""
        finish_reason = ""
+        if reasoning:
+            resoning_buffer = ""
+        
+        stream_response = response.iter_lines()
        while True:
            try:
                chunk = next(stream_response)
@@ -207,9 +210,9 @@ def get_predict_function(
                break
            except requests.exceptions.ConnectionError:
                chunk = next(stream_response)  # 失败了，重试一次？再失败就没办法了。
-            response_text, finish_reason = decode_chunk(chunk)
+            response_text, reasoning_content, finish_reason = decode_chunk(chunk)
            # 返回的数据流第一次为空，继续等待
-            if response_text == "" and finish_reason != "False":
+            if response_text == "" and (reasoning == False or reasoning_content == "") and finish_reason != "False":
                continue
            if response_text == "API_ERROR" and (
                finish_reason != "False" or finish_reason != "stop"
@@ -227,6 +230,8 @@ def get_predict_function(
                            print(f"[response] {result}")
                        break
                    result += response_text
+                    if reasoning:
+                        resoning_buffer += reasoning_content
                    if observe_window is not None:
                        # 观测窗，把已经获取的数据显示出去
                        if len(observe_window) >= 1:
@@ -241,6 +246,10 @@ def get_predict_function(
                    error_msg = chunk_decoded
                    logger.error(error_msg)
                    raise RuntimeError("Json解析不合常规")
+        if reasoning:
+            # reasoning 的部分加上框 (>)
+            return '\n'.join(map(lambda x: '> ' + x, resoning_buffer.split('\n'))) + \
+                   '\n\n' + result
        return result

    def predict(
@@ -262,6 +271,7 @@ def get_predict_function(
        chatbot 为WebUI中显示的对话列表，修改它，然后yeild出去，可以直接修改对话界面内容
        additional_fn代表点击的哪个按钮，按钮见functional.py
        """
+        from .bridge_all import model_info
        if len(APIKEY) == 0:
            raise RuntimeError(f"APIKEY为空,请检查配置文件的{APIKEY}")
        if inputs == "":
@@ -298,32 +308,23 @@ def get_predict_function(
            system_prompt=system_prompt,
            temperature=llm_kwargs["temperature"],
        )
+        
+        reasoning = model_info[llm_kwargs['llm_model']].get('enable_reasoning', False)

        history.append(inputs)
        history.append("")
        retry = 0
        while True:
            try:
-                from .bridge_all import model_info
-
                endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"]
-                if not disable_proxy:
-                    response = requests.post(
-                        endpoint,
-                        headers=headers,
-                        proxies=proxies,
-                        json=playload,
-                        stream=True,
-                        timeout=TIMEOUT_SECONDS,
-                    )
-                else:
-                    response = requests.post(
-                        endpoint,
-                        headers=headers,
-                        json=playload,
-                        stream=True,
-                        timeout=TIMEOUT_SECONDS,
-                    )
+                response = requests.post(
+                    endpoint,
+                    headers=headers,
+                    proxies=None if disable_proxy else proxies,
+                    json=playload,
+                    stream=True,
+                    timeout=TIMEOUT_SECONDS,
+                )
                break
            except:
                retry += 1
@@ -338,6 +339,8 @@ def get_predict_function(
                    raise TimeoutError

        gpt_replying_buffer = ""
+        if reasoning:
+            gpt_reasoning_buffer = ""

        stream_response = response.iter_lines()
        while True:
@@ -347,9 +350,9 @@ def get_predict_function(
                break
            except requests.exceptions.ConnectionError:
                chunk = next(stream_response)  # 失败了，重试一次？再失败就没办法了。
-            response_text, finish_reason = decode_chunk(chunk)
+            response_text, reasoning_content, finish_reason = decode_chunk(chunk)
            # 返回的数据流第一次为空，继续等待
-            if response_text == "" and finish_reason != "False":
+            if response_text == "" and (reasoning == False or reasoning_content == "") and finish_reason != "False":
                status_text = f"finish_reason: {finish_reason}"
                yield from update_ui(
                    chatbot=chatbot, history=history, msg=status_text
@@ -379,9 +382,14 @@ def get_predict_function(
                        logger.info(f"[response] {gpt_replying_buffer}")
                        break
                    status_text = f"finish_reason: {finish_reason}"
-                    gpt_replying_buffer += response_text
-                    # 如果这里抛出异常，一般是文本过长，详情见get_full_error的输出
-                    history[-1] = gpt_replying_buffer
+                    if reasoning:
+                        gpt_replying_buffer += response_text
+                        gpt_reasoning_buffer += reasoning_content
+                        history[-1] = '\n'.join(map(lambda x: '> ' + x, gpt_reasoning_buffer.split('\n'))) + '\n\n' + gpt_replying_buffer
+                    else:
+                        gpt_replying_buffer += response_text
+                        # 如果这里抛出异常，一般是文本过长，详情见get_full_error的输出
+                        history[-1] = gpt_replying_buffer
                    chatbot[-1] = (history[-2], history[-1])
                    yield from update_ui(
                        chatbot=chatbot, history=history, msg=status_text
--- a/themes/welcome.js
+++ b/themes/welcome.js
@@ -2,12 +2,19 @@ class WelcomeMessage {
    constructor() {
        this.static_welcome_message = [
            {
-                title: "环境配置教程",
-                content: "配置模型和插件，释放大语言模型的学术应用潜力。",
-                svg: "file=themes/svg/conf.svg",
+                title: "改变主题外观",
+                content: "点击「界面外观」，然后「更换UI主题」或「切换界面明暗」。",
+                svg: "file=themes/svg/theme.svg",
                url: "https://github.com/binary-husky/gpt_academic/wiki/%E9%A1%B9%E7%9B%AE%E9%85%8D%E7%BD%AE%E8%AF%B4%E6%98%8E",
            },
            {
+                title: "修改回答语言偏好",    
+                content: "点击「更改模型」，删除「System prompt」并输入「用某语言回答」。",
+                svg: "file=themes/svg/prompt.svg",
+                url: "https://github.com/binary-husky/gpt_academic",
+            },
+            {
+                title: "Arxiv论文一键翻译",
                title: "Arxiv论文翻译",
                content: "无缝切换学术阅读语言，最优英文转中文的学术论文阅读体验。",
                svg: "file=themes/svg/arxiv.svg",
@@ -19,6 +26,12 @@ class WelcomeMessage {
                svg: "file=themes/svg/mm.svg",
                url: "https://github.com/binary-husky/gpt_academic",
            },
+            {
+                title: "获取多个模型的答案",
+                content: "输入问题后点击「询问多个GPT模型」，消耗算子低于单词询问gpt-4o。",
+                svg: "file=themes/svg/model_multiple.svg",
+                url: "https://github.com/binary-husky/gpt_academic",
+            },
            {
                title: "文档与源码批处理",
                content: "您可以将任意文件拖入「此处」，随后调用对应插件功能。",
@@ -52,7 +65,13 @@ class WelcomeMessage {
            {
                title: "实时语音对话",
                content: "配置实时语音对话功能，无须任何激活词，我将一直倾听。",
-                svg: "file=themes/svg/default.svg",
+                svg: "file=themes/svg/voice.svg",
+                url: "https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md",
+            },
+            {
+                title: "联网回答问题",
+                content: "输入问题后，点击右侧插件区的「查互联网后回答」插件。",
+                svg: "file=themes/svg/Internet.svg",
                url: "https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md",
            },
            {
@@ -85,6 +104,7 @@ class WelcomeMessage {
        this.card_array = [];
        this.static_welcome_message_previous = [];
        this.reflesh_time_interval = 15 * 1000;
+        this.update_time_interval = 2 * 1000;
        this.major_title = "欢迎使用GPT-Academic";

        const reflesh_render_status = () => {
@@ -101,12 +121,19 @@ class WelcomeMessage {
        window.addEventListener('resize', this.update.bind(this));
        // add a loop to reflesh cards
        this.startRefleshCards();
+        this.startAutoUpdate();
    }

    begin_render() {
        this.update();
    }

+    async startAutoUpdate() {
+        // sleep certain time
+        await new Promise(r => setTimeout(r, this.update_time_interval));
+        this.update();
+    }
+
    async startRefleshCards() {
        // sleep certain time
        await new Promise(r => setTimeout(r, this.reflesh_time_interval));
@@ -134,6 +161,7 @@ class WelcomeMessage {

        // combine two lists
        this.static_welcome_message_previous = not_shown_previously.concat(already_shown_previously);
+        this.static_welcome_message_previous = this.static_welcome_message_previous.slice(0, this.max_welcome_card_num);

        (async () => {
            // 使用 for...of 循环来处理异步操作
@@ -198,12 +226,11 @@ class WelcomeMessage {
        return array;
    }

-    async update() {
+    async can_display() {
        // update the card visibility
        const elem_chatbot = document.getElementById('gpt-chatbot');
        const chatbot_top = elem_chatbot.getBoundingClientRect().top;
        const welcome_card_container = document.getElementsByClassName('welcome-card-container')[0];
-
        // detect if welcome card overflow
        let welcome_card_overflow = false;
        if (welcome_card_container) {
@@ -215,22 +242,22 @@ class WelcomeMessage {
        var page_width = document.documentElement.clientWidth;
        const width_to_hide_welcome = 1200;
        if (!await this.isChatbotEmpty() || page_width < width_to_hide_welcome || welcome_card_overflow) {
-            // overflow !
-            if (this.visible) {
-                // console.log("remove welcome");
-                this.removeWelcome();
-                this.card_array = [];
-                this.static_welcome_message_previous = [];
-            }
+            // cannot display
+            return false;
+        }
+        return true;
+    }
+
+    async update() {
+        const can_display = await this.can_display();
+        if (can_display && !this.visible) {
+            this.showWelcome();
            return;
        }
-        if (this.visible) {
-            // console.log("already visible");
+        if (!can_display && this.visible) {
+            this.removeWelcome();
            return;
        }
-        // not overflow, not yet shown, then create and display welcome card
-        // console.log("show welcome");
-        this.showWelcome();
    }

    showCard(message) {
@@ -297,6 +324,16 @@ class WelcomeMessage {
        });

        elem_chatbot.appendChild(welcome_card_container);
+        const can_display = await this.can_display();
+        if (!can_display) {
+            // undo
+            this.visible = false;
+            this.card_array = [];
+            this.static_welcome_message_previous = [];
+            elem_chatbot.removeChild(welcome_card_container);
+            await new Promise(r => setTimeout(r, this.update_time_interval / 2));
+            return;
+        }

        // 添加显示动画
        requestAnimationFrame(() => {
@@ -313,6 +350,8 @@ class WelcomeMessage {
        welcome_card_container.classList.add('hide');
        welcome_card_container.addEventListener('transitionend', () => {
            elem_chatbot.removeChild(welcome_card_container);
+            this.card_array = [];
+            this.static_welcome_message_previous = [];
        }, { once: true });
        // add a fail safe timeout
        const timeout = 600; // 与 CSS 中 transition 的时间保持一致(1s)