Master 4.0 (#2210)

* stage academic conversation

* stage document conversation

* fix buggy gradio version

* file dynamic load

* merge more academic plugins

* accelerate nltk

* feat: add file and URL reading support to the predict function
- Add URL detection and web page content extraction, with automatic extraction of page text
- Add file path recognition and file content reading, supporting the private_upload path format
- Integrate WebTextExtractor for web page content extraction
- Integrate TextContentLoader for reading local files
- Support combined handling of a file path plus a question (a sketch of the detection helpers follows below)
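
The diff below imports start_with_url and contain_uploaded_files from shared_utils.doc_loader_dynamic without showing their bodies. As a rough illustration only, the detection described above could be as simple as the following sketch; the helper bodies here are assumptions, not the repository's actual implementations:

    import re

    # Hypothetical sketches of the two detection helpers named in this commit;
    # the real shared_utils.doc_loader_dynamic implementations may differ.
    _URL_PATTERN = re.compile(r'^https?://\S+', re.IGNORECASE)

    def start_with_url(inputs: str) -> bool:
        # Treat the input as a web-page request only when it begins with http(s)://
        return bool(_URL_PATTERN.match(inputs.strip()))

    def contain_uploaded_files(inputs: str) -> bool:
        # Uploaded files are referenced through the private_upload/... path format
        return 'private_upload' in inputs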

* back

* block unstable

---------

Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
binary-husky
2025-08-23 15:59:22 +08:00
Committed by GitHub
Parent 65a4cf59c2
Current commit 8042750d41
79 files changed: 20850 insertions, 57 deletions

@@ -456,7 +456,7 @@ model_info = {
         "endpoint": None,
         "max_token": 10124 * 8,
         "tokenizer": tokenizer_gpt35,
-        "token_cnt": get_token_num_gpt35,
+        "token_cnt": get_token_num_gpt35,
     },
     "glm-4v": {
         "fn_with_ui": zhipu_ui,
@@ -718,7 +718,7 @@ if any(item in claude_models for item in AVAIL_LLM_MODELS):
             "tokenizer": tokenizer_gpt35,
             "token_cnt": get_token_num_gpt35,
         },
-    })
+    })
 if "jittorllms_rwkv" in AVAIL_LLM_MODELS:
     from .bridge_jittorllms_rwkv import predict_no_ui_long_connection as rwkv_noui
     from .bridge_jittorllms_rwkv import predict as rwkv_ui
@@ -1063,18 +1063,18 @@ if any(item in grok_models for item in AVAIL_LLM_MODELS):
         grok_beta_128k_noui, grok_beta_128k_ui = get_predict_function(
             api_key_conf_name="GROK_API_KEY", max_output_token=8192, disable_proxy=False
         )
         model_info.update({
             "grok-beta": {
                 "fn_with_ui": grok_beta_128k_ui,
                 "fn_without_ui": grok_beta_128k_noui,
-                "can_multi_thread": True,
+                "can_multi_thread": True,
                 "endpoint": grok_model_endpoint,
                 "max_token": 128000,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             },
         })
     except:
         logger.error(trimmed_format_exc())
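
The grok-beta hunk above shows the registration pattern this bridge module uses for every backend: a model name mapped to paired UI/non-UI predict functions plus endpoint and tokenizer metadata. A hedged sketch of adding one more entry in that pattern; the model name, endpoint, and predict stubs below are placeholders, not part of this commit:

    # Placeholder predict functions; real backends provide a streaming (UI)
    # and a blocking (non-UI) variant, as the hunks above show.
    def my_model_ui(*args, **kwargs): ...
    def my_model_noui(*args, **kwargs): ...

    model_info.update({
        "my-model": {                                  # hypothetical model name
            "fn_with_ui": my_model_ui,
            "fn_without_ui": my_model_noui,
            "can_multi_thread": True,                  # safe for plugin worker threads
            "endpoint": "https://example.com/v1/chat/completions",
            "max_token": 128000,
            "tokenizer": tokenizer_gpt35,              # token counting borrows the GPT-3.5 tokenizer
            "token_cnt": get_token_num_gpt35,
        },
    })
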
@@ -1519,6 +1519,8 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list, sys
 # Adjust the model type according to the ModelOverride parameter of the core functional area; used in `predict`
 import importlib
 import core_functional
+from shared_utils.doc_loader_dynamic import start_with_url, load_web_content, contain_uploaded_files, load_uploaded_files
+
 def execute_model_override(llm_kwargs, additional_fn, method):
     functional = core_functional.get_core_functions()
     if (additional_fn in functional) and 'ModelOverride' in functional[additional_fn]:
@@ -1565,5 +1567,12 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot,
     if additional_fn: # Adjust the model type according to the ModelOverride parameter of the core functional area
         llm_kwargs, additional_fn, method = execute_model_override(llm_kwargs, additional_fn, method)
+    if start_with_url(inputs):
+        yield from load_web_content(inputs, chatbot, history)
+        return
+
+    if contain_uploaded_files(inputs):
+        inputs = yield from load_uploaded_files(inputs, method, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, stream, additional_fn)
+
     # Update the llm_kwargs parameters first, otherwise a parameter mismatch will occur
     yield from method(inputs, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, stream, additional_fn)
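
One detail in the predict hunk is easy to miss: `inputs = yield from load_uploaded_files(...)` both streams the loader's intermediate chatbot updates and captures its final result, because `yield from` evaluates to the sub-generator's return value. A self-contained illustration of that mechanism (all names here are illustrative stubs, not repository code):

    def load_stub(user_input):
        # Stream status updates to whoever drives the generator...
        yield f"reading files referenced in: {user_input!r}"
        yield "files loaded"
        # ...then hand back the rewritten prompt as the generator's return value
        return user_input + "\n\n[file contents appended here]"

    def predict_stub(user_input):
        # `yield from` re-emits each status update and evaluates to the return value
        rewritten = yield from load_stub(user_input)
        yield f"calling the model with: {rewritten!r}"

    for update in predict_stub("private_upload/demo.txt what does this file say?"):
        print(update)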


@@ -0,0 +1,109 @@
import re
import requests
from loguru import logger
from typing import List, Dict
from urllib3.util import Retry
from requests.adapters import HTTPAdapter
from textwrap import dedent
from request_llms.bridge_all import predict_no_ui_long_connection

class BGELLMRanker:
    """Uses an LLM to judge whether papers are relevant to a query."""

    def __init__(self, llm_kwargs):
        self.llm_kwargs = llm_kwargs

    def is_paper_relevant(self, query: str, paper_text: str) -> bool:
        """Determine whether a single paper is relevant to the query."""
        prompt = dedent(f"""
            Evaluate if this academic paper contains information that directly addresses the user's query.

            Query: {query}

            Paper Content:
            {paper_text}

            Evaluation Criteria:
            1. The paper must contain core information that directly answers the query
            2. The paper's main research focus must be highly relevant to the query
            3. Papers that only mention query-related content in the abstract should be excluded
            4. Papers with superficial or general discussions should be excluded
            5. For queries about "recent" or "latest" advances, the paper should be from the last 3 years

            Instructions:
            - Carefully evaluate against ALL criteria above
            - Return true ONLY if the paper meets ALL criteria
            - If any criterion is not met or unclear, return false
            - Be strict but not overly restrictive

            Output Rules:
            - Must ONLY respond with <decision>true</decision> or <decision>false</decision>
            - true = paper contains relevant information to answer the query
            - false = paper does not contain sufficient relevant information

            Do not include any explanation or additional text."""
        )
        response = predict_no_ui_long_connection(
            inputs=prompt,
            history=[],
            llm_kwargs=self.llm_kwargs,
            sys_prompt="You are an expert at determining paper relevance to queries. Respond only with <decision>true</decision> or <decision>false</decision>."
        )
        # Extract the content of the <decision> tag; default to "not relevant" if absent
        match = re.search(r'<decision>(.*?)</decision>', response, re.IGNORECASE)
        if match:
            decision = match.group(1).lower()
            return decision == "true"
        return False

    def batch_check_relevance(self, query: str, paper_texts: List[str], show_progress: bool = True) -> List[bool]:
        """Check paper relevance in batch.

        Args:
            query: the user query
            paper_texts: a list of paper texts
            show_progress: whether to display a progress bar

        Returns:
            List[bool]: the relevance decision for each paper
        """
        from concurrent.futures import ThreadPoolExecutor, as_completed
        from tqdm import tqdm

        results = [False] * len(paper_texts)
        # Cap the number of concurrent threads to avoid exhausting the connection pool
        max_workers = min(20, len(paper_texts))
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_idx = {
                executor.submit(self.is_paper_relevant, query, text): i
                for i, text in enumerate(paper_texts)
            }
            iterator = as_completed(future_to_idx)
            if show_progress:
                iterator = tqdm(iterator, total=len(paper_texts), desc="Checking paper relevance")
            for future in iterator:
                idx = future_to_idx[future]
                try:
                    results[idx] = future.result()
                except Exception as e:
                    logger.exception(f"Error while processing paper {idx}: {str(e)}")
                    results[idx] = False
        return results

def main():
    # Test code; llm_kwargs must carry the model configuration that
    # predict_no_ui_long_connection expects (the key below is a typical example)
    ranker = BGELLMRanker(llm_kwargs={"llm_model": "gpt-3.5-turbo"})
    query = "Recent advances in transformer models"
    paper_text = """
    Title: Attention Is All You Need
    Abstract: The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely...
    """
    is_relevant = ranker.is_paper_relevant(query, paper_text)
    print(f"Paper relevant: {is_relevant}")

if __name__ == "__main__":
    main()
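
A hedged usage sketch of the batch API above; the llm_kwargs keys are an assumption about the caller's configuration and are not shown in this diff:

    ranker = BGELLMRanker(llm_kwargs={"llm_model": "gpt-3.5-turbo"})  # hypothetical config
    papers = ["full text of paper one ...", "full text of paper two ..."]
    flags = ranker.batch_check_relevance("recent advances in transformers", papers)
    relevant = [p for p, keep in zip(papers, flags) if keep]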


@@ -8,13 +8,10 @@ API_URL_REDIRECT, AZURE_ENDPOINT, AZURE_ENGINE = get_conf("API_URL_REDIRECT", "AZURE_ENDPOINT", "AZURE_ENGINE")
 openai_endpoint = "https://api.openai.com/v1/chat/completions"
 if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
 azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
 if openai_endpoint in API_URL_REDIRECT: openai_endpoint = API_URL_REDIRECT[openai_endpoint]
 openai_embed_endpoint = openai_endpoint.replace("chat/completions", "embeddings")
-from .openai_embed import OpenAiEmbeddingModel
+from request_llms.embed_models.openai_embed import OpenAiEmbeddingModel
 embed_model_info = {
     # text-embedding-3-small Increased performance over 2nd generation ada embedding model | 1,536