镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 14:36:48 +00:00
* stage academic conversation * stage document conversation * fix buggy gradio version * file dynamic load * merge more academic plugins * accelerate nltk * feat: 为predict函数添加文件和URL读取功能 - 添加URL检测和网页内容提取功能,支持自动提取网页文本 - 添加文件路径识别和文件内容读取功能,支持private_upload路径格式 - 集成WebTextExtractor处理网页内容提取 - 集成TextContentLoader处理本地文件读取 - 支持文件路径与问题组合的智能处理 * back * block unstable --------- Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
186 行
7.5 KiB
Python
186 行
7.5 KiB
Python
from typing import List, Dict, Any
|
|
from .base_handler import BaseHandler
|
|
from textwrap import dedent
|
|
from crazy_functions.review_fns.query_analyzer import SearchCriteria
|
|
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency as request_gpt
|
|
|
|
class 论文推荐功能(BaseHandler):
    """Paper recommendation handler.

    Pipeline: search "seed" papers matching the user's query across all
    configured sources, re-rank them with the BGE ranker, then build the
    final prompt that asks the LLM to produce a structured recommendation
    report (grouping, reading order, future directions, ...).
    """

    def __init__(self, arxiv, semantic, llm_kwargs=None):
        """
        Args:
            arxiv: arXiv client (provides get_paper_details / search_by_category).
            semantic: Semantic Scholar client (provides get_recommended_papers / search).
            llm_kwargs: optional LLM configuration, forwarded to BaseHandler.
        """
        super().__init__(arxiv, semantic, llm_kwargs)

    async def handle(
        self,
        criteria: SearchCriteria,
        chatbot: List[List[str]],
        history: List[List[str]],
        system_prompt: str,
        llm_kwargs: Dict[str, Any],
        plugin_kwargs: Dict[str, Any],
    ) -> str:
        """Handle a paper-recommendation request and return the final prompt.

        Returns either the recommendation prompt built from the ranked papers,
        or an apology prompt when no usable paper could be found.
        """
        search_params = self._get_search_params(plugin_kwargs)

        # 1. Search seed papers first; bail out early when nothing was found.
        seed_papers = await self._search_seed_papers(criteria, search_params)
        if not seed_papers:
            return self._generate_apology_prompt(criteria)

        # 2. Re-rank the candidates with the BGE ranker.
        #    (The redundant second emptiness check on the same list was removed.)
        self.ranked_papers = self.paper_ranker.rank_papers(
            query=criteria.original_query,
            papers=seed_papers,
            search_criteria=criteria
        )

        if not self.ranked_papers:
            return self._generate_apology_prompt(criteria)

        # 3. Build the final prompt for the LLM.
        current_time = self._get_current_time()
        final_prompt = dedent(f"""Current time: {current_time}

Based on the user's interest in {criteria.main_topic}, here are relevant papers.

Available papers for recommendation:
{self._format_papers(self.ranked_papers)}

Please provide:
1. Group papers by sub-topics or themes if applicable

2. For each paper:
   - Publication time and venue (when available)
   - Journal metrics (when available):
     * Impact Factor (IF)
     * JCR Quartile
     * Chinese Academy of Sciences (CAS) Division
   - The key contributions and main findings
   - Why it's relevant to the user's interests
   - How it relates to other recommended papers
   - The paper's citation count and citation impact
   - The paper's download link

3. A suggested reading order based on:
   - Journal impact and quality metrics
   - Chronological development of ideas
   - Paper relationships and dependencies
   - Difficulty level
   - Impact and significance

4. Future Directions
   - Emerging venues and research streams
   - Novel methodological approaches
   - Cross-disciplinary opportunities
   - Research gaps by publication type


IMPORTANT:
- Focus on explaining why each paper is valuable
- Highlight connections between papers
- Consider both citation counts AND journal metrics when discussing impact
- When available, use IF, JCR quartile, and CAS division to assess paper quality
- Mention publication timing when discussing paper relationships
- When referring to papers, use HTML links in this format:
  * For DOIs: <a href='https://doi.org/DOI_HERE' target='_blank'>DOI: DOI_HERE</a>
  * For titles: <a href='PAPER_URL' target='_blank'>PAPER_TITLE</a>
- Present papers in a way that shows the evolution of ideas over time
- Base recommendations ONLY on the explicitly provided paper information
- Do not make ANY assumptions about papers beyond the given data
- When information is missing or unclear, acknowledge the limitation
- Never speculate about:
  * Paper quality or rigor not evidenced in the data
  * Research impact beyond citation counts and journal metrics
  * Implementation details not mentioned
  * Author expertise or background
  * Future research directions not stated
- For each recommendation, cite only verifiable information
- Clearly distinguish between facts and potential implications

Format your response in markdown with clear sections.
Language requirement:
- If the query explicitly specifies a language, use that language
- Otherwise, match the language of the original user query
"""
        )
        return final_prompt

    async def _search_seed_papers(self, criteria: SearchCriteria, search_params: Dict) -> List:
        """Search seed papers from all configured sources.

        Returns an empty list on failure or when nothing matched, so callers
        never have to deal with None or exceptions.
        """
        try:
            # Delegates the per-source fan-out to _search_all_sources.
            all_papers = await self._search_all_sources(criteria, search_params)
            return all_papers if all_papers else []
        except Exception as e:
            # Best-effort: log and degrade to "no results" rather than crash.
            print(f"搜索种子论文时出错: {str(e)}")
            return []

    async def _get_recommendations(self, seed_papers: List, multiplier: int = 1) -> List:
        """Fetch papers related to the seed papers (seeds included in result).

        Args:
            seed_papers: candidate papers; only the first 5 are used as seeds.
            multiplier: scales the per-seed recommendation limit (3 * multiplier).

        Returns:
            De-duplicated list of seed papers plus their recommendations.
        """
        recommendations = []
        base_limit = 3 * multiplier

        # The seed papers themselves are part of the recommendation list.
        recommendations.extend(seed_papers)

        # Only the first 5 papers are used as recommendation seeds.
        seed_papers = seed_papers[:5]

        for paper in seed_papers:
            try:
                if paper.doi and paper.doi.startswith("10.48550/arXiv."):
                    # arXiv paper: recover the arXiv id from the DataCite DOI.
                    # BUGFIX: the previous split(".")[-1] mangled ids such as
                    # "2301.12345" (it kept only "12345"); split on the
                    # "arXiv." marker instead to keep the full id.
                    arxiv_id = paper.doi.split("arXiv.", 1)[-1]
                    paper_details = await self.arxiv.get_paper_details(arxiv_id)
                    if paper_details and hasattr(paper_details, 'venue'):
                        # venue looks like "arXiv:cs.LG" — take the category part.
                        category = paper_details.venue.split(":")[-1]
                        similar_papers = await self.arxiv.search_by_category(
                            category,
                            limit=base_limit,
                            sort_by='relevance'
                        )
                        recommendations.extend(similar_papers)
                elif paper.doi:  # Only papers with a DOI can be looked up directly.
                    # Semantic Scholar recommendation API.
                    similar_papers = await self.semantic.get_recommended_papers(
                        paper.doi,
                        limit=base_limit
                    )
                    if similar_papers:  # Only add successfully fetched results.
                        recommendations.extend(similar_papers)
                else:
                    # No DOI: fall back to a title-based related-paper search.
                    if paper.title:
                        similar_papers = await self.semantic.search(
                            query=paper.title,
                            limit=base_limit
                        )
                        recommendations.extend(similar_papers)

            except Exception as e:
                # One failing seed must not abort the whole recommendation run.
                print(f"获取论文 '{paper.title}' 的推荐时发生错误: {str(e)}")
                continue

        # De-duplicate: by DOI when available, by object equality otherwise.
        seen_dois = set()
        unique_recommendations = []
        for paper in recommendations:
            if paper.doi and paper.doi not in seen_dois:
                seen_dois.add(paper.doi)
                unique_recommendations.append(paper)
            elif not paper.doi and paper not in unique_recommendations:
                unique_recommendations.append(paper)

        return unique_recommendations