from typing import List, Dict, Any
from .base_handler import BaseHandler
from textwrap import dedent
from crazy_functions.review_fns.query_analyzer import SearchCriteria
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency as request_gpt
class 论文推荐功能(BaseHandler):
"""论文推荐处理器"""
def __init__(self, arxiv, semantic, llm_kwargs=None):
super().__init__(arxiv, semantic, llm_kwargs)
async def handle(
self,
criteria: SearchCriteria,
chatbot: List[List[str]],
history: List[List[str]],
system_prompt: str,
llm_kwargs: Dict[str, Any],
plugin_kwargs: Dict[str, Any],
) -> str:
"""处理论文推荐请求,返回最终的prompt"""
search_params = self._get_search_params(plugin_kwargs)
# 1. 先搜索种子论文
seed_papers = await self._search_seed_papers(criteria, search_params)
if not seed_papers:
return self._generate_apology_prompt(criteria)
# 使用BGE重排序
all_papers = seed_papers
if not all_papers:
return self._generate_apology_prompt(criteria)
self.ranked_papers = self.paper_ranker.rank_papers(
query=criteria.original_query,
papers=all_papers,
search_criteria=criteria
)
if not self.ranked_papers:
return self._generate_apology_prompt(criteria)
# 构建最终的prompt
current_time = self._get_current_time()
final_prompt = dedent(f"""Current time: {current_time}
Based on the user's interest in {criteria.main_topic}, here are relevant papers.
Available papers for recommendation:
{self._format_papers(self.ranked_papers)}
Please provide:
1. Group papers by sub-topics or themes if applicable
2. For each paper:
- Publication time and venue (when available)
- Journal metrics (when available):
* Impact Factor (IF)
* JCR Quartile
* Chinese Academy of Sciences (CAS) Division
- The key contributions and main findings
- Why it's relevant to the user's interests
- How it relates to other recommended papers
- The paper's citation count and citation impact
- The paper's download link
3. A suggested reading order based on:
- Journal impact and quality metrics
- Chronological development of ideas
- Paper relationships and dependencies
- Difficulty level
- Impact and significance
4. Future Directions
- Emerging venues and research streams
- Novel methodological approaches
- Cross-disciplinary opportunities
- Research gaps by publication type
IMPORTANT:
- Focus on explaining why each paper is valuable
- Highlight connections between papers
- Consider both citation counts AND journal metrics when discussing impact
- When available, use IF, JCR quartile, and CAS division to assess paper quality
- Mention publication timing when discussing paper relationships
- When referring to papers, use HTML links in this format:
* For DOIs: DOI: DOI_HERE
* For titles: PAPER_TITLE
- Present papers in a way that shows the evolution of ideas over time
- Base recommendations ONLY on the explicitly provided paper information
- Do not make ANY assumptions about papers beyond the given data
- When information is missing or unclear, acknowledge the limitation
- Never speculate about:
* Paper quality or rigor not evidenced in the data
* Research impact beyond citation counts and journal metrics
* Implementation details not mentioned
* Author expertise or background
* Future research directions not stated
- For each recommendation, cite only verifiable information
- Clearly distinguish between facts and potential implications
Format your response in markdown with clear sections.
Language requirement:
- If the query explicitly specifies a language, use that language
- Otherwise, match the language of the original user query
"""
)
return final_prompt
async def _search_seed_papers(self, criteria: SearchCriteria, search_params: Dict) -> List:
"""搜索种子论文"""
try:
# 使用_search_all_sources替代原来的并行搜索
all_papers = await self._search_all_sources(criteria, search_params)
if not all_papers:
return []
return all_papers
except Exception as e:
print(f"搜索种子论文时出错: {str(e)}")
return []
async def _get_recommendations(self, seed_papers: List, multiplier: int = 1) -> List:
"""获取推荐论文"""
recommendations = []
base_limit = 3 * multiplier
# 将种子论文添加到推荐列表中
recommendations.extend(seed_papers)
# 只使用前5篇论文作为种子
seed_papers = seed_papers[:5]
for paper in seed_papers:
try:
if paper.doi and paper.doi.startswith("10.48550/arXiv."):
# arXiv论文
arxiv_id = paper.doi.split(".")[-1]
paper_details = await self.arxiv.get_paper_details(arxiv_id)
if paper_details and hasattr(paper_details, 'venue'):
category = paper_details.venue.split(":")[-1]
similar_papers = await self.arxiv.search_by_category(
category,
limit=base_limit,
sort_by='relevance'
)
recommendations.extend(similar_papers)
elif paper.doi: # 只对有DOI的论文获取推荐
# Semantic Scholar论文
similar_papers = await self.semantic.get_recommended_papers(
paper.doi,
limit=base_limit
)
if similar_papers: # 只添加成功获取的推荐
recommendations.extend(similar_papers)
else:
# 对于没有DOI的论文,使用标题进行相关搜索
if paper.title:
similar_papers = await self.semantic.search(
query=paper.title,
limit=base_limit
)
recommendations.extend(similar_papers)
except Exception as e:
print(f"获取论文 '{paper.title}' 的推荐时发生错误: {str(e)}")
continue
# 去重处理
seen_dois = set()
unique_recommendations = []
for paper in recommendations:
if paper.doi and paper.doi not in seen_dois:
seen_dois.add(paper.doi)
unique_recommendations.append(paper)
elif not paper.doi and paper not in unique_recommendations:
unique_recommendations.append(paper)
return unique_recommendations