镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 06:26:47 +00:00
* stage academic conversation * stage document conversation * fix buggy gradio version * file dynamic load * merge more academic plugins * accelerate nltk * feat: 为predict函数添加文件和URL读取功能 - 添加URL检测和网页内容提取功能,支持自动提取网页文本 - 添加文件路径识别和文件内容读取功能,支持private_upload路径格式 - 集成WebTextExtractor处理网页内容提取 - 集成TextContentLoader处理本地文件读取 - 支持文件路径与问题组合的智能处理 * back * block unstable --------- Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
290 行
12 KiB
Python
290 行
12 KiB
Python
import re
|
||
import os
|
||
import asyncio
|
||
from typing import List, Dict, Tuple
|
||
from dataclasses import dataclass
|
||
from textwrap import dedent
|
||
from toolbox import CatchException, get_conf, update_ui, promote_file_to_downloadzone, get_log_folder, get_user
|
||
from toolbox import update_ui, CatchException, report_exception, write_history_to_file
|
||
from crazy_functions.review_fns.data_sources.semantic_source import SemanticScholarSource
|
||
from crazy_functions.review_fns.data_sources.arxiv_source import ArxivSource
|
||
from crazy_functions.review_fns.query_analyzer import QueryAnalyzer
|
||
from crazy_functions.review_fns.handlers.review_handler import 文献综述功能
|
||
from crazy_functions.review_fns.handlers.recommend_handler import 论文推荐功能
|
||
from crazy_functions.review_fns.handlers.qa_handler import 学术问答功能
|
||
from crazy_functions.review_fns.handlers.paper_handler import 单篇论文分析功能
|
||
from crazy_functions.Conversation_To_File import write_chat_to_file
|
||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||
from crazy_functions.review_fns.handlers.latest_handler import Arxiv最新论文推荐功能
|
||
from datetime import datetime
|
||
|
||
def _build_user_query_note(txt):
    """Return the trailer appended to the handler prompt, restating the user's query.

    The text is assembled from unindented pieces instead of dedenting an
    interpolated f-string: dedenting after interpolation stops working as soon
    as the query itself contains a line that starts at column 0.
    """
    return (
        "\n"
        f'Original user query: "{txt}"\n'
        "\n"
        "IMPORTANT NOTE :\n"
        "- Your response must directly address the user's original user query above\n"
        "- While following the previous guidelines, prioritize answering what the user specifically asked\n"
        "- Make sure your response format and content align with the user's expectations\n"
        "- Do not translate paper titles, keep them in their original language\n"
        "- Do not generate a reference list in your response - references will be handled separately\n"
    )


def _get_safe_filename(text, max_length=10):
    """Derive a short, filesystem-safe file name stem from the start of ``text``.

    Falls back to a timestamp when nothing usable remains after sanitising.
    """
    # Drop characters that are illegal in file names on common platforms, plus
    # control characters (a query may contain newlines within its first chars).
    filename = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '', text[:max_length]).strip()
    if not filename:
        filename = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    return filename


def _format_reference(idx, paper):
    """Format one paper as a single numbered Markdown reference line.

    Reads ``authors``, ``title``, ``venue_name``, ``year`` and ``doi`` from
    ``paper``, plus the optional ``if_factor`` / ``jcr_division`` /
    ``cas_division`` attributes when they are present and truthy.
    """
    import html

    # Show at most three authors; tolerate a missing author list.
    all_authors = list(paper.authors or [])
    shown_authors = all_authors[:3]
    if len(all_authors) > 3:
        shown_authors.append("et al.")
    authors_str = ", ".join(shown_authors)

    # Optional journal metrics, only when the attribute exists and is truthy.
    metrics = []
    for attr, label in (("if_factor", "IF"),
                        ("jcr_division", "JCR"),
                        ("cas_division", "中科院分区")):
        value = getattr(paper, attr, None)
        if value:
            metrics.append(f"{label}: {value}")
    metrics_str = f" [{', '.join(metrics)}]" if metrics else ""

    # DOI link. The value comes from external data sources, so escape it before
    # embedding it in HTML (the href attribute is single-quoted).
    doi_link = ""
    if paper.doi:
        doi_text = str(paper.doi)
        if "arxiv.org" in doi_text:
            doi_url = doi_text
        else:
            doi_url = f"https://doi.org/{doi_text}"
        doi_link = (
            f" <a href='{html.escape(doi_url, quote=True)}' target='_blank'>"
            f"DOI: {html.escape(doi_text)}</a>"
        )

    reference = f"[{idx}] {authors_str}. *{paper.title}*"
    if paper.venue_name:
        reference += f". {paper.venue_name}"
    if paper.year:
        reference += f", {paper.year}"
    reference += metrics_str
    if doi_link:
        reference += f".{doi_link}"
    # Two trailing spaces form a Markdown hard line break, so each reference
    # is rendered on its own line.
    reference += "  \n"
    return reference


def _save_conversation_files(txt, response, papers_list, save_dir):
    """Export the conversation to Markdown, HTML, Word/PDF, BibTeX and EndNote.

    Every format is attempted independently (best effort): a failure is printed
    and the remaining formats are still tried.

    Returns:
        The list of file paths that were written successfully.
    """
    from .review_fns.conversation_doc.word_doc import WordFormatter
    from .review_fns.conversation_doc.word2pdf import WordToPdfConverter
    from .review_fns.conversation_doc.markdown_doc import MarkdownFormatter
    from .review_fns.conversation_doc.html_doc import HtmlFormatter

    base_filename = _get_safe_filename(txt)
    result_files = []  # every file that was generated successfully
    # Must exist even when the Markdown export fails: later steps use it to
    # pick their output directory.
    result_file_md = None

    # Markdown
    try:
        md_content = MarkdownFormatter().create_document(txt, response, papers_list)
        result_file_md = write_history_to_file(
            history=[md_content],
            file_basename=f"markdown_{base_filename}.md"
        )
        result_files.append(result_file_md)
    except Exception as e:
        print(f"Markdown保存失败: {str(e)}")

    # HTML
    try:
        html_content = HtmlFormatter().create_document(txt, response, papers_list)
        result_file_html = write_history_to_file(
            history=[html_content],
            file_basename=f"html_{base_filename}.html"
        )
        result_files.append(result_file_html)
    except Exception as e:
        print(f"HTML保存失败: {str(e)}")

    # Keep the remaining files next to the Markdown file when there is one,
    # otherwise fall back to the plugin's log folder.
    output_dir = os.path.dirname(result_file_md) if result_file_md else save_dir

    # Word, then PDF converted from the Word file
    try:
        word_formatter = WordFormatter()
        try:
            doc = word_formatter.create_document(txt, response, papers_list)
        except Exception as e:
            print(f"Word文档内容生成失败: {str(e)}")
            raise

        try:
            result_file_docx = os.path.join(output_dir, f"docx_{base_filename}.docx")
            doc.save(result_file_docx)
            result_files.append(result_file_docx)
            print(f"Word文档已保存到: {result_file_docx}")

            # A failed PDF conversion must not invalidate the saved .docx.
            try:
                pdf_path = WordToPdfConverter.convert_to_pdf(result_file_docx)
                if pdf_path:
                    result_files.append(pdf_path)
                    print(f"PDF文档已生成: {pdf_path}")
            except Exception as e:
                print(f"PDF转换失败: {str(e)}")

        except Exception as e:
            print(f"Word文档保存失败: {str(e)}")
            raise

    except Exception as e:
        print(f"Word格式化失败: {str(e)}")
        import traceback
        print(f"详细错误信息: {traceback.format_exc()}")

    # BibTeX
    try:
        from .review_fns.conversation_doc.reference_formatter import ReferenceFormatter
        bibtex_content = ReferenceFormatter().create_document(papers_list)
        result_file_bib = os.path.join(output_dir, f"references_{base_filename}.bib")
        with open(result_file_bib, 'w', encoding='utf-8') as f:
            f.write(bibtex_content)
        result_files.append(result_file_bib)
        print(f"BibTeX文件已保存到: {result_file_bib}")
    except Exception as e:
        print(f"BibTeX格式保存失败: {str(e)}")

    # EndNote
    try:
        from .review_fns.conversation_doc.endnote_doc import EndNoteFormatter
        endnote_content = EndNoteFormatter().create_document(papers_list)
        result_file_enw = os.path.join(output_dir, f"references_{base_filename}.enw")
        with open(result_file_enw, 'w', encoding='utf-8') as f:
            f.write(endnote_content)
        result_files.append(result_file_enw)
        print(f"EndNote文件已保存到: {result_file_enw}")
    except Exception as e:
        print(f"EndNote格式保存失败: {str(e)}")

    return result_files


@CatchException
def 学术对话(txt: str, llm_kwargs: Dict, plugin_kwargs: Dict, chatbot: List,
             history: List, system_prompt: str, user_request: str):
    """Plugin entry point: answer an academic query and export the conversation.

    This is a generator; it yields UI updates through ``update_ui`` while it:

    1. analyses the query with ``QueryAnalyzer`` and selects a handler by
       ``search_criteria.query_type`` (falling back to the "qa" handler),
    2. runs the handler to obtain the final prompt,
    3. asks the LLM for the answer,
    4. shows a formatted reference list built from ``handler.ranked_papers``,
    5. saves the conversation in several formats and promotes the files to the
       download zone.

    Args:
        txt: The user's query text.
        llm_kwargs: LLM settings, forwarded to the handlers and the LLM request.
        plugin_kwargs: Plugin settings, forwarded to the handler.
        chatbot: Chat display list; status and result messages are appended.
        history: Conversation history, passed to the handler and UI updates.
        system_prompt: System prompt, forwarded to the handler.
        user_request: Not used by this function.
    """
    # Data sources
    arxiv_source = ArxivSource()
    semantic_source = SemanticScholarSource(
        api_key=get_conf("SEMANTIC_SCHOLAR_KEY")
    )

    # One handler per query type
    handlers = {
        "review": 文献综述功能(arxiv_source, semantic_source, llm_kwargs),
        "recommend": 论文推荐功能(arxiv_source, semantic_source, llm_kwargs),
        "qa": 学术问答功能(arxiv_source, semantic_source, llm_kwargs),
        "paper": 单篇论文分析功能(arxiv_source, semantic_source, llm_kwargs),
        "latest": Arxiv最新论文推荐功能(arxiv_source, semantic_source, llm_kwargs),
    }

    # Analyse the intent of the query
    chatbot.append([None, "正在分析研究主题和查询要求..."])
    yield from update_ui(chatbot=chatbot, history=history)

    query_analyzer = QueryAnalyzer()
    search_criteria = yield from query_analyzer.analyze_query(txt, chatbot, llm_kwargs)
    # Unknown query types fall back to the QA handler.
    handler = handlers.get(search_criteria.query_type) or handlers["qa"]

    # Run the handler
    chatbot.append([None, f"使用{handler.__class__.__name__}处理...,可能需要您耐心等待3~5分钟..."])
    yield from update_ui(chatbot=chatbot, history=history)

    final_prompt = asyncio.run(handler.handle(
        criteria=search_criteria,
        chatbot=chatbot,
        history=history,
        system_prompt=system_prompt,
        llm_kwargs=llm_kwargs,
        plugin_kwargs=plugin_kwargs
    ))

    if not final_prompt:
        report_exception(chatbot, history, a="处理失败", b="请尝试其他查询")
        yield from update_ui(chatbot=chatbot, history=history)
        return

    # The handler signals "nothing found" with an apology text: show it as-is.
    if "很抱歉,我们未能找到" in final_prompt:
        chatbot.append([txt, final_prompt])
        yield from update_ui(chatbot=chatbot, history=history)
        return

    # Restate the user's original query at the end of the prompt.
    final_prompt += _build_user_query_note(txt)

    # Generate the answer from the final prompt
    response = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=final_prompt,
        inputs_show_user=txt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=[],
        sys_prompt="You are a helpful academic assistant. Response in Chinese by default unless specified language is required in the user's query."
    )

    # 1. Reference list, built from the handler's raw paper data
    papers_list = handler.ranked_papers
    if papers_list:
        references = "".join(
            _format_reference(idx, paper)
            for idx, paper in enumerate(papers_list, 1)
        )
        # Show the references as a separate chat entry
        chatbot.append(["参考文献如下:", references])
        yield from update_ui(chatbot=chatbot, history=history)

    # 2. Save the conversation in several formats
    save_dir = get_log_folder(get_user(chatbot), plugin_name='chatscholar')
    os.makedirs(save_dir, exist_ok=True)
    result_files = _save_conversation_files(txt, response, papers_list, save_dir)

    # Offer every generated file in the download zone
    success_files = []
    for path in result_files:
        try:
            promote_file_to_downloadzone(path, chatbot=chatbot)
            success_files.append(os.path.basename(path))
        except Exception as e:
            print(f"文件添加到下载区失败: {str(e)}")

    # Final status message
    if success_files:
        chatbot.append(["保存对话记录成功,bib和enw文件支持导入到EndNote、Zotero、JabRef、Mendeley等文献管理软件,HTML文件支持在浏览器中打开,里面包含详细论文源信息", "对话已保存并添加到下载区,可以在下载区找到相关文件"])
    else:
        chatbot.append(["保存对话记录", "所有格式的保存都失败了,请检查错误日志。"])

    yield from update_ui(chatbot=chatbot, history=history)