文件
gpt_academic/crazy_functions/review_fns/conversation_doc/html_doc.py
binary-husky 8042750d41 Master 4.0 (#2210)
* stage academic conversation

* stage document conversation

* fix buggy gradio version

* file dynamic load

* merge more academic plugins

* accelerate nltk

* feat: 为predict函数添加文件和URL读取功能
- 添加URL检测和网页内容提取功能,支持自动提取网页文本
- 添加文件路径识别和文件内容读取功能,支持private_upload路径格式
- 集成WebTextExtractor处理网页内容提取
- 集成TextContentLoader处理本地文件读取
- 支持文件路径与问题组合的智能处理

* back

* block unstable

---------

Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
2025-08-23 15:59:22 +08:00

472 行
17 KiB
Python

此文件含有模棱两可的 Unicode 字符

此文件含有可能会与其他字符混淆的 Unicode 字符。 如果您是想特意这样的,可以安全地忽略该警告。 使用 Escape 按钮显示他们。

class HtmlFormatter:
"""聊天记录HTML格式生成器"""
def __init__(self):
self.css_styles = """
:root {
--primary-color: #2563eb;
--primary-light: #eff6ff;
--secondary-color: #1e293b;
--background-color: #f8fafc;
--text-color: #334155;
--border-color: #e2e8f0;
--card-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
}
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.8;
margin: 0;
padding: 2rem;
color: var(--text-color);
background-color: var(--background-color);
}
.container {
max-width: 1200px;
margin: 0 auto;
background: white;
padding: 2rem;
border-radius: 16px;
box-shadow: var(--card-shadow);
}
::selection {
background: var(--primary-light);
color: var(--primary-color);
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
@keyframes slideIn {
from { transform: translateX(-20px); opacity: 0; }
to { transform: translateX(0); opacity: 1; }
}
.container {
animation: fadeIn 0.6s ease-out;
}
.QaBox {
animation: slideIn 0.5s ease-out;
transition: all 0.3s ease;
}
.QaBox:hover {
transform: translateX(5px);
}
.Question, .Answer, .historyBox {
transition: all 0.3s ease;
}
.chat-title {
color: var(--primary-color);
font-size: 2em;
text-align: center;
margin: 1rem 0 2rem;
padding-bottom: 1rem;
border-bottom: 2px solid var(--primary-color);
}
.chat-body {
display: flex;
flex-direction: column;
gap: 1.5rem;
margin: 2rem 0;
}
.QaBox {
background: white;
padding: 1.5rem;
border-radius: 8px;
border-left: 4px solid var(--primary-color);
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
margin-bottom: 1.5rem;
}
.Question {
color: var(--secondary-color);
font-weight: 500;
margin-bottom: 1rem;
}
.Answer {
color: var(--text-color);
background: var(--primary-light);
padding: 1rem;
border-radius: 6px;
}
.history-section {
margin-top: 3rem;
padding-top: 2rem;
border-top: 2px solid var(--border-color);
}
.history-title {
color: var(--secondary-color);
font-size: 1.5em;
margin-bottom: 1.5rem;
text-align: center;
}
.historyBox {
background: white;
padding: 1rem;
margin: 0.5rem 0;
border-radius: 6px;
border: 1px solid var(--border-color);
}
@media (prefers-color-scheme: dark) {
:root {
--background-color: #0f172a;
--text-color: #e2e8f0;
--border-color: #1e293b;
}
.container, .QaBox {
background: #1e293b;
}
}
"""
def create_document(self, question: str, answer: str, ranked_papers: list = None) -> str:
"""生成完整的HTML文档
Args:
question: str, 用户问题
answer: str, AI回答
ranked_papers: list, 排序后的论文列表
Returns:
str: 完整的HTML文档字符串
"""
chat_content = f'''
<div class="QaBox">
<div class="Question">{question}</div>
<div class="Answer markdown-body" id="answer-content">{answer}</div>
</div>
'''
references_content = ""
if ranked_papers:
references_content = '<div class="history-section"><h2 class="history-title">参考文献</h2>'
for idx, paper in enumerate(ranked_papers, 1):
authors = ', '.join(paper.authors)
# 构建引用信息
citations_info = f"被引用次数:{paper.citations}" if paper.citations is not None else "引用信息未知"
# 构建下载链接
download_links = []
if paper.doi:
# 检查是否是arXiv链接
if 'arxiv.org' in paper.doi:
# 如果DOI中包含完整的arXiv URL,直接使用
arxiv_url = paper.doi if paper.doi.startswith('http') else f'http://{paper.doi}'
download_links.append(f'<a href="{arxiv_url}">arXiv链接</a>')
# 提取arXiv ID并添加PDF链接
arxiv_id = arxiv_url.split('abs/')[-1].split('v')[0]
download_links.append(f'<a href="https://arxiv.org/pdf/{arxiv_id}.pdf">PDF下载</a>')
else:
# 非arXiv的DOI使用标准格式
download_links.append(f'<a href="https://doi.org/{paper.doi}">DOI: {paper.doi}</a>')
if hasattr(paper, 'url') and paper.url and 'arxiv.org' not in str(paper.url):
# 只有当URL不是arXiv链接时才添加
download_links.append(f'<a href="{paper.url}">原文链接</a>')
download_section = ' | '.join(download_links) if download_links else "无直接下载链接"
# 构建来源信息
source_info = []
if paper.venue_type:
source_info.append(f"类型:{paper.venue_type}")
if paper.venue_name:
source_info.append(f"来源:{paper.venue_name}")
# 添加期刊指标信息
if hasattr(paper, 'if_factor') and paper.if_factor:
source_info.append(f"<span class='journal-metric'>IF: {paper.if_factor}</span>")
if hasattr(paper, 'jcr_division') and paper.jcr_division:
source_info.append(f"<span class='journal-metric'>JCR分区: {paper.jcr_division}</span>")
if hasattr(paper, 'cas_division') and paper.cas_division:
source_info.append(f"<span class='journal-metric'>中科院分区: {paper.cas_division}</span>")
if hasattr(paper, 'venue_info') and paper.venue_info:
if paper.venue_info.get('journal_ref'):
source_info.append(f"期刊参考:{paper.venue_info['journal_ref']}")
if paper.venue_info.get('publisher'):
source_info.append(f"出版商:{paper.venue_info['publisher']}")
source_section = ' | '.join(source_info) if source_info else ""
# 构建标准引用格式
standard_citation = f"[{idx}] "
# 添加作者最多3个,超过则添加et al.
author_list = paper.authors[:3]
if len(paper.authors) > 3:
author_list.append("et al.")
standard_citation += ", ".join(author_list) + ". "
# 添加标题
standard_citation += f"<i>{paper.title}</i>"
# 添加期刊/会议名称
if paper.venue_name:
standard_citation += f". {paper.venue_name}"
# 添加年份
if paper.year:
standard_citation += f", {paper.year}"
# 添加DOI
if paper.doi:
if 'arxiv.org' in paper.doi:
# 如果是arXiv链接,直接使用arXiv URL
arxiv_url = paper.doi if paper.doi.startswith('http') else f'http://{paper.doi}'
standard_citation += f". {arxiv_url}"
else:
# 非arXiv的DOI使用标准格式
standard_citation += f". DOI: {paper.doi}"
standard_citation += "."
references_content += f'''
<div class="historyBox">
<div class="entry">
<p class="paper-title"><b>[{idx}]</b> <i>{paper.title}</i></p>
<p class="paper-authors">作者:{authors}</p>
<p class="paper-year">发表年份:{paper.year if paper.year else "未知"}</p>
<p class="paper-citations">{citations_info}</p>
{f'<p class="paper-source">{source_section}</p>' if source_section else ""}
<p class="paper-abstract">摘要:{paper.abstract if paper.abstract else "无摘要"}</p>
<p class="paper-links">链接:{download_section}</p>
<div class="standard-citation">
<p class="citation-title">标准引用格式:</p>
<p class="citation-text">{standard_citation}</p>
<button class="copy-btn" onclick="copyToClipboard(this.previousElementSibling)">复制引用格式</button>
</div>
</div>
</div>
'''
references_content += '</div>'
# 添加新的CSS样式
css_additions = """
.paper-title {
font-size: 1.1em;
margin-bottom: 0.5em;
}
.paper-authors {
color: var(--secondary-color);
margin: 0.3em 0;
}
.paper-year, .paper-citations {
color: var(--text-color);
margin: 0.3em 0;
}
.paper-source {
color: var(--text-color);
font-style: italic;
margin: 0.3em 0;
}
.paper-abstract {
margin: 0.8em 0;
padding: 0.8em;
background: var(--primary-light);
border-radius: 4px;
}
.paper-links {
margin-top: 0.5em;
}
.paper-links a {
color: var(--primary-color);
text-decoration: none;
margin-right: 1em;
}
.paper-links a:hover {
text-decoration: underline;
}
.standard-citation {
margin-top: 1em;
padding: 1em;
background: #f8fafc;
border-radius: 4px;
border: 1px solid var(--border-color);
}
.citation-title {
font-weight: bold;
margin-bottom: 0.5em;
color: var(--secondary-color);
}
.citation-text {
font-family: 'Times New Roman', Times, serif;
line-height: 1.6;
margin-bottom: 0.5em;
padding: 0.5em;
background: white;
border-radius: 4px;
border: 1px solid var(--border-color);
}
.copy-btn {
background: var(--primary-color);
color: white;
border: none;
padding: 0.5em 1em;
border-radius: 4px;
cursor: pointer;
font-size: 0.9em;
transition: background-color 0.2s;
}
.copy-btn:hover {
background: #1e40af;
}
@media (prefers-color-scheme: dark) {
.standard-citation {
background: #1e293b;
}
.citation-text {
background: #0f172a;
}
}
/* 添加期刊指标样式 */
.journal-metric {
display: inline-block;
padding: 0.2em 0.6em;
margin: 0 0.3em;
background: var(--primary-light);
border-radius: 4px;
font-weight: 500;
color: var(--primary-color);
}
@media (prefers-color-scheme: dark) {
.journal-metric {
background: #1e293b;
color: #60a5fa;
}
}
"""
# 修改 js_code 部分,添加 markdown 解析功能
js_code = """
<script>
// 复制功能
function copyToClipboard(element) {
const text = element.innerText;
navigator.clipboard.writeText(text).then(function() {
const btn = element.nextElementSibling;
const originalText = btn.innerText;
btn.innerText = '已复制!';
setTimeout(() => {
btn.innerText = originalText;
}, 2000);
}).catch(function(err) {
console.error('复制失败:', err);
});
}
// Markdown解析
document.addEventListener('DOMContentLoaded', function() {
const answerContent = document.getElementById('answer-content');
if (answerContent) {
const markdown = answerContent.textContent;
answerContent.innerHTML = marked.parse(markdown);
}
});
</script>
"""
# 将新的CSS样式添加到现有样式中
self.css_styles += css_additions
return f"""
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>学术对话存档</title>
<!-- 添加 marked.js -->
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<!-- 添加 GitHub Markdown CSS -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/sindresorhus/github-markdown-css@4.0.0/github-markdown.min.css">
<style>
{self.css_styles}
/* 添加 Markdown 相关样式 */
.markdown-body {{
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
padding: 1rem;
background: var(--primary-light);
border-radius: 6px;
}}
.markdown-body pre {{
background-color: #f6f8fa;
border-radius: 6px;
padding: 16px;
overflow: auto;
}}
.markdown-body code {{
background-color: rgba(175,184,193,0.2);
border-radius: 6px;
padding: 0.2em 0.4em;
font-size: 85%;
}}
.markdown-body pre code {{
background-color: transparent;
padding: 0;
}}
.markdown-body blockquote {{
border-left: 0.25em solid #d0d7de;
padding: 0 1em;
color: #656d76;
}}
.markdown-body table {{
border-collapse: collapse;
width: 100%;
margin: 1em 0;
}}
.markdown-body table th,
.markdown-body table td {{
border: 1px solid #d0d7de;
padding: 6px 13px;
}}
.markdown-body table tr:nth-child(2n) {{
background-color: #f6f8fa;
}}
@media (prefers-color-scheme: dark) {{
.markdown-body {{
background: #1e293b;
color: #e2e8f0;
}}
.markdown-body pre {{
background-color: #0f172a;
}}
.markdown-body code {{
background-color: rgba(99,110,123,0.4);
}}
.markdown-body blockquote {{
border-left-color: #30363d;
color: #8b949e;
}}
.markdown-body table th,
.markdown-body table td {{
border-color: #30363d;
}}
.markdown-body table tr:nth-child(2n) {{
background-color: #0f172a;
}}
}}
</style>
</head>
<body>
<div class="container">
<h1 class="chat-title">学术对话存档</h1>
<div class="chat-body">
{chat_content}
{references_content}
</div>
</div>
{js_code}
</body>
</html>
"""