from pathlib import Path
from typing import List, Dict
from dataclasses import dataclass
from datetime import datetime
import html
import os
import re


@dataclass
class SectionFragment:
    """One fragment of an arXiv paper together with the paper's metadata."""

    title: str
    authors: str
    abstract: str
    catalogs: str
    arxiv_id: str = ""
    current_section: str = "Introduction"
    content: str = ''
    bibliography: str = ''


class PaperHtmlFormatter:
    """Generate an HTML document for a paper from its section fragments."""

    def __init__(self, fragments: List[SectionFragment], output_dir: Path):
        """Store the fragments, the output directory and the page stylesheet.

        Args:
            fragments: Paper fragments to format.
            output_dir: Directory associated with the generated output; it is
                only stored here.
        """
        self.fragments = fragments
        self.output_dir = output_dir
        self.css_styles = """
        :root {
            --primary-color: #1a73e8;
            --secondary-color: #34495e;
            --background-color: #f8f9fa;
            --text-color: #2c3e50;
            --border-color: #e0e0e0;
            --code-bg-color: #f6f8fa;
        }

        body {
            font-family: "Source Serif Pro", "Times New Roman", serif;
            line-height: 1.8;
            max-width: 1000px;
            margin: 0 auto;
            padding: 2rem;
            color: var(--text-color);
            background-color: var(--background-color);
            font-size: 16px;
        }

        .container {
            background: white;
            padding: 2rem;
            border-radius: 8px;
            box-shadow: 0 2px 12px rgba(0,0,0,0.1);
        }

        h1 {
            color: var(--primary-color);
            font-size: 2.2em;
            text-align: center;
            margin: 1.5rem 0;
            padding-bottom: 1rem;
            border-bottom: 3px solid var(--primary-color);
        }

        h2 {
            color: var(--secondary-color);
            font-size: 1.8em;
            margin-top: 2rem;
            padding-left: 1rem;
            border-left: 4px solid var(--primary-color);
        }

        h3 {
            color: var(--text-color);
            font-size: 1.5em;
            margin-top: 1.5rem;
            border-bottom: 2px solid var(--border-color);
            padding-bottom: 0.5rem;
        }

        .authors {
            text-align: center;
            color: var(--secondary-color);
            font-size: 1.1em;
            margin: 1rem 0 2rem;
        }

        .abstract-container {
            background: var(--background-color);
            padding: 1.5rem;
            border-radius: 6px;
            margin: 2rem 0;
        }

        .abstract-title {
            font-weight: bold;
            color: var(--primary-color);
            margin-bottom: 1rem;
        }

        .abstract-content {
            font-style: italic;
            line-height: 1.7;
        }

        .toc {
            background: white;
            padding: 1.5rem;
            border-radius: 6px;
            margin: 2rem 0;
            box-shadow: 0 2px 8px rgba(0,0,0,0.05);
        }

        .toc-title {
            color: var(--primary-color);
            font-size: 1.4em;
            margin-bottom: 1rem;
        }

        .section-content {
            background: white;
            padding: 1.5rem;
            border-radius: 6px;
            margin: 1.5rem 0;
            box-shadow: 0 1px 3px rgba(0,0,0,0.05);
        }

        .fragment {
            margin: 2rem 0;
            padding-left: 1rem;
            border-left: 3px solid var(--border-color);
        }

        .fragment:hover {
            border-left-color: var(--primary-color);
        }

        .bibliography {
            background: var(--code-bg-color);
            padding: 1rem;
            border-radius: 4px;
            font-family: "Source Code Pro", monospace;
            font-size: 0.9em;
            white-space: pre-wrap;
            margin-top: 1rem;
        }

        pre {
            background: var(--code-bg-color);
            padding: 1rem;
            border-radius: 4px;
            overflow-x: auto;
            font-family: "Source Code Pro", monospace;
        }

        .paper-info {
            background: white;
            padding: 2rem;
            border-radius: 8px;
            margin: 2rem 0;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }

        .arxiv-id {
            text-align: center;
            color: #666;
            font-size: 0.9em;
            margin: 1rem 0;
        }

        .section-title {
            display: flex;
            align-items: center;
            gap: 0.5rem;
            color: var(--secondary-color);
        }

        .section-icon {
            color: var(--primary-color);
        }

        @media print {
            body {
                background: white;
            }
            .container {
                box-shadow: none;
            }
        }
        """

    def _sanitize_html(self, text: str) -> str:
        """Escape HTML special characters in ``text``.

        Args:
            text: Raw text to embed in HTML; may be empty or ``None``.

        Returns:
            ``text`` with ``&``, ``<``, ``>``, ``"`` and ``'`` replaced by
            HTML entities, or ``""`` when ``text`` is falsy.
        """
        if not text:
            return ""
        # html.escape replaces "&" first, so entities it emits are never
        # escaped a second time.
        # NOTE(review): the apostrophe becomes "&#x27;"; confirm nothing
        # downstream expects a different spelling of that entity.
        return html.escape(text, quote=True)

    def _create_section_id(self, section: str) -> str:
        """Build an ID string from a section name.

        Args:
            section: Section name; blank names fall back to "uncategorized".

        Returns:
            The lower-cased name with every character that is not a word
            character, whitespace or a hyphen removed, and each space
            replaced by a hyphen.
        """
        section = section.strip() or "uncategorized"
        # Drop special characters, lower-case, then hyphenate the spaces.
        section_id = re.sub(r'[^\w\s-]', '', section.lower())
        return section_id.replace(' ', '-')

    def format_paper_info(self) -> str:
        """Format the paper's basic information."""
        if not self.fragments:
            return ""
        first_fragment = self.fragments[0]
        # NOTE(review): the text of this method is cut off inside the string
        # literal below and the markup it contained is missing. The literal
        # is closed empty only so that the code above parses; the original
        # markup and the rest of the method must be restored from the
        # upstream file.
        paper_info = ['']
')
paper_info.append(self._sanitize_html(first_fragment.catalogs))
paper_info.append('')
paper_info.append('