这个提交包含在:
lbykkkk
2024-11-03 17:57:56 +08:00
父节点 9172337695
当前提交 36e50d490d

查看文件

@@ -43,14 +43,15 @@ class WordFormatter(DocumentFormatter):
self.doc = Document() self.doc = Document()
self._setup_document() self._setup_document()
self._create_styles() self._create_styles()
# 初始化标题编号系统 - 只使用两级编号 # 初始化三级标题编号系统
self.numbers = { self.numbers = {
1: 0, # 一级标题编号 1: 0, # 一级标题编号
2: 0 # 二级标题编号 2: 0, # 二级标题编号
3: 0 # 三级标题编号
} }
def _setup_document(self): def _setup_document(self):
"""设置文档基本格式""" """设置文档基本格式,包括页面设置和页眉"""
sections = self.doc.sections sections = self.doc.sections
for section in sections: for section in sections:
# 设置页面大小为A4 # 设置页面大小为A4
@@ -61,26 +62,35 @@ class WordFormatter(DocumentFormatter):
section.bottom_margin = Cm(3.5) # 下边距35mm section.bottom_margin = Cm(3.5) # 下边距35mm
section.left_margin = Cm(2.8) # 左边距28mm section.left_margin = Cm(2.8) # 左边距28mm
section.right_margin = Cm(2.6) # 右边距26mm section.right_margin = Cm(2.6) # 右边距26mm
# 设置页眉页脚 # 设置页眉页脚距离
section.header_distance = Cm(2.0) section.header_distance = Cm(2.0)
section.footer_distance = Cm(2.0) section.footer_distance = Cm(2.0)
# 添加页眉
header = section.header
header_para = header.paragraphs[0]
header_para.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
header_run = header_para.add_run("该文档由GPT-academic生成")
header_run.font.name = '仿宋'
header_run._element.rPr.rFonts.set(qn('w:eastAsia'), '仿宋')
header_run.font.size = Pt(9)
def _create_styles(self):
    """Register the custom paragraph styles used throughout the document.

    Creates the body-text style ``Normal_Custom`` and the four heading
    styles ``Title_Custom`` and ``Heading1_Custom`` .. ``Heading3_Custom``
    on ``self.doc``. The heading styles are built by
    ``self._create_heading_style``.
    """
    # Body text: 14pt FangSong, 1.5 line spacing, no space after paragraph.
    body = self.doc.styles.add_style('Normal_Custom', WD_STYLE_TYPE.PARAGRAPH)
    body.font.name = '仿宋'
    # The East-Asian font slot is set separately from font.name so that CJK
    # glyphs use the same typeface.
    body._element.rPr.rFonts.set(qn('w:eastAsia'), '仿宋')
    body.font.size = Pt(14)
    body_format = body.paragraph_format
    body_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE
    body_format.space_after = Pt(0)
    # First-line indent of two characters: 2 * 14pt.
    body_format.first_line_indent = Pt(28)

    # Heading styles as (style name, font, size in pt, alignment), with the
    # size decreasing from the title down to level 3.
    heading_specs = (
        ('Title_Custom', '方正小标宋简体', 32, WD_PARAGRAPH_ALIGNMENT.CENTER),
        ('Heading1_Custom', '黑体', 22, WD_PARAGRAPH_ALIGNMENT.LEFT),
        ('Heading2_Custom', '黑体', 18, WD_PARAGRAPH_ALIGNMENT.LEFT),
        ('Heading3_Custom', '黑体', 16, WD_PARAGRAPH_ALIGNMENT.LEFT),
    )
    for style_name, font_name, font_size, alignment in heading_specs:
        self._create_heading_style(style_name, font_name, font_size, alignment)
def _create_heading_style(self, style_name: str, font_name: str, font_size: int, alignment): def _create_heading_style(self, style_name: str, font_name: str, font_size: int, alignment):
"""创建标题样式""" """创建标题样式"""
@@ -88,7 +98,7 @@ class WordFormatter(DocumentFormatter):
style.font.name = font_name style.font.name = font_name
style._element.rPr.rFonts.set(qn('w:eastAsia'), font_name) style._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
style.font.size = Pt(font_size) style.font.size = Pt(font_size)
style.font.bold = True # 所有标题都加粗 style.font.bold = True
style.paragraph_format.alignment = alignment style.paragraph_format.alignment = alignment
style.paragraph_format.space_before = Pt(12) style.paragraph_format.space_before = Pt(12)
style.paragraph_format.space_after = Pt(12) style.paragraph_format.space_after = Pt(12)
@@ -96,25 +106,41 @@ class WordFormatter(DocumentFormatter):
return style return style
def _get_heading_number(self, level: int) -> str:
    """Return the next hierarchical number prefix for a heading.

    Increments the counter stored in ``self.numbers`` for ``level`` and
    resets the counters of every deeper level, so that a new first-level
    heading restarts its sub-headings from 1.

    Args:
        level: Heading level. 0 is the document title, which is not
            numbered; 1-3 are the numbered heading levels.

    Returns:
        An empty string for level 0 or for a level that has no counter;
        otherwise ``"N. "`` for level 1, ``"N.M "`` for level 2 and
        ``"N.M.K "`` for level 3 (note the trailing space).
    """
    # The title and unknown levels carry no number. Checking membership
    # first keeps the empty-string fallback reachable instead of raising
    # KeyError on the increment below.
    if level == 0 or level not in self.numbers:
        return ""

    # Advance the counter of the current level.
    self.numbers[level] += 1

    # Reset every lower-level (deeper) counter that is being tracked.
    for deeper in self.numbers:
        if deeper > level:
            self.numbers[deeper] = 0

    # Level 1 uses a trailing dot ("1. "); deeper levels are dotted paths
    # without one ("1.2 ", "1.2.3 ").
    if level == 1:
        return f"{self.numbers[1]}. "
    dotted = ".".join(str(self.numbers[i]) for i in range(1, level + 1))
    return f"{dotted} "
def _add_heading(self, text: str, level: int): def _add_heading(self, text: str, level: int):
"""添加带编号的标题""" """
添加带编号的标题
Args:
text: 标题文本
level: 标题级别 (0-3)
"""
style_map = { style_map = {
0: 'Title_Custom', 0: 'Title_Custom',
1: 'Heading1_Custom', 1: 'Heading1_Custom',
@@ -122,29 +148,24 @@ class WordFormatter(DocumentFormatter):
3: 'Heading3_Custom' 3: 'Heading3_Custom'
} }
# 获取标题编号
number = self._get_heading_number(level) number = self._get_heading_number(level)
# 创建段落
paragraph = self.doc.add_paragraph(style=style_map[level]) paragraph = self.doc.add_paragraph(style=style_map[level])
# 分别添加编号和文本,并设置样式
if number: if number:
number_run = paragraph.add_run(number) number_run = paragraph.add_run(number)
self._get_run_style(number_run, '黑体', 22 if level == 1 else 18, True) font_size = 22 if level == 1 else (18 if level == 2 else 16)
self._get_run_style(number_run, '黑体', font_size, True)
text_run = paragraph.add_run(text) text_run = paragraph.add_run(text)
font_size = 32 if level == 0 else (22 if level == 1 else 18) # 主标题32号,一级标题22号,其他18号 font_size = 32 if level == 0 else (22 if level == 1 else (18 if level == 2 else 16))
self._get_run_style(text_run, '黑体', font_size, True) self._get_run_style(text_run, '黑体', font_size, True)
# 特殊处理:主标题添加日期 # 主标题添加日期
if level == 0: if level == 0:
date_paragraph = self.doc.add_paragraph() date_paragraph = self.doc.add_paragraph()
date_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER date_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
date_run = date_paragraph.add_run(datetime.now().strftime('%Y年%m月%d')) date_run = date_paragraph.add_run(datetime.now().strftime('%Y年%m月%d'))
date_run.font.name = '仿宋' self._get_run_style(date_run, '仿宋', 16, False)
date_run._element.rPr.rFonts.set(qn('w:eastAsia'), '仿宋')
date_run.font.size = Pt(16)
return paragraph return paragraph
@@ -165,7 +186,6 @@ class WordFormatter(DocumentFormatter):
for fp, reason in self.failed_files: for fp, reason in self.failed_files:
result.append(f"{os.path.basename(fp)}: {reason}") result.append(f"{os.path.basename(fp)}: {reason}")
# 在文档中添加内容
self._add_heading("处理失败文件", 1) self._add_heading("处理失败文件", 1)
for fp, reason in self.failed_files: for fp, reason in self.failed_files:
self._add_content(f"{os.path.basename(fp)}: {reason}", indent=False) self._add_content(f"{os.path.basename(fp)}: {reason}", indent=False)
@@ -177,42 +197,75 @@ class WordFormatter(DocumentFormatter):
"""添加正文内容""" """添加正文内容"""
paragraph = self.doc.add_paragraph(text, style='Normal_Custom') paragraph = self.doc.add_paragraph(text, style='Normal_Custom')
if not indent: if not indent:
paragraph.paragraph_format.first_line_indent = Pt(0) # 不缩进的段落 paragraph.paragraph_format.first_line_indent = Pt(0)
return paragraph return paragraph
def format_file_summaries(self) -> str:
    """Write every file summary into the document and return it as text.

    Heading hierarchy (all below the level-1 heading "各文件详细总结"
    that the caller adds):

    * Files without a directory component become level-2 headings.
    * Each directory becomes a level-2 heading, and every file directly
      inside it becomes a level-3 heading beneath that directory.

    Root-level files are emitted first, then directories in sorted order.
    Each file contributes a heading, its summary paragraph and one empty
    spacer paragraph to ``self.doc``.

    Returns:
        The same content as plain text: one "📄 name" / "📁 dir" line per
        heading followed by the summary, joined with newlines.
    """
    result = []

    def emit_file(path: str, level: int) -> None:
        # Mirror one file summary into both the text result and the document.
        file_name = os.path.basename(path)
        summary = self.file_summaries_map[path]
        result.append(f"\n📄 {file_name}")
        result.append(summary)
        self._add_heading(f"📄 {file_name}", level)
        self._add_content(summary)
        self.doc.add_paragraph()

    # Group paths by their directory. Iterating the sorted paths means each
    # group is already in sorted order, so no per-group re-sort is needed.
    file_groups = {}
    for path in sorted(self.file_summaries_map):
        file_groups.setdefault(os.path.dirname(path), []).append(path)

    # Files with no directory part are level-2 headings.
    for path in file_groups.get("", []):
        emit_file(path, 2)

    # Directories are level-2 headings; their files are level-3 headings.
    for dir_path in sorted(file_groups):
        if dir_path == "":  # root files were handled above
            continue
        result.append(f"\n📁 {dir_path}")
        self._add_heading(f"📁 {dir_path}", 2)
        for path in file_groups[dir_path]:
            emit_file(path, 3)

    return "\n".join(result)
def create_document(self): def create_document(self):
"""创建完整Word文档并返回文档对象""" """创建完整Word文档并返回文档对象"""
# 重置所有编号 # 重置所有编号
for level in self.numbers: for level in self.numbers:
self.numbers[level] = 0 self.numbers[level] = 0
# 添加主标题(更大字号和加粗) # 添加主标题
self._add_heading("文档总结报告", 0) self._add_heading("文档总结报告", 0)
self.doc.add_paragraph() self.doc.add_paragraph()
@@ -229,7 +282,7 @@ class WordFormatter(DocumentFormatter):
self._add_heading("各文件详细总结", 1) self._add_heading("各文件详细总结", 1)
self.format_file_summaries() self.format_file_summaries()
return self.doc # 返回文档对象 return self.doc
class MarkdownFormatter(DocumentFormatter): class MarkdownFormatter(DocumentFormatter):