diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
index f67e79fe..bae4d951 100644
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -159,10 +159,10 @@ def 解析PDF_DOC2X_单文件(fp, project_folder, llm_kwargs, plugin_kwargs, cha
file_name = '在线预览翻译(原文)' + gen_time_str() + '.html'
preview_fp = os.path.join(ex_folder, file_name)
from shared_utils.advanced_markdown_format import markdown_convertion_for_file
- with open(generated_fp, "r", encoding="utf-8") as f:
- md = f.read()
- # Markdown中使用不标准的表格,需要在表格前加上一个emoji,以便公式渲染
- md = re.sub(r'^
', r'😃', md, flags=re.MULTILINE)
+ # with open(generated_fp, "r", encoding="utf-8") as f:
+ # md = f.read()
+ # # Markdown中使用不标准的表格,需要在表格前加上一个emoji,以便公式渲染
+ # md = re.sub(r'^', r'.', md, flags=re.MULTILINE)
html = markdown_convertion_for_file(md)
with open(preview_fp, "w", encoding="utf-8") as f: f.write(html)
chatbot.append([None, f"生成在线预览:{generate_file_link([preview_fp])}"])
@@ -182,7 +182,7 @@ def 解析PDF_DOC2X_单文件(fp, project_folder, llm_kwargs, plugin_kwargs, cha
with open(generated_fp, 'r', encoding='utf8') as f: content = f.read()
content = content.replace('```markdown', '\n').replace('```', '\n')
# Markdown中使用不标准的表格,需要在表格前加上一个emoji,以便公式渲染
- content = re.sub(r'^', r'😃', content, flags=re.MULTILINE)
+ # content = re.sub(r'^', r'.', content, flags=re.MULTILINE)
with open(generated_fp, 'w', encoding='utf8') as f: f.write(content)
# 生成在线预览html
file_name = '在线预览翻译' + gen_time_str() + '.html'
diff --git a/shared_utils/advanced_markdown_format.py b/shared_utils/advanced_markdown_format.py
index 5674e1da..e5295c1d 100644
--- a/shared_utils/advanced_markdown_format.py
+++ b/shared_utils/advanced_markdown_format.py
@@ -46,6 +46,16 @@ code_highlight_configs_block_mermaid = {
},
}
+
+mathpatterns = {
+ r"(?(.*?)'
txt = fix_markdown_indent(txt)
# convert everything to html format
- split = markdown.markdown(text="---")
convert_stage_1 = markdown.markdown(
text=txt,
extensions=[
@@ -245,14 +286,25 @@ def markdown_convertion_for_file(txt):
],
extension_configs={**markdown_extension_configs, **code_highlight_configs},
)
- convert_stage_1 = markdown_bug_hunt(convert_stage_1)
+
+
+ convert_stage_1 = fix_dollar_sticking_bug(convert_stage_1)
+ def repl_fn(match):
+ content = match.group(2)
+ return f''
+
+ pattern = "|".join([pattern for pattern, property in mathpatterns.items() if not property["allow_multi_lines"]])
+ pattern = re.compile(pattern, flags=re.ASCII)
+ convert_stage_2 = pattern.sub(repl_fn, convert_stage_1)
+
+ convert_stage_4 = markdown_bug_hunt(convert_stage_2)
# 2. convert to rendered equation
- convert_stage_2_2, n = re.subn(
- find_equation_pattern, replace_math_render, convert_stage_1, flags=re.DOTALL
+ convert_stage_5, n = re.subn(
+ find_equation_pattern, replace_math_render, convert_stage_4, flags=re.DOTALL
)
# cat them together
- return pre + convert_stage_2_2 + suf
+ return pre + convert_stage_5 + suf
@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
def markdown_convertion(txt):