镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-07 15:06:48 +00:00
Merge branch 'master' into frontier
这个提交包含在:
@@ -2,6 +2,7 @@ import markdown
|
||||
import re
|
||||
import os
|
||||
import math
|
||||
import html
|
||||
|
||||
from loguru import logger
|
||||
from textwrap import dedent
|
||||
@@ -384,6 +385,24 @@ def markdown_convertion(txt):
|
||||
)
|
||||
|
||||
|
||||
def code_block_title_replace_format(match):
|
||||
lang = match.group(1)
|
||||
filename = match.group(2)
|
||||
return f"```{lang} {{title=\"{filename}\"}}\n"
|
||||
|
||||
|
||||
def get_last_backticks_indent(text):
|
||||
# 从后向前查找最后一个 ```
|
||||
lines = text.splitlines()
|
||||
for line in reversed(lines):
|
||||
if '```' in line:
|
||||
# 计算前面的空格数量
|
||||
indent = len(line) - len(line.lstrip())
|
||||
return indent
|
||||
return 0 # 如果没找到返回0
|
||||
|
||||
|
||||
@lru_cache(maxsize=16) # 使用lru缓存
|
||||
def close_up_code_segment_during_stream(gpt_reply):
|
||||
"""
|
||||
在gpt输出代码的中途(输出了前面的```,但还没输出完后面的```),补上后面的```
|
||||
@@ -397,6 +416,12 @@ def close_up_code_segment_during_stream(gpt_reply):
|
||||
"""
|
||||
if "```" not in gpt_reply:
|
||||
return gpt_reply
|
||||
|
||||
# replace [```python:warp.py] to [```python {title="warp.py"}]
|
||||
pattern = re.compile(r"```([a-z]{1,12}):([^:\n]{1,35}\.([a-zA-Z^:\n]{1,3}))\n")
|
||||
if pattern.search(gpt_reply):
|
||||
gpt_reply = pattern.sub(code_block_title_replace_format, gpt_reply)
|
||||
|
||||
if gpt_reply.endswith("```"):
|
||||
return gpt_reply
|
||||
|
||||
@@ -404,7 +429,11 @@ def close_up_code_segment_during_stream(gpt_reply):
|
||||
segments = gpt_reply.split("```")
|
||||
n_mark = len(segments) - 1
|
||||
if n_mark % 2 == 1:
|
||||
return gpt_reply + "\n```" # 输出代码片段中!
|
||||
try:
|
||||
num_padding = get_last_backticks_indent(gpt_reply)
|
||||
except:
|
||||
num_padding = 0
|
||||
return gpt_reply + "\n" + " "*num_padding + "```" # 输出代码片段中!
|
||||
else:
|
||||
return gpt_reply
|
||||
|
||||
@@ -421,6 +450,19 @@ def special_render_issues_for_mermaid(text):
|
||||
return text
|
||||
|
||||
|
||||
def contain_html_tag(text):
|
||||
"""
|
||||
判断文本中是否包含HTML标签。
|
||||
"""
|
||||
pattern = r'</?([a-zA-Z0-9_]{3,16})>|<script\s+[^>]*src=["\']([^"\']+)["\'][^>]*>'
|
||||
return re.search(pattern, text) is not None
|
||||
|
||||
|
||||
def contain_image(text):
|
||||
pattern = r'<br/><br/><div align="center"><img src="file=(.*?)" base64="(.*?)"></div>'
|
||||
return re.search(pattern, text) is not None
|
||||
|
||||
|
||||
def compat_non_markdown_input(text):
|
||||
"""
|
||||
改善非markdown输入的显示效果,例如将空格转换为 ,将换行符转换为</br>等。
|
||||
@@ -429,9 +471,13 @@ def compat_non_markdown_input(text):
|
||||
# careful input:markdown输入
|
||||
text = special_render_issues_for_mermaid(text) # 处理特殊的渲染问题
|
||||
return text
|
||||
elif "</div>" in text:
|
||||
elif ("<" in text) and (">" in text) and contain_html_tag(text):
|
||||
# careful input:html输入
|
||||
return text
|
||||
if contain_image(text):
|
||||
return text
|
||||
else:
|
||||
escaped_text = html.escape(text)
|
||||
return escaped_text
|
||||
else:
|
||||
# whatever input:非markdown输入
|
||||
lines = text.split("\n")
|
||||
|
||||
在新工单中引用
屏蔽一个用户