Merge branch 'master' into frontier

2025-12-07 15:06:48 +00:00 · 2025-01-29 00:00:08 +08:00
--- a/shared_utils/advanced_markdown_format.py
+++ b/shared_utils/advanced_markdown_format.py
@@ -2,6 +2,7 @@ import markdown
 import re
 import os
 import math
+import html

 from loguru import logger
 from textwrap import dedent
@@ -384,6 +385,24 @@ def markdown_convertion(txt):
        )


+def code_block_title_replace_format(match):
+    lang = match.group(1)
+    filename = match.group(2)
+    return f"```{lang} {{title=\"{filename}\"}}\n"
+
+
+def get_last_backticks_indent(text):
+    # 从后向前查找最后一个 ``` 
+    lines = text.splitlines()
+    for line in reversed(lines):
+        if '```' in line:
+            # 计算前面的空格数量
+            indent = len(line) - len(line.lstrip())
+            return indent
+    return 0 # 如果没找到返回0
+
+
+@lru_cache(maxsize=16)  # 使用lru缓存
 def close_up_code_segment_during_stream(gpt_reply):
    """
    在gpt输出代码的中途（输出了前面的```，但还没输出完后面的```），补上后面的```
@@ -397,6 +416,12 @@ def close_up_code_segment_during_stream(gpt_reply):
    """
    if "```" not in gpt_reply:
        return gpt_reply
+
+    # replace [```python:warp.py] to [```python {title="warp.py"}]
+    pattern = re.compile(r"```([a-z]{1,12}):([^:\n]{1,35}\.([a-zA-Z^:\n]{1,3}))\n")
+    if pattern.search(gpt_reply):
+        gpt_reply = pattern.sub(code_block_title_replace_format, gpt_reply)
+
    if gpt_reply.endswith("```"):
        return gpt_reply

@@ -404,7 +429,11 @@ def close_up_code_segment_during_stream(gpt_reply):
    segments = gpt_reply.split("```")
    n_mark = len(segments) - 1
    if n_mark % 2 == 1:
-        return gpt_reply + "\n```"  # 输出代码片段中！
+        try:
+            num_padding = get_last_backticks_indent(gpt_reply)
+        except:
+            num_padding = 0
+        return gpt_reply + "\n" + " "*num_padding + "```"  # 输出代码片段中！
    else:
        return gpt_reply

@@ -421,6 +450,19 @@ def special_render_issues_for_mermaid(text):
    return text


+def contain_html_tag(text):
+    """
+    判断文本中是否包含HTML标签。
+    """
+    pattern = r'</?([a-zA-Z0-9_]{3,16})>|<script\s+[^>]*src=["\']([^"\']+)["\'][^>]*>'
+    return re.search(pattern, text) is not None
+
+
+def contain_image(text):
+    pattern = r'<br/><br/><div align="center"><img src="file=(.*?)" base64="(.*?)"></div>'
+    return re.search(pattern, text) is not None
+
+
 def compat_non_markdown_input(text):
    """
    改善非markdown输入的显示效果，例如将空格转换为&nbsp;，将换行符转换为</br>等。
@@ -429,9 +471,13 @@ def compat_non_markdown_input(text):
        # careful input：markdown输入
        text = special_render_issues_for_mermaid(text)  # 处理特殊的渲染问题
        return text
-    elif "</div>" in text:
+    elif ("<" in text) and (">" in text) and contain_html_tag(text):
        # careful input：html输入
-        return text
+        if contain_image(text):
+            return text
+        else:
+            escaped_text = html.escape(text)
+            return escaped_text
    else:
        # whatever input：非markdown输入
        lines = text.split("\n")