format source code

这个提交包含在:
binary-husky
2024-01-13 18:00:52 +08:00
父节点 1714116a89
当前提交 7ab379688e
共有 12 个文件被更改,包括 1049 次插入595 次删除

查看文件

@@ -2,6 +2,7 @@ import markdown
import re
import os
import math
from textwrap import dedent
from latex2mathml.converter import convert as tex2mathml
from functools import wraps, lru_cache
from shared_utils.config_loader import get_conf as get_conf
@@ -32,7 +33,148 @@ def text_divide_paragraph(text):
text = "</br>".join(lines)
return pre + text + suf
def tex2mathml_catch_exception(content, *args, **kwargs):
try:
content = tex2mathml(content, *args, **kwargs)
except:
content = content
return content
def replace_math_no_render(match):
content = match.group(1)
if 'mode=display' in match.group(0):
content = content.replace('\n', '</br>')
return f"<font color=\"#00FF00\">$$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$$</font>"
else:
return f"<font color=\"#00FF00\">$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$</font>"
def replace_math_render(match):
content = match.group(1)
if 'mode=display' in match.group(0):
if '\\begin{aligned}' in content:
content = content.replace('\\begin{aligned}', '\\begin{array}')
content = content.replace('\\end{aligned}', '\\end{array}')
content = content.replace('&', ' ')
content = tex2mathml_catch_exception(content, display="block")
return content
else:
return tex2mathml_catch_exception(content)
def markdown_bug_hunt(content):
"""
解决一个mdx_math的bug单$包裹begin命令时多余<script>
"""
content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">',
'<script type="math/tex; mode=display">')
content = content.replace('</script>\n</script>', '</script>')
return content
def is_equation(txt):
"""
判定是否为公式 | 测试1 写出洛伦兹定律,使用tex格式公式 测试2 给出柯西不等式,使用latex格式 测试3 写出麦克斯韦方程组
"""
if '```' in txt and '```reference' not in txt: return False
if '$' not in txt and '\\[' not in txt: return False
mathpatterns = {
r'(?<!\\|\$)(\$)([^\$]+)(\$)': {'allow_multi_lines': False}, #  $...$
r'(?<!\\)(\$\$)([^\$]+)(\$\$)': {'allow_multi_lines': True}, # $$...$$
r'(?<!\\)(\\\[)(.+?)(\\\])': {'allow_multi_lines': False}, # \[...\]
# r'(?<!\\)(\\\()(.+?)(\\\))': {'allow_multi_lines': False}, # \(...\)
# r'(?<!\\)(\\begin{([a-z]+?\*?)})(.+?)(\\end{\2})': {'allow_multi_lines': True}, # \begin...\end
# r'(?<!\\)(\$`)([^`]+)(`\$)': {'allow_multi_lines': False}, # $`...`$
}
matches = []
for pattern, property in mathpatterns.items():
flags = re.ASCII | re.DOTALL if property['allow_multi_lines'] else re.ASCII
matches.extend(re.findall(pattern, txt, flags))
if len(matches) == 0: return False
contain_any_eq = False
illegal_pattern = re.compile(r'[^\x00-\x7F]|echo')
for match in matches:
if len(match) != 3: return False
eq_canidate = match[1]
if illegal_pattern.search(eq_canidate):
return False
else:
contain_any_eq = True
return contain_any_eq
def fix_markdown_indent(txt):
# fix markdown indent
if (' - ' not in txt) or ('. ' not in txt):
# do not need to fix, fast escape
return txt
# walk through the lines and fix non-standard indentation
lines = txt.split("\n")
pattern = re.compile(r'^\s+-')
activated = False
for i, line in enumerate(lines):
if line.startswith('- ') or line.startswith('1. '):
activated = True
if activated and pattern.match(line):
stripped_string = line.lstrip()
num_spaces = len(line) - len(stripped_string)
if (num_spaces % 4) == 3:
num_spaces_should_be = math.ceil(num_spaces / 4) * 4
lines[i] = ' ' * num_spaces_should_be + stripped_string
return '\n'.join(lines)
FENCED_BLOCK_RE = re.compile(
dedent(r'''
(?P<fence>^[ \t]*(?:~{3,}|`{3,}))[ ]* # opening fence
((\{(?P<attrs>[^\}\n]*)\})| # (optional {attrs} or
(\.?(?P<lang>[\w#.+-]*)[ ]*)? # optional (.)lang
(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines)
\n # newline (end of opening fence)
(?P<code>.*?)(?<=\n) # the code block
(?P=fence)[ ]*$ # closing fence
'''),
re.MULTILINE | re.DOTALL | re.VERBOSE
)
def get_line_range(re_match_obj, txt):
start_pos, end_pos = re_match_obj.regs[0]
num_newlines_before = txt[:start_pos+1].count('\n')
line_start = num_newlines_before
line_end = num_newlines_before + txt[start_pos:end_pos].count('\n')+1
return line_start, line_end
def fix_code_segment_indent(txt):
lines = []
change_any = False
txt_tmp = txt
while True:
re_match_obj = FENCED_BLOCK_RE.search(txt_tmp)
if not re_match_obj: break
if len(lines) == 0: lines = txt.split("\n")
# 清空 txt_tmp 对应的位置方便下次搜索
start_pos, end_pos = re_match_obj.regs[0]
txt_tmp = txt_tmp[:start_pos] + ' '*(end_pos-start_pos) + txt_tmp[end_pos:]
line_start, line_end = get_line_range(re_match_obj, txt)
# 获取公共缩进
shared_indent_cnt = 1e5
for i in range(line_start, line_end):
stripped_string = lines[i].lstrip()
num_spaces = len(lines[i]) - len(stripped_string)
if num_spaces < shared_indent_cnt:
shared_indent_cnt = num_spaces
# 修复缩进
if (shared_indent_cnt < 1e5) and (shared_indent_cnt % 4) == 3:
num_spaces_should_be = math.ceil(shared_indent_cnt / 4) * 4
for i in range(line_start, line_end):
add_n = num_spaces_should_be - shared_indent_cnt
lines[i] = ' ' * add_n + lines[i]
if not change_any: # 遇到第一个
change_any = True
if change_any:
return '\n'.join(lines)
else:
return txt
@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
def markdown_convertion(txt):
"""
@@ -52,92 +194,8 @@ def markdown_convertion(txt):
}
find_equation_pattern = r'<script type="math/tex(?:.*?)>(.*?)</script>'
def tex2mathml_catch_exception(content, *args, **kwargs):
try:
content = tex2mathml(content, *args, **kwargs)
except:
content = content
return content
def replace_math_no_render(match):
content = match.group(1)
if 'mode=display' in match.group(0):
content = content.replace('\n', '</br>')
return f"<font color=\"#00FF00\">$$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$$</font>"
else:
return f"<font color=\"#00FF00\">$</font><font color=\"#FF00FF\">{content}</font><font color=\"#00FF00\">$</font>"
def replace_math_render(match):
content = match.group(1)
if 'mode=display' in match.group(0):
if '\\begin{aligned}' in content:
content = content.replace('\\begin{aligned}', '\\begin{array}')
content = content.replace('\\end{aligned}', '\\end{array}')
content = content.replace('&', ' ')
content = tex2mathml_catch_exception(content, display="block")
return content
else:
return tex2mathml_catch_exception(content)
def markdown_bug_hunt(content):
"""
解决一个mdx_math的bug单$包裹begin命令时多余<script>
"""
content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">',
'<script type="math/tex; mode=display">')
content = content.replace('</script>\n</script>', '</script>')
return content
def is_equation(txt):
"""
判定是否为公式 | 测试1 写出洛伦兹定律,使用tex格式公式 测试2 给出柯西不等式,使用latex格式 测试3 写出麦克斯韦方程组
"""
if '```' in txt and '```reference' not in txt: return False
if '$' not in txt and '\\[' not in txt: return False
mathpatterns = {
r'(?<!\\|\$)(\$)([^\$]+)(\$)': {'allow_multi_lines': False}, #  $...$
r'(?<!\\)(\$\$)([^\$]+)(\$\$)': {'allow_multi_lines': True}, # $$...$$
r'(?<!\\)(\\\[)(.+?)(\\\])': {'allow_multi_lines': False}, # \[...\]
# r'(?<!\\)(\\\()(.+?)(\\\))': {'allow_multi_lines': False}, # \(...\)
# r'(?<!\\)(\\begin{([a-z]+?\*?)})(.+?)(\\end{\2})': {'allow_multi_lines': True}, # \begin...\end
# r'(?<!\\)(\$`)([^`]+)(`\$)': {'allow_multi_lines': False}, # $`...`$
}
matches = []
for pattern, property in mathpatterns.items():
flags = re.ASCII | re.DOTALL if property['allow_multi_lines'] else re.ASCII
matches.extend(re.findall(pattern, txt, flags))
if len(matches) == 0: return False
contain_any_eq = False
illegal_pattern = re.compile(r'[^\x00-\x7F]|echo')
for match in matches:
if len(match) != 3: return False
eq_canidate = match[1]
if illegal_pattern.search(eq_canidate):
return False
else:
contain_any_eq = True
return contain_any_eq
def fix_markdown_indent(txt):
# fix markdown indent
if (' - ' not in txt) or ('. ' not in txt):
return txt # do not need to fix, fast escape
# walk through the lines and fix non-standard indentation
lines = txt.split("\n")
pattern = re.compile(r'^\s+-')
activated = False
for i, line in enumerate(lines):
if line.startswith('- ') or line.startswith('1. '):
activated = True
if activated and pattern.match(line):
stripped_string = line.lstrip()
num_spaces = len(line) - len(stripped_string)
if (num_spaces % 4) == 3:
num_spaces_should_be = math.ceil(num_spaces / 4) * 4
lines[i] = ' ' * num_spaces_should_be + stripped_string
return '\n'.join(lines)
txt = fix_markdown_indent(txt)
txt = fix_code_segment_indent(txt)
if is_equation(txt): # 有$标识的公式符号,且没有代码段```的标识
# convert everything to html format
split = markdown.markdown(text='---')

查看文件

@@ -1,7 +1,7 @@
import os
"""
========================================================================
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
接驳void-terminal:
- set_conf: 在运行过程中动态地修改配置
- set_multi_conf: 在运行过程中动态地修改多个配置
@@ -9,17 +9,20 @@ import os
- get_plugin_default_kwargs: 获取插件的默认参数
- get_chat_handle: 获取简单聊天的句柄
- get_chat_default_kwargs: 获取简单聊天的默认参数
========================================================================
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
"""
def get_plugin_handle(plugin_name):
"""
e.g. plugin_name = 'crazy_functions.批量Markdown翻译->Markdown翻译指定语言'
"""
import importlib
assert '->' in plugin_name, \
"Example of plugin_name: crazy_functions.批量Markdown翻译->Markdown翻译指定语言"
module, fn_name = plugin_name.split('->')
assert (
"->" in plugin_name
), "Example of plugin_name: crazy_functions.批量Markdown翻译->Markdown翻译指定语言"
module, fn_name = plugin_name.split("->")
f_hot_reload = getattr(importlib.import_module(module, fn_name), fn_name)
return f_hot_reload
@@ -29,6 +32,7 @@ def get_chat_handle():
Get chat function
"""
from request_llms.bridge_all import predict_no_ui_long_connection
return predict_no_ui_long_connection
@@ -37,13 +41,14 @@ def get_plugin_default_kwargs():
Get Plugin Default Arguments
"""
from toolbox import ChatBotWithCookies, load_chat_cookies
cookies = load_chat_cookies()
llm_kwargs = {
'api_key': cookies['api_key'],
'llm_model': cookies['llm_model'],
'top_p': 1.0,
'max_length': None,
'temperature': 1.0,
"api_key": cookies["api_key"],
"llm_model": cookies["llm_model"],
"top_p": 1.0,
"max_length": None,
"temperature": 1.0,
}
chatbot = ChatBotWithCookies(llm_kwargs)
@@ -55,7 +60,7 @@ def get_plugin_default_kwargs():
"chatbot_with_cookie": chatbot,
"history": [],
"system_prompt": "You are a good AI.",
"web_port": None
"web_port": None,
}
return DEFAULT_FN_GROUPS_kwargs
@@ -65,13 +70,14 @@ def get_chat_default_kwargs():
Get Chat Default Arguments
"""
from toolbox import load_chat_cookies
cookies = load_chat_cookies()
llm_kwargs = {
'api_key': cookies['api_key'],
'llm_model': cookies['llm_model'],
'top_p': 1.0,
'max_length': None,
'temperature': 1.0,
"api_key": cookies["api_key"],
"llm_model": cookies["llm_model"],
"top_p": 1.0,
"max_length": None,
"temperature": 1.0,
}
default_chat_kwargs = {
"inputs": "Hello there, are you ready?",