version 3.6

这个提交包含在:
binary-husky
2023-11-20 01:17:59 +08:00
父节点 971ac206f3
当前提交 17d0a32f36
共有 120 个文件被更改,包括 12092 次插入626 次删除

查看文件

@@ -7,10 +7,11 @@ import os
import gradio
import shutil
import glob
import math
from latex2mathml.converter import convert as tex2mathml
from functools import wraps, lru_cache
pj = os.path.join
default_user_name = 'default_user'
"""
========================================================================
第一部分
@@ -60,11 +61,16 @@ def ArgsGeneralWrapper(f):
txt_passon = txt
if txt == "" and txt2 != "": txt_passon = txt2
# 引入一个有cookie的chatbot
if request.username is not None:
user_name = request.username
else:
user_name = default_user_name
cookies.update({
'top_p':top_p,
'api_key': cookies['api_key'],
'llm_model': llm_model,
'temperature':temperature,
'user_name': user_name,
})
llm_kwargs = {
'api_key': cookies['api_key'],
@@ -151,13 +157,13 @@ def CatchException(f):
except Exception as e:
from check_proxy import check_proxy
from toolbox import get_conf
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
tb_str = '```\n' + trimmed_format_exc() + '```'
if len(chatbot_with_cookie) == 0:
chatbot_with_cookie.clear()
chatbot_with_cookie.append(["插件调度异常", "异常原因"])
chatbot_with_cookie[-1] = (chatbot_with_cookie[-1][0],
f"[Local Message] 实验性函数调用出错: \n\n{tb_str} \n\n当前代理可用性: \n\n{check_proxy(proxies)}")
f"[Local Message] 插件调用出错: \n\n{tb_str} \n\n当前代理可用性: \n\n{check_proxy(proxies)}")
yield from update_ui(chatbot=chatbot_with_cookie, history=history, msg=f'异常 {e}') # 刷新界面
return decorated
@@ -186,7 +192,7 @@ def HotReload(f):
其他小工具:
- write_history_to_file: 将结果写入markdown文件中
- regular_txt_to_markdown: 将普通文本转换为Markdown格式的文本。
- report_execption: 向chatbot中添加简单的意外错误信息
- report_exception: 向chatbot中添加简单的意外错误信息
- text_divide_paragraph: 将文本按照段落分隔符分割开,生成带有段落标签的HTML代码。
- markdown_convertion: 用多种方式组合,将markdown转化为好看的html
- format_io: 接管gradio默认的markdown处理方式
@@ -259,7 +265,7 @@ def regular_txt_to_markdown(text):
def report_execption(chatbot, history, a, b):
def report_exception(chatbot, history, a, b):
"""
向chatbot中添加错误信息
"""
@@ -278,9 +284,12 @@ def text_divide_paragraph(text):
if '```' in text:
# careful input
return pre + text + suf
return text
elif '</div>' in text:
# careful input
return text
else:
# wtf input
# whatever input
lines = text.split("\n")
for i, line in enumerate(lines):
lines[i] = lines[i].replace(" ", "&nbsp;")
@@ -372,6 +381,26 @@ def markdown_convertion(txt):
contain_any_eq = True
return contain_any_eq
def fix_markdown_indent(txt):
# fix markdown indent
if (' - ' not in txt) or ('. ' not in txt):
return txt # do not need to fix, fast escape
# walk through the lines and fix non-standard indentation
lines = txt.split("\n")
pattern = re.compile(r'^\s+-')
activated = False
for i, line in enumerate(lines):
if line.startswith('- ') or line.startswith('1. '):
activated = True
if activated and pattern.match(line):
stripped_string = line.lstrip()
num_spaces = len(line) - len(stripped_string)
if (num_spaces % 4) == 3:
num_spaces_should_be = math.ceil(num_spaces/4) * 4
lines[i] = ' ' * num_spaces_should_be + stripped_string
return '\n'.join(lines)
txt = fix_markdown_indent(txt)
if is_equation(txt): # 有$标识的公式符号,且没有代码段```的标识
# convert everything to html format
split = markdown.markdown(text='---')
@@ -513,40 +542,60 @@ def find_recent_files(directory):
return recent_files
def file_already_in_downloadzone(file, user_path):
try:
parent_path = os.path.abspath(user_path)
child_path = os.path.abspath(file)
if os.path.samefile(os.path.commonpath([parent_path, child_path]), parent_path):
return True
else:
return False
except:
return False
def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
# 将文件复制一份到下载区
import shutil
if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}'
new_path = pj(get_log_folder(), rename_file)
# 如果已经存在,先删除
if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path)
# 把文件复制过去
if not os.path.exists(new_path): shutil.copyfile(file, new_path)
# 将文件添加到chatbot cookie中,避免多用户干扰
if chatbot is not None:
user_name = get_user(chatbot)
else:
user_name = default_user_name
user_path = get_log_folder(user_name, plugin_name=None)
if file_already_in_downloadzone(file, user_path):
new_path = file
else:
user_path = get_log_folder(user_name, plugin_name='downloadzone')
if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}'
new_path = pj(user_path, rename_file)
# 如果已经存在,先删除
if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path)
# 把文件复制过去
if not os.path.exists(new_path): shutil.copyfile(file, new_path)
# 将文件添加到chatbot cookie中
if chatbot is not None:
if 'files_to_promote' in chatbot._cookies: current = chatbot._cookies['files_to_promote']
else: current = []
chatbot._cookies.update({'files_to_promote': [new_path] + current})
return new_path
def disable_auto_promotion(chatbot):
chatbot._cookies.update({'files_to_promote': []})
return
def is_the_upload_folder(string):
PATH_PRIVATE_UPLOAD, = get_conf('PATH_PRIVATE_UPLOAD')
pattern = r'^PATH_PRIVATE_UPLOAD/[A-Za-z0-9_-]+/\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}$'
pattern = pattern.replace('PATH_PRIVATE_UPLOAD', PATH_PRIVATE_UPLOAD)
if re.match(pattern, string): return True
else: return False
def del_outdated_uploads(outdate_time_seconds):
PATH_PRIVATE_UPLOAD, = get_conf('PATH_PRIVATE_UPLOAD')
def del_outdated_uploads(outdate_time_seconds, target_path_base=None):
if target_path_base is None:
user_upload_dir = get_conf('PATH_PRIVATE_UPLOAD')
else:
user_upload_dir = target_path_base
current_time = time.time()
one_hour_ago = current_time - outdate_time_seconds
# Get a list of all subdirectories in the PATH_PRIVATE_UPLOAD folder
# Get a list of all subdirectories in the user_upload_dir folder
# Remove subdirectories that are older than one hour
for subdirectory in glob.glob(f'{PATH_PRIVATE_UPLOAD}/*/*'):
for subdirectory in glob.glob(f'{user_upload_dir}/*'):
subdirectory_time = os.path.getmtime(subdirectory)
if subdirectory_time < one_hour_ago:
try: shutil.rmtree(subdirectory)
@@ -559,17 +608,16 @@ def on_file_uploaded(request: gradio.Request, files, chatbot, txt, txt2, checkbo
"""
if len(files) == 0:
return chatbot, txt
# 移除过时的旧文件从而节省空间&保护隐私
outdate_time_seconds = 60
del_outdated_uploads(outdate_time_seconds)
# 创建工作路径
user_name = "default" if not request.username else request.username
user_name = default_user_name if not request.username else request.username
time_tag = gen_time_str()
PATH_PRIVATE_UPLOAD, = get_conf('PATH_PRIVATE_UPLOAD')
target_path_base = pj(PATH_PRIVATE_UPLOAD, user_name, time_tag)
target_path_base = get_upload_folder(user_name, tag=time_tag)
os.makedirs(target_path_base, exist_ok=True)
# 移除过时的旧文件从而节省空间&保护隐私
outdate_time_seconds = 3600 # 一小时
del_outdated_uploads(outdate_time_seconds, get_upload_folder(user_name))
# 逐个文件转移到目标路径
upload_msg = ''
@@ -604,13 +652,14 @@ def on_file_uploaded(request: gradio.Request, files, chatbot, txt, txt2, checkbo
def on_report_generated(cookies, files, chatbot):
from toolbox import find_recent_files
PATH_LOGGING, = get_conf('PATH_LOGGING')
# from toolbox import find_recent_files
# PATH_LOGGING = get_conf('PATH_LOGGING')
if 'files_to_promote' in cookies:
report_files = cookies['files_to_promote']
cookies.pop('files_to_promote')
else:
report_files = find_recent_files(PATH_LOGGING)
report_files = []
# report_files = find_recent_files(PATH_LOGGING)
if len(report_files) == 0:
return cookies, None, chatbot
# files.extend(report_files)
@@ -621,13 +670,34 @@ def on_report_generated(cookies, files, chatbot):
def load_chat_cookies():
API_KEY, LLM_MODEL, AZURE_API_KEY = get_conf('API_KEY', 'LLM_MODEL', 'AZURE_API_KEY')
AZURE_CFG_ARRAY, NUM_CUSTOM_BASIC_BTN = get_conf('AZURE_CFG_ARRAY', 'NUM_CUSTOM_BASIC_BTN')
# deal with azure openai key
if is_any_api_key(AZURE_API_KEY):
if is_any_api_key(API_KEY): API_KEY = API_KEY + ',' + AZURE_API_KEY
else: API_KEY = AZURE_API_KEY
return {'api_key': API_KEY, 'llm_model': LLM_MODEL}
if len(AZURE_CFG_ARRAY) > 0:
for azure_model_name, azure_cfg_dict in AZURE_CFG_ARRAY.items():
if not azure_model_name.startswith('azure'):
raise ValueError("AZURE_CFG_ARRAY中配置的模型必须以azure开头")
AZURE_API_KEY_ = azure_cfg_dict["AZURE_API_KEY"]
if is_any_api_key(AZURE_API_KEY_):
if is_any_api_key(API_KEY): API_KEY = API_KEY + ',' + AZURE_API_KEY_
else: API_KEY = AZURE_API_KEY_
customize_fn_overwrite_ = {}
for k in range(NUM_CUSTOM_BASIC_BTN):
customize_fn_overwrite_.update({
"自定义按钮" + str(k+1):{
"Title": r"",
"Prefix": r"请在自定义菜单中定义提示词前缀.",
"Suffix": r"请在自定义菜单中定义提示词后缀",
}
})
return {'api_key': API_KEY, 'llm_model': LLM_MODEL, 'customize_fn_overwrite': customize_fn_overwrite_}
def is_openai_api_key(key):
CUSTOM_API_KEY_PATTERN, = get_conf('CUSTOM_API_KEY_PATTERN')
CUSTOM_API_KEY_PATTERN = get_conf('CUSTOM_API_KEY_PATTERN')
if len(CUSTOM_API_KEY_PATTERN) != 0:
API_MATCH_ORIGINAL = re.match(CUSTOM_API_KEY_PATTERN, key)
else:
@@ -762,6 +832,11 @@ def read_single_conf_with_lru_cache(arg):
r = getattr(importlib.import_module('config'), arg)
# 在读取API_KEY时,检查一下是不是忘了改config
if arg == 'API_URL_REDIRECT':
oai_rd = r.get("https://api.openai.com/v1/chat/completions", None) # API_URL_REDIRECT填写格式是错误的,请阅读`https://github.com/binary-husky/gpt_academic/wiki/项目配置说明`
if oai_rd and not oai_rd.endswith('/completions'):
print亮红( "\n\n[API_URL_REDIRECT] API_URL_REDIRECT填错了。请阅读`https://github.com/binary-husky/gpt_academic/wiki/项目配置说明`。如果您确信自己没填错,无视此消息即可。")
time.sleep(5)
if arg == 'API_KEY':
print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和Azure的api-key。也支持同时填写多个api-key,如API_KEY=\"openai-key1,openai-key2,azure-key3\"")
print亮蓝(f"[API_KEY] 您既可以在config.py中修改api-key(s),也可以在问题输入区输入临时的api-key(s),然后回车键提交后即可生效。")
@@ -786,6 +861,7 @@ def get_conf(*args):
for arg in args:
r = read_single_conf_with_lru_cache(arg)
res.append(r)
if len(res) == 1: return res[0]
return res
@@ -857,7 +933,7 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
直到历史记录的标记数量降低到阈值以下。
"""
import numpy as np
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
def get_token_num(txt):
return len(tokenizer.encode(txt, disallowed_special=()))
input_token_num = get_token_num(inputs)
@@ -946,12 +1022,35 @@ def gen_time_str():
import time
return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
def get_log_folder(user='default', plugin_name='shared'):
PATH_LOGGING, = get_conf('PATH_LOGGING')
_dir = pj(PATH_LOGGING, user, plugin_name)
def get_log_folder(user=default_user_name, plugin_name='shared'):
if user is None: user = default_user_name
PATH_LOGGING = get_conf('PATH_LOGGING')
if plugin_name is None:
_dir = pj(PATH_LOGGING, user)
else:
_dir = pj(PATH_LOGGING, user, plugin_name)
if not os.path.exists(_dir): os.makedirs(_dir)
return _dir
def get_upload_folder(user=default_user_name, tag=None):
PATH_PRIVATE_UPLOAD = get_conf('PATH_PRIVATE_UPLOAD')
if user is None: user = default_user_name
if tag is None or len(tag)==0:
target_path_base = pj(PATH_PRIVATE_UPLOAD, user)
else:
target_path_base = pj(PATH_PRIVATE_UPLOAD, user, tag)
return target_path_base
def is_the_upload_folder(string):
PATH_PRIVATE_UPLOAD = get_conf('PATH_PRIVATE_UPLOAD')
pattern = r'^PATH_PRIVATE_UPLOAD[\\/][A-Za-z0-9_-]+[\\/]\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}$'
pattern = pattern.replace('PATH_PRIVATE_UPLOAD', PATH_PRIVATE_UPLOAD)
if re.match(pattern, string): return True
else: return False
def get_user(chatbotwithcookies):
return chatbotwithcookies._cookies.get('user_name', default_user_name)
class ProxyNetworkActivate():
"""
这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理
@@ -964,13 +1063,13 @@ class ProxyNetworkActivate():
else:
# 给定了task, 我们检查一下
from toolbox import get_conf
WHEN_TO_USE_PROXY, = get_conf('WHEN_TO_USE_PROXY')
WHEN_TO_USE_PROXY = get_conf('WHEN_TO_USE_PROXY')
self.valid = (task in WHEN_TO_USE_PROXY)
def __enter__(self):
if not self.valid: return self
from toolbox import get_conf
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
if 'no_proxy' in os.environ: os.environ.pop('no_proxy')
if proxies is not None:
if 'http' in proxies: os.environ['HTTP_PROXY'] = proxies['http']
@@ -1012,7 +1111,7 @@ def Singleton(cls):
"""
========================================================================
第四部分
接驳虚空终端:
接驳void-terminal:
- set_conf: 在运行过程中动态地修改配置
- set_multi_conf: 在运行过程中动态地修改多个配置
- get_plugin_handle: 获取插件的句柄
@@ -1027,7 +1126,7 @@ def set_conf(key, value):
read_single_conf_with_lru_cache.cache_clear()
get_conf.cache_clear()
os.environ[key] = str(value)
altered, = get_conf(key)
altered = get_conf(key)
return altered
def set_multi_conf(dic):
@@ -1048,20 +1147,17 @@ def get_plugin_handle(plugin_name):
def get_chat_handle():
"""
"""
from request_llm.bridge_all import predict_no_ui_long_connection
from request_llms.bridge_all import predict_no_ui_long_connection
return predict_no_ui_long_connection
def get_plugin_default_kwargs():
"""
"""
from toolbox import get_conf, ChatBotWithCookies
WEB_PORT, LLM_MODEL, API_KEY = \
get_conf('WEB_PORT', 'LLM_MODEL', 'API_KEY')
from toolbox import ChatBotWithCookies
cookies = load_chat_cookies()
llm_kwargs = {
'api_key': API_KEY,
'llm_model': LLM_MODEL,
'api_key': cookies['api_key'],
'llm_model': cookies['llm_model'],
'top_p':1.0,
'max_length': None,
'temperature':1.0,
@@ -1076,25 +1172,21 @@ def get_plugin_default_kwargs():
"chatbot_with_cookie": chatbot,
"history": [],
"system_prompt": "You are a good AI.",
"web_port": WEB_PORT
"web_port": None
}
return DEFAULT_FN_GROUPS_kwargs
def get_chat_default_kwargs():
"""
"""
from toolbox import get_conf
LLM_MODEL, API_KEY = get_conf('LLM_MODEL', 'API_KEY')
cookies = load_chat_cookies()
llm_kwargs = {
'api_key': API_KEY,
'llm_model': LLM_MODEL,
'api_key': cookies['api_key'],
'llm_model': cookies['llm_model'],
'top_p':1.0,
'max_length': None,
'temperature':1.0,
}
default_chat_kwargs = {
"inputs": "Hello there, are you ready?",
"llm_kwargs": llm_kwargs,
@@ -1106,3 +1198,12 @@ def get_chat_default_kwargs():
return default_chat_kwargs
def get_max_token(llm_kwargs):
from request_llms.bridge_all import model_info
return model_info[llm_kwargs['llm_model']]['max_token']
def check_packages(packages=[]):
import importlib.util
for p in packages:
spam_spec = importlib.util.find_spec(p)
if spam_spec is None: raise ModuleNotFoundError