优化Token溢出时的处理

这个提交包含在:
Your Name
2023-04-01 03:36:05 +08:00
父节点 17a18e99fa
当前提交 cb9404c4de
共有 9 个文件被更改,包括 4851 次插入66 次删除

查看文件

@@ -2,21 +2,21 @@ import markdown, mdtex2html, threading, importlib, traceback, importlib, inspect
from show_math import convert as convert_math
from functools import wraps
def get_reduce_token_percent(e):
def get_reduce_token_percent(text):
try:
# text = "maximum context length is 4097 tokens. However, your messages resulted in 4870 tokens"
pattern = r"(\d+)\s+tokens\b"
match = re.findall(pattern, text)
eps = 50 # 稍微留一点余地, 确保下次别再超过token
max_limit = float(match[0]) - eps
EXCEED_ALLO = 500 # 稍微留一点余地,否则在回复时会因余量太少出问题
max_limit = float(match[0]) - EXCEED_ALLO
current_tokens = float(match[1])
ratio = max_limit/current_tokens
assert ratio > 0 and ratio < 1
return ratio
return ratio, str(int(current_tokens-max_limit))
except:
return 0.5
return 0.5, '不详'
def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt='', long_connection=False):
def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], sys_prompt='', long_connection=True):
"""
调用简单的predict_no_ui接口,但是依然保留了些许界面心跳功能,当对话太长时,会自动采用二分法截断
i_say: 当前输入
@@ -45,19 +45,18 @@ def predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temp
break
except ConnectionAbortedError as token_exceeded_error:
# 尝试计算比例,尽可能多地保留文本
p_ratio = get_reduce_token_percent(str(token_exceeded_error))
p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
if len(history) > 0:
history = [his[ int(len(his) *p_ratio): ] for his in history if his is not None]
mutable[1] = 'Warning! History conversation is too long, cut into half. '
else:
i_say = i_say[: int(len(i_say) *p_ratio) ]
mutable[1] = 'Warning! Input file is too long, cut into half. '
mutable[1] = f'警告,文本过长将进行截断,Token溢出数{n_exceed},截断比例:{(1-p_ratio):.0%}'
except TimeoutError as e:
mutable[0] = '[Local Message] Failed with timeout.'
mutable[0] = '[Local Message] 请求超时。'
raise TimeoutError
except Exception as e:
mutable[0] = f'[Local Message] Failed with {str(e)}.'
raise RuntimeError(f'[Local Message] Failed with {str(e)}.')
mutable[0] = f'[Local Message] 异常:{str(e)}.'
raise RuntimeError(f'[Local Message] 异常:{str(e)}.')
# 创建新线程发出http请求
thread_name = threading.Thread(target=mt, args=(i_say, history)); thread_name.start()
# 原来的线程则负责持续更新UI,实现一个超时倒计时,并等待新线程的任务完成
@@ -286,7 +285,7 @@ def on_report_generated(files, chatbot):
report_files = find_recent_files('gpt_log')
if len(report_files) == 0: return report_files, chatbot
# files.extend(report_files)
chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧文件上传区,请查收。'])
chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧文件上传区”(可能处于折叠状态),请查收。'])
return report_files, chatbot
def get_conf(*args):