Merge branch 'frontier'

这个提交包含在:
binary-husky
2023-11-07 11:40:27 +08:00
当前提交 5192d316f0
共有 91 个文件被更改,包括 1035 次插入624 次删除

查看文件

@@ -11,7 +11,7 @@ class PaperFileGroup():
self.sp_file_tag = []
# count_token
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num

查看文件

@@ -11,7 +11,7 @@ class PaperFileGroup():
self.sp_file_tag = []
# count_token
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num

查看文件

@@ -129,7 +129,7 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面
else:
yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
r = requests.get(url_tar, proxies=proxies)
with open(dst, 'wb+') as f:
f.write(r.content)

查看文件

@@ -20,7 +20,7 @@ class PluginMultiprocessManager():
self.system_prompt = system_prompt
self.web_port = web_port
self.alive = True
self.use_docker, = get_conf('AUTOGEN_USE_DOCKER')
self.use_docker = get_conf('AUTOGEN_USE_DOCKER')
# create a thread to monitor self.heartbeat, terminate the instance if no heartbeat for a long time
timeout_seconds = 5*60

查看文件

@@ -5,7 +5,7 @@ import logging
def input_clipping(inputs, history, max_token_limit):
import numpy as np
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
@@ -63,7 +63,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
"""
import time
from concurrent.futures import ThreadPoolExecutor
from request_llm.bridge_all import predict_no_ui_long_connection
from request_llms.bridge_all import predict_no_ui_long_connection
# 用户反馈
chatbot.append([inputs_show_user, ""])
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
@@ -177,11 +177,11 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
"""
import time, random
from concurrent.futures import ThreadPoolExecutor
from request_llm.bridge_all import predict_no_ui_long_connection
from request_llms.bridge_all import predict_no_ui_long_connection
assert len(inputs_array) == len(history_array)
assert len(inputs_array) == len(sys_prompt_array)
if max_workers == -1: # 读取配置文件
try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
try: max_workers = get_conf('DEFAULT_WORKER_NUM')
except: max_workers = 8
if max_workers <= 0: max_workers = 3
# 屏蔽掉 chatglm的多线程,可能会导致严重卡顿
@@ -602,7 +602,7 @@ def get_files_from_everything(txt, type): # type='.md'
import requests
from toolbox import get_conf
from toolbox import get_log_folder, gen_time_str
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
try:
r = requests.get(txt, proxies=proxies)
except:

查看文件

@@ -165,7 +165,7 @@ class LatexPaperFileGroup():
self.sp_file_tag = []
# count_token
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num

查看文件

@@ -14,7 +14,7 @@ import math
class GROBID_OFFLINE_EXCEPTION(Exception): pass
def get_avail_grobid_url():
GROBID_URLS, = get_conf('GROBID_URLS')
GROBID_URLS = get_conf('GROBID_URLS')
if len(GROBID_URLS) == 0: return None
try:
_grobid_url = random.choice(GROBID_URLS) # 随机负载均衡
@@ -103,7 +103,7 @@ def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_fi
inputs_show_user_array = []
# get_token_num
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info[llm_kwargs['llm_model']]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

查看文件

@@ -1,7 +1,7 @@
from pydantic import BaseModel, Field
from typing import List
from toolbox import update_ui_lastest_msg, disable_auto_promotion
from request_llm.bridge_all import predict_no_ui_long_connection
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
import copy, json, pickle, os, sys, time

查看文件

@@ -1,13 +1,13 @@
from pydantic import BaseModel, Field
from typing import List
from toolbox import update_ui_lastest_msg, get_conf
from request_llm.bridge_all import predict_no_ui_long_connection
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.json_fns.pydantic_io import GptJsonIO
import copy, json, pickle, os, sys
def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
ALLOW_RESET_CONFIG, = get_conf('ALLOW_RESET_CONFIG')
ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
if not ALLOW_RESET_CONFIG:
yield from update_ui_lastest_msg(
lastmsg=f"当前配置不允许被修改如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
@@ -66,7 +66,7 @@ def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
)
def modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
ALLOW_RESET_CONFIG, = get_conf('ALLOW_RESET_CONFIG')
ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
if not ALLOW_RESET_CONFIG:
yield from update_ui_lastest_msg(
lastmsg=f"当前配置不允许被修改如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",

查看文件

@@ -43,7 +43,7 @@ def download_arxiv_(url_pdf):
file_path = download_dir+title_str
print('下载中')
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
r = requests.get(requests_pdf_url, proxies=proxies)
with open(file_path, 'wb+') as f:
f.write(r.content)
@@ -77,7 +77,7 @@ def get_name(_url_):
# print('在缓存中')
# return arxiv_recall[_url_]
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
res = requests.get(_url_, proxies=proxies)
bs = BeautifulSoup(res.text, 'html.parser')

查看文件

@@ -5,9 +5,9 @@ import datetime
def gen_image(llm_kwargs, prompt, resolution="256x256"):
import requests, json, time, os
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
# Set up OpenAI API key and model
api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
chat_endpoint = model_info[llm_kwargs['llm_model']]['endpoint']

查看文件

@@ -41,7 +41,7 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
return
# 检查当前的模型是否符合要求
API_URL_REDIRECT, = get_conf('API_URL_REDIRECT')
API_URL_REDIRECT = get_conf('API_URL_REDIRECT')
if len(API_URL_REDIRECT) > 0:
chatbot.append([f"处理任务: {txt}", f"暂不支持中转."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

查看文件

@@ -32,7 +32,7 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot
print(file_content)
# private_upload里面的文件名在解压zip后容易出现乱码rar和7z格式正常,故可以只分析文章内容,不输入文件名
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
max_token = model_info[llm_kwargs['llm_model']]['max_token']
TOKEN_LIMIT_PER_FRAGMENT = max_token * 3 // 4
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(

查看文件

@@ -41,7 +41,7 @@ def split_audio_file(filename, split_duration=1000):
def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history):
import os, requests
from moviepy.editor import AudioFileClip
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
# 设置OpenAI密钥和模型
api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
@@ -79,7 +79,7 @@ def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history):
chatbot.append([f"{i} 发送到openai音频解析终端 (whisper),当前参数:{parse_prompt}", "正在处理 ..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
response = requests.post(url, headers=headers, files=files, data=data, proxies=proxies).text
chatbot.append(["音频解析结果", response])

查看文件

@@ -13,7 +13,7 @@ class PaperFileGroup():
self.sp_file_tag = []
# count_token
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num
@@ -118,7 +118,7 @@ def get_files_from_everything(txt, preference=''):
if txt.startswith('http'):
import requests
from toolbox import get_conf
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
# 网络的远程文件
if preference == 'Github':
logging.info('正在从github下载资源 ...')

查看文件

@@ -21,7 +21,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
TOKEN_LIMIT_PER_FRAGMENT = 2500
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(

查看文件

@@ -95,7 +95,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
# 递归地切割PDF文件
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(

查看文件

@@ -19,7 +19,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
TOKEN_LIMIT_PER_FRAGMENT = 2500
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(

查看文件

@@ -2,7 +2,7 @@ from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
def google(query, proxies):
query = query # 在此处替换您要搜索的关键词
@@ -72,7 +72,7 @@ def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
# ------------- < 第1步爬取搜索引擎的结果 > -------------
from toolbox import get_conf
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
urls = google(txt, proxies)
history = []
if len(urls) == 0:

查看文件

@@ -2,7 +2,7 @@ from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
def bing_search(query, proxies=None):
@@ -72,7 +72,7 @@ def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, histor
# ------------- < 第1步爬取搜索引擎的结果 > -------------
from toolbox import get_conf
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
urls = bing_search(txt, proxies)
history = []
if len(urls) == 0:

查看文件

@@ -48,7 +48,7 @@ from pydantic import BaseModel, Field
from typing import List
from toolbox import CatchException, update_ui, is_the_upload_folder
from toolbox import update_ui_lastest_msg, disable_auto_promotion
from request_llm.bridge_all import predict_no_ui_long_connection
from request_llms.bridge_all import predict_no_ui_long_connection
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from crazy_functions.crazy_utils import input_clipping
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError

查看文件

@@ -13,7 +13,7 @@ class PaperFileGroup():
self.sp_file_tag = []
# count_token
from request_llm.bridge_all import model_info
from request_llms.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(
enc.encode(txt, disallowed_special=()))

查看文件

@@ -2,7 +2,7 @@ from toolbox import update_ui
from toolbox import CatchException, get_conf, markdown_convertion
from crazy_functions.crazy_utils import input_clipping
from crazy_functions.agent_fns.watchdog import WatchDog
from request_llm.bridge_all import predict_no_ui_long_connection
from request_llms.bridge_all import predict_no_ui_long_connection
import threading, time
import numpy as np
from .live_audio.aliyunASR import AliyunASR

查看文件

@@ -17,7 +17,7 @@ def get_meta_information(url, chatbot, history):
from urllib.parse import urlparse
session = requests.session()
proxies, = get_conf('proxies')
proxies = get_conf('proxies')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
'Accept-Encoding': 'gzip, deflate, br',