镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 22:46:48 +00:00
比较提交
42 次代码提交
purge_prin
...
dynamic_pl
| 作者 | SHA1 | 提交日期 | |
|---|---|---|---|
|
|
d2dd5d3eb1 | ||
|
|
d152f62894 | ||
|
|
197287fc30 | ||
|
|
c37fcc9299 | ||
|
|
91f5e6b8f7 | ||
|
|
4f0851f703 | ||
|
|
2821f27756 | ||
|
|
180550b8f0 | ||
|
|
7497dcb852 | ||
|
|
23ef2ffb22 | ||
|
|
848d0f65c7 | ||
|
|
f0b0364f74 | ||
|
|
69f3755682 | ||
|
|
4727113243 | ||
|
|
310122f5a7 | ||
|
|
c83bf214d0 | ||
|
|
e34c49dce5 | ||
|
|
3890467c84 | ||
|
|
074b3c9828 | ||
|
|
b8e8457a01 | ||
|
|
2c93a24d7e | ||
|
|
e9af6ef3a0 | ||
|
|
5ae8981dbb | ||
|
|
adbed044e4 | ||
|
|
2fe5febaf0 | ||
|
|
f54d8e559a | ||
|
|
e68fc2bc69 | ||
|
|
f695d7f1da | ||
|
|
679352d896 | ||
|
|
12c9ab1e33 | ||
|
|
da4a5efc49 | ||
|
|
9ac450cfb6 | ||
|
|
172f9e220b | ||
|
|
a28b7d8475 | ||
|
|
7d3ed36899 | ||
|
|
a7bc5fa357 | ||
|
|
4f5dd9ebcf | ||
|
|
427feb99d8 | ||
|
|
a01ca93362 | ||
|
|
597c320808 | ||
|
|
18290fd138 | ||
|
|
0d0575a639 |
44
.github/workflows/build-with-jittorllms.yml
vendored
44
.github/workflows/build-with-jittorllms.yml
vendored
@@ -1,44 +0,0 @@
|
||||
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
|
||||
name: build-with-jittorllms
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'master'
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE_NAME: ${{ github.repository }}_jittorllms
|
||||
|
||||
jobs:
|
||||
build-and-push-image:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Log in to the Container registry
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v4
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
file: docs/GithubAction+JittorLLMs
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
@@ -1,14 +1,14 @@
|
||||
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
|
||||
name: build-with-all-capacity-beta
|
||||
name: build-with-latex-arm
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'master'
|
||||
- "master"
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE_NAME: ${{ github.repository }}_with_all_capacity_beta
|
||||
IMAGE_NAME: ${{ github.repository }}_with_latex_arm
|
||||
|
||||
jobs:
|
||||
build-and-push-image:
|
||||
@@ -18,11 +18,17 @@ jobs:
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Log in to the Container registry
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
@@ -35,10 +41,11 @@ jobs:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v4
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
file: docs/GithubAction+AllCapacityBeta
|
||||
platforms: linux/arm64
|
||||
file: docs/GithubAction+NoLocal+Latex
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -160,4 +160,6 @@ test.*
|
||||
temp.*
|
||||
objdump*
|
||||
*.min.*.js
|
||||
TODO
|
||||
TODO
|
||||
experimental_mods
|
||||
search_results
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
> [!IMPORTANT]
|
||||
> 2024.6.1: 版本3.80加入插件二级菜单功能(详见wiki)
|
||||
> 2024.10.10: 突发停电,紧急恢复了提供[whl包](https://drive.google.com/file/d/19U_hsLoMrjOlQSzYS3pzWX9fTzyusArP/view?usp=sharing)的文件服务器
|
||||
> 2024.10.8: 版本3.90加入对llama-index的初步支持,版本3.80加入插件二级菜单功能(详见wiki)
|
||||
> 2024.5.1: 加入Doc2x翻译PDF论文的功能,[查看详情](https://github.com/binary-husky/gpt_academic/wiki/Doc2x)
|
||||
> 2024.3.11: 全力支持Qwen、GLM、DeepseekCoder等中文大语言模型! SoVits语音克隆模块,[查看详情](https://www.bilibili.com/video/BV1Rp421S7tF/)
|
||||
> 2024.1.17: 安装依赖时,请选择`requirements.txt`中**指定的版本**。 安装命令:`pip install -r requirements.txt`。本项目完全开源免费,您可通过订阅[在线服务](https://github.com/binary-husky/gpt_academic/wiki/online)的方式鼓励本项目的发展。
|
||||
|
||||
134
check_proxy.py
134
check_proxy.py
@@ -1,48 +1,77 @@
|
||||
from loguru import logger
|
||||
|
||||
def check_proxy(proxies, return_ip=False):
    """
    Query a geo-IP service through the given proxy and report the proxy's location.

    Args:
        proxies (dict | None): Proxy mapping handed to `requests`; its 'https'
            entry is echoed in the report. `None` means no proxy is configured.
        return_ip (bool, optional): When True, return the detected IP address
            instead of the report text. Defaults to False.

    Returns:
        str or None: The report text (also logged as a warning) when
        `return_ip` is False; otherwise the detected IP, or None when no IP
        could be determined.
    """
    import requests
    proxies_https = proxies['https'] if proxies is not None else '无'
    ip = None
    try:
        # Primary lookup: ask ipapi.co what address/country the request came from.
        response = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4)
        data = response.json()
        if 'country_name' in data:
            country = data['country_name']
            result = f"代理配置 {proxies_https}, 代理所在地:{country}"
            if 'ip' in data:
                ip = data['ip']
        elif 'error' in data:
            # The primary service answered with an error payload: try the backup source.
            alternative, ip = _check_with_backup_source(proxies)
            if alternative is None:
                result = f"代理配置 {proxies_https}, 代理所在地:未知,IP查询频率受限"
            else:
                result = f"代理配置 {proxies_https}, 代理所在地:{alternative}"
        else:
            result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
    except Exception:
        # Any request/parsing failure is reported as an unusable proxy.
        # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
        result = f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效"

    # Single exit path shared by the success and failure cases.
    if return_ip:
        return ip
    logger.warning(result)
    return result
|
||||
|
||||
def _check_with_backup_source(proxies):
    """
    Look up the proxy's location through the ip-api.com EDNS endpoint.

    Each call queries a fresh random 32-character subdomain of
    `edns.ip-api.com` (presumably so that no cached DNS answer is reused —
    confirm against the service's documentation).

    Args:
        proxies (dict | None): Proxy mapping handed to `requests`.

    Returns:
        tuple: `(geo, ip)` taken from the `dns` object of the JSON response,
        or `(None, None)` when the request or the parsing fails.
    """
    import random, string, requests
    random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=32))
    try:
        res_json = requests.get(f"http://{random_string}.edns.ip-api.com/json", proxies=proxies, timeout=4).json()
        return res_json['dns']['geo'], res_json['dns']['ip']
    except Exception:
        # Best-effort lookup: swallow ordinary failures, but let
        # KeyboardInterrupt / SystemExit propagate (no bare except).
        return None, None
|
||||
|
||||
def backup_and_download(current_version, remote_version):
|
||||
"""
|
||||
一键更新协议:备份和下载
|
||||
一键更新协议:备份当前版本,下载远程版本并解压缩。
|
||||
|
||||
Args:
|
||||
current_version (str): 当前版本号。
|
||||
remote_version (str): 远程版本号。
|
||||
|
||||
Returns:
|
||||
str: 新版本目录的路径。
|
||||
"""
|
||||
from toolbox import get_conf
|
||||
import shutil
|
||||
@@ -59,7 +88,7 @@ def backup_and_download(current_version, remote_version):
|
||||
proxies = get_conf('proxies')
|
||||
try: r = requests.get('https://github.com/binary-husky/chatgpt_academic/archive/refs/heads/master.zip', proxies=proxies, stream=True)
|
||||
except: r = requests.get('https://public.agent-matrix.com/publish/master.zip', proxies=proxies, stream=True)
|
||||
zip_file_path = backup_dir+'/master.zip'
|
||||
zip_file_path = backup_dir+'/master.zip' # ⭐ 保存备份文件的路径
|
||||
with open(zip_file_path, 'wb+') as f:
|
||||
f.write(r.content)
|
||||
dst_path = new_version_dir
|
||||
@@ -75,6 +104,17 @@ def backup_and_download(current_version, remote_version):
|
||||
def patch_and_restart(path):
|
||||
"""
|
||||
一键更新协议:覆盖和重启
|
||||
|
||||
Args:
|
||||
path (str): 新版本代码所在的路径
|
||||
|
||||
注意事项:
|
||||
如果您的程序没有使用config_private.py私密配置文件,则会将config.py重命名为config_private.py以避免配置丢失。
|
||||
|
||||
更新流程:
|
||||
- 复制最新版本代码到当前目录
|
||||
- 更新pip包依赖
|
||||
- 如果更新失败,则提示手动安装依赖库并重启
|
||||
"""
|
||||
from distutils import dir_util
|
||||
import shutil
|
||||
@@ -82,33 +122,44 @@ def patch_and_restart(path):
|
||||
import sys
|
||||
import time
|
||||
import glob
|
||||
from shared_utils.colorful import print亮黄, print亮绿, print亮红
|
||||
# if not using config_private, move origin config.py as config_private.py
|
||||
from shared_utils.colorful import log亮黄, log亮绿, log亮红
|
||||
|
||||
if not os.path.exists('config_private.py'):
|
||||
print亮黄('由于您没有设置config_private.py私密配置,现将您的现有配置移动至config_private.py以防止配置丢失,',
|
||||
log亮黄('由于您没有设置config_private.py私密配置,现将您的现有配置移动至config_private.py以防止配置丢失,',
|
||||
'另外您可以随时在history子文件夹下找回旧版的程序。')
|
||||
shutil.copyfile('config.py', 'config_private.py')
|
||||
|
||||
path_new_version = glob.glob(path + '/*-master')[0]
|
||||
dir_util.copy_tree(path_new_version, './')
|
||||
print亮绿('代码已经更新,即将更新pip包依赖……')
|
||||
for i in reversed(range(5)): time.sleep(1); print(i)
|
||||
dir_util.copy_tree(path_new_version, './') # ⭐ 将最新版本代码复制到当前目录
|
||||
|
||||
log亮绿('代码已经更新,即将更新pip包依赖……')
|
||||
for i in reversed(range(5)): time.sleep(1); log亮绿(i)
|
||||
|
||||
try:
|
||||
import subprocess
|
||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'])
|
||||
except:
|
||||
print亮红('pip包依赖安装出现问题,需要手动安装新增的依赖库 `python -m pip install -r requirements.txt`,然后在用常规的`python main.py`的方式启动。')
|
||||
print亮绿('更新完成,您可以随时在history子文件夹下找回旧版的程序,5s之后重启')
|
||||
print亮红('假如重启失败,您可能需要手动安装新增的依赖库 `python -m pip install -r requirements.txt`,然后在用常规的`python main.py`的方式启动。')
|
||||
print(' ------------------------------ -----------------------------------')
|
||||
for i in reversed(range(8)): time.sleep(1); print(i)
|
||||
os.execl(sys.executable, sys.executable, *sys.argv)
|
||||
log亮红('pip包依赖安装出现问题,需要手动安装新增的依赖库 `python -m pip install -r requirements.txt`,然后在用常规的`python main.py`的方式启动。')
|
||||
|
||||
log亮绿('更新完成,您可以随时在history子文件夹下找回旧版的程序,5s之后重启')
|
||||
log亮红('假如重启失败,您可能需要手动安装新增的依赖库 `python -m pip install -r requirements.txt`,然后在用常规的`python main.py`的方式启动。')
|
||||
log亮绿(' ------------------------------ -----------------------------------')
|
||||
|
||||
for i in reversed(range(8)): time.sleep(1); log亮绿(i)
|
||||
os.execl(sys.executable, sys.executable, *sys.argv) # 重启程序
|
||||
|
||||
|
||||
def get_current_version():
    """
    Read the current version from the JSON file `./version`.

    Returns:
        The value stored under the "version" key of `./version`, or an empty
        string when the file is missing, unreadable, not valid JSON, or has
        no "version" entry.
    """
    import json
    try:
        with open('./version', 'r', encoding='utf8') as f:
            current_version = json.load(f)['version']
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: file missing/unreadable; ValueError: bad JSON or bad UTF-8;
        # KeyError/TypeError: JSON parsed but has no usable "version" entry.
        current_version = ""
    return current_version
|
||||
@@ -117,6 +168,12 @@ def get_current_version():
|
||||
def auto_update(raise_error=False):
|
||||
"""
|
||||
一键更新协议:查询版本和用户意见
|
||||
|
||||
Args:
|
||||
raise_error (bool, optional): 是否在出错时抛出错误。默认为 False。
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
try:
|
||||
from toolbox import get_conf
|
||||
@@ -135,22 +192,22 @@ def auto_update(raise_error=False):
|
||||
current_version = f.read()
|
||||
current_version = json.loads(current_version)['version']
|
||||
if (remote_version - current_version) >= 0.01-1e-5:
|
||||
from shared_utils.colorful import print亮黄
|
||||
print亮黄(f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}。{new_feature}')
|
||||
print('(1)Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
|
||||
from shared_utils.colorful import log亮黄
|
||||
log亮黄(f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}。{new_feature}') # ⭐ 在控制台打印新版本信息
|
||||
logger.info('(1)Github更新地址:\nhttps://github.com/binary-husky/chatgpt_academic\n')
|
||||
user_instruction = input('(2)是否一键更新代码(Y+回车=确认,输入其他/无输入+回车=不更新)?')
|
||||
if user_instruction in ['Y', 'y']:
|
||||
path = backup_and_download(current_version, remote_version)
|
||||
path = backup_and_download(current_version, remote_version) # ⭐ 备份并下载文件
|
||||
try:
|
||||
patch_and_restart(path)
|
||||
patch_and_restart(path) # ⭐ 执行覆盖并重启操作
|
||||
except:
|
||||
msg = '更新失败。'
|
||||
if raise_error:
|
||||
from toolbox import trimmed_format_exc
|
||||
msg += trimmed_format_exc()
|
||||
print(msg)
|
||||
logger.warning(msg)
|
||||
else:
|
||||
print('自动更新程序:已禁用')
|
||||
logger.info('自动更新程序:已禁用')
|
||||
return
|
||||
else:
|
||||
return
|
||||
@@ -159,10 +216,13 @@ def auto_update(raise_error=False):
|
||||
if raise_error:
|
||||
from toolbox import trimmed_format_exc
|
||||
msg += trimmed_format_exc()
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
|
||||
def warm_up_modules():
|
||||
print('正在执行一些模块的预热 ...')
|
||||
"""
|
||||
预热模块,加载特定模块并执行预热操作。
|
||||
"""
|
||||
logger.info('正在执行一些模块的预热 ...')
|
||||
from toolbox import ProxyNetworkActivate
|
||||
from request_llms.bridge_all import model_info
|
||||
with ProxyNetworkActivate("Warmup_Modules"):
|
||||
@@ -172,7 +232,17 @@ def warm_up_modules():
|
||||
enc.encode("模块预热", disallowed_special=())
|
||||
|
||||
def warm_up_vectordb():
|
||||
print('正在执行一些模块的预热 ...')
|
||||
"""
|
||||
执行一些模块的预热操作。
|
||||
|
||||
本函数主要用于执行一些模块的预热操作,确保在后续的流程中能够顺利运行。
|
||||
|
||||
⭐ 关键作用:预热模块
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
logger.info('正在执行一些模块的预热 ...')
|
||||
from toolbox import ProxyNetworkActivate
|
||||
with ProxyNetworkActivate("Warmup_Modules"):
|
||||
import nltk
|
||||
@@ -184,4 +254,4 @@ if __name__ == '__main__':
|
||||
os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
|
||||
from toolbox import get_conf
|
||||
proxies = get_conf('proxies')
|
||||
check_proxy(proxies)
|
||||
check_proxy(proxies)
|
||||
@@ -57,9 +57,9 @@ EMBEDDING_MODEL = "text-embedding-3-small"
|
||||
# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview",
|
||||
# ]
|
||||
# --- --- --- ---
|
||||
# 此外,您还可以在接入one-api/vllm/ollama时,
|
||||
# 使用"one-api-*","vllm-*","ollama-*"前缀直接使用非标准方式接入的模型,例如
|
||||
# AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)", "ollama-phi3(max_token=4096)"]
|
||||
# 此外,您还可以在接入one-api/vllm/ollama/OpenRouter时,
|
||||
# 使用"one-api-*","vllm-*","ollama-*","openrouter-*"前缀直接使用非标准方式接入的模型,例如
|
||||
# AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)", "ollama-phi3(max_token=4096)","openrouter-openai/gpt-4o-mini","openrouter-openai/chatgpt-4o-latest"]
|
||||
# --- --- --- ---
|
||||
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ def get_core_functions():
|
||||
text_show_english=
|
||||
r"Below is a paragraph from an academic paper. Polish the writing to meet the academic style, "
|
||||
r"improve the spelling, grammar, clarity, concision and overall readability. When necessary, rewrite the whole sentence. "
|
||||
r"Firstly, you should provide the polished paragraph. "
|
||||
r"Firstly, you should provide the polished paragraph (in English). "
|
||||
r"Secondly, you should list all your modification and explain the reasons to do so in markdown table.",
|
||||
text_show_chinese=
|
||||
r"作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性,"
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
|
||||
from toolbox import trimmed_format_exc
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def get_crazy_functions():
|
||||
from crazy_functions.读文章写摘要 import 读文章写摘要
|
||||
from crazy_functions.生成函数注释 import 批量生成函数注释
|
||||
from crazy_functions.Rag_Interface import Rag问答
|
||||
from crazy_functions.SourceCode_Analyse import 解析项目本身
|
||||
from crazy_functions.SourceCode_Analyse import 解析一个Python项目
|
||||
from crazy_functions.SourceCode_Analyse import 解析一个Matlab项目
|
||||
@@ -17,7 +17,7 @@ def get_crazy_functions():
|
||||
from crazy_functions.SourceCode_Analyse import 解析一个前端项目
|
||||
from crazy_functions.高级功能函数模板 import 高阶功能模板函数
|
||||
from crazy_functions.高级功能函数模板 import Demo_Wrap
|
||||
from crazy_functions.Latex全文润色 import Latex英文润色
|
||||
from crazy_functions.Latex_Project_Polish import Latex英文润色
|
||||
from crazy_functions.询问多个大语言模型 import 同时问询
|
||||
from crazy_functions.SourceCode_Analyse import 解析一个Lua项目
|
||||
from crazy_functions.SourceCode_Analyse import 解析一个CSharp项目
|
||||
@@ -33,8 +33,8 @@ def get_crazy_functions():
|
||||
from crazy_functions.PDF_Translate import 批量翻译PDF文档
|
||||
from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
|
||||
from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
|
||||
from crazy_functions.Latex全文润色 import Latex中文润色
|
||||
from crazy_functions.Latex全文润色 import Latex英文纠错
|
||||
from crazy_functions.Latex_Project_Polish import Latex中文润色
|
||||
from crazy_functions.Latex_Project_Polish import Latex英文纠错
|
||||
from crazy_functions.Markdown_Translate import Markdown中译英
|
||||
from crazy_functions.虚空终端 import 虚空终端
|
||||
from crazy_functions.生成多种Mermaid图表 import Mermaid_Gen
|
||||
@@ -49,15 +49,9 @@ def get_crazy_functions():
|
||||
from crazy_functions.Image_Generate import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2
|
||||
from crazy_functions.Image_Generate_Wrap import ImageGen_Wrap
|
||||
from crazy_functions.SourceCode_Comment import 注释Python项目
|
||||
from crazy_functions.SourceCode_Comment_Wrap import SourceCodeComment_Wrap
|
||||
|
||||
function_plugins = {
|
||||
"Rag智能召回": {
|
||||
"Group": "对话",
|
||||
"Color": "stop",
|
||||
"AsButton": False,
|
||||
"Info": "将问答数据记录到向量库中,作为长期参考。",
|
||||
"Function": HotReload(Rag问答),
|
||||
},
|
||||
"虚空终端": {
|
||||
"Group": "对话|编程|学术|智能体",
|
||||
"Color": "stop",
|
||||
@@ -78,6 +72,7 @@ def get_crazy_functions():
|
||||
"AsButton": False,
|
||||
"Info": "上传一系列python源文件(或者压缩包), 为这些代码添加docstring | 输入参数为路径",
|
||||
"Function": HotReload(注释Python项目),
|
||||
"Class": SourceCodeComment_Wrap,
|
||||
},
|
||||
"载入对话历史存档(先上传存档或输入路径)": {
|
||||
"Group": "对话",
|
||||
@@ -429,8 +424,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
# try:
|
||||
# from crazy_functions.联网的ChatGPT import 连接网络回答问题
|
||||
@@ -460,8 +455,8 @@ def get_crazy_functions():
|
||||
# }
|
||||
# )
|
||||
# except:
|
||||
# print(trimmed_format_exc())
|
||||
# print("Load function plugin failed")
|
||||
# logger.error(trimmed_format_exc())
|
||||
# logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.SourceCode_Analyse import 解析任意code项目
|
||||
@@ -479,8 +474,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
|
||||
@@ -498,8 +493,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
|
||||
|
||||
@@ -520,8 +515,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.数学动画生成manim import 动画生成
|
||||
@@ -538,8 +533,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.Markdown_Translate import Markdown翻译指定语言
|
||||
@@ -557,8 +552,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.知识库问答 import 知识库文件注入
|
||||
@@ -576,8 +571,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.知识库问答 import 读取知识库作答
|
||||
@@ -595,8 +590,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.交互功能函数模板 import 交互功能模板函数
|
||||
@@ -612,8 +607,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
|
||||
try:
|
||||
@@ -635,8 +630,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.批量翻译PDF文档_NOUGAT import 批量翻译PDF文档
|
||||
@@ -652,8 +647,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.函数动态生成 import 函数动态生成
|
||||
@@ -669,8 +664,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.多智能体 import 多智能体终端
|
||||
@@ -686,8 +681,8 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.互动小游戏 import 随机小游戏
|
||||
@@ -703,8 +698,33 @@ def get_crazy_functions():
|
||||
}
|
||||
)
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
print("Load function plugin failed")
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
try:
|
||||
from crazy_functions.Rag_Interface import Rag问答
|
||||
|
||||
function_plugins.update(
|
||||
{
|
||||
"Rag智能召回": {
|
||||
"Group": "对话",
|
||||
"Color": "stop",
|
||||
"AsButton": False,
|
||||
"Info": "将问答数据记录到向量库中,作为长期参考。",
|
||||
"Function": HotReload(Rag问答),
|
||||
},
|
||||
}
|
||||
)
|
||||
except:
|
||||
logger.error(trimmed_format_exc())
|
||||
logger.error("Load function plugin failed")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# try:
|
||||
# from crazy_functions.高级功能函数模板 import 测试图表渲染
|
||||
@@ -717,22 +737,9 @@ def get_crazy_functions():
|
||||
# }
|
||||
# })
|
||||
# except:
|
||||
# print(trimmed_format_exc())
|
||||
# logger.error(trimmed_format_exc())
|
||||
# print('Load function plugin failed')
|
||||
|
||||
# try:
|
||||
# from crazy_functions.chatglm微调工具 import 微调数据集生成
|
||||
# function_plugins.update({
|
||||
# "黑盒模型学习: 微调数据集生成 (先上传数据集)": {
|
||||
# "Color": "stop",
|
||||
# "AsButton": False,
|
||||
# "AdvancedArgs": True,
|
||||
# "ArgsReminder": "针对数据集输入(如 绿帽子*深蓝色衬衫*黑色运动裤)给出指令,例如您可以将以下命令复制到下方: --llm_to_learn=azure-gpt-3.5 --prompt_prefix='根据下面的服装类型提示,想象一个穿着者,对这个人外貌、身处的环境、内心世界、过去经历进行描写。要求:100字以内,用第二人称。' --system_prompt=''",
|
||||
# "Function": HotReload(微调数据集生成)
|
||||
# }
|
||||
# })
|
||||
# except:
|
||||
# print('Load function plugin failed')
|
||||
|
||||
"""
|
||||
设置默认值:
|
||||
|
||||
@@ -171,7 +171,7 @@ def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
|
||||
system_prompt 给gpt的静默提醒
|
||||
user_request 当前用户的请求信息(IP地址等)
|
||||
"""
|
||||
from .crazy_utils import get_files_from_everything
|
||||
from crazy_functions.crazy_utils import get_files_from_everything
|
||||
success, file_manifest, _ = get_files_from_everything(txt, type='.html')
|
||||
|
||||
if not success:
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
|
||||
from toolbox import get_conf, update_ui
|
||||
from crazy_functions.Image_Generate import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2
|
||||
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
|
||||
|
||||
|
||||
|
||||
def update_js_plugin_info():
    """Placeholder that currently does nothing and returns None."""
    # encode_plugin_info
    return None
|
||||
|
||||
|
||||
|
||||
class ImageGen_Wrap(GptAcademicPluginTemplate):
    """Plugin wrapper that exposes the DALLE image generators through a secondary option menu."""

    def __init__(self):
        """
        Note that `execute` runs in a different thread, so be very careful
        when defining and using class variables!
        """
        pass

    def define_arg_selection_menu(self):
        """
        Define the plugin's secondary option menu.

        Each entry maps an argument name to the JSON dump of an `ArgProperty`:
        `title` is shown above the widget, `description` inside it,
        `default_value` is the initial value, and `type` selects a text box
        ("string") or a drop-down ("dropdown", with its choices in `options`).
        """
        # Main input, synced automatically from the input box.
        prompt_box = ArgProperty(title="输入图片描述", description="需要生成图像的文本描述,尽量使用英文", default_value="", type="string")
        model_menu = ArgProperty(title="模型", options=["DALLE2", "DALLE3"], default_value="DALLE3", description="无", type="dropdown")
        resolution_menu = ArgProperty(title="分辨率", options=["256x256(限DALLE2)", "512x512(限DALLE2)", "1024x1024", "1792x1024(限DALLE3)", "1024x1792(限DALLE3)"], default_value="1024x1024", description="无", type="dropdown")
        quality_menu = ArgProperty(title="质量", options=["standard", "hd"], default_value="standard", description="无", type="dropdown")
        style_menu = ArgProperty(title="风格", options=["vivid", "natural"], default_value="vivid", description="无", type="dropdown")

        return {
            "main_input": prompt_box.model_dump_json(),
            "model_name": model_menu.model_dump_json(),
            "resolution": resolution_menu.model_dump_json(),
            "quality (仅DALLE3生效)": quality_menu.model_dump_json(),
            "style (仅DALLE3生效)": style_menu.model_dump_json(),
        }

    def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
        """
        Run the plugin: translate the menu selections into `advanced_arg` and
        delegate to the matching DALLE generator.

        NOTE(review): declared without `self`; presumably invoked on the class
        rather than on an instance — confirm against the plugin template.
        """
        # Resolution: strip the "only for DALLE2/DALLE3" hint from the menu label.
        resolution = plugin_kwargs["resolution"].replace("(限DALLE2)", "").replace("(限DALLE3)", "")
        chosen_model = plugin_kwargs["model_name"]

        if chosen_model == "DALLE2":
            plugin_kwargs["advanced_arg"] = resolution
            yield from 图片生成_DALLE2(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
            return

        if chosen_model == "DALLE3":
            quality = plugin_kwargs["quality (仅DALLE3生效)"]
            style = plugin_kwargs["style (仅DALLE3生效)"]
            plugin_kwargs["advanced_arg"] = f"{resolution}-{quality}-{style}"
            yield from 图片生成_DALLE3(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
            return

        # Unknown model name: tell the user and refresh the UI.
        chatbot.append([None, "抱歉,找不到该模型"])
        yield from update_ui(chatbot=chatbot, history=history)
|
||||
@@ -30,7 +30,7 @@ def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", qual
|
||||
if style is not None:
|
||||
data['style'] = style
|
||||
response = requests.post(url, headers=headers, json=data, proxies=proxies)
|
||||
print(response.content)
|
||||
# logger.info(response.content)
|
||||
try:
|
||||
image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
|
||||
except:
|
||||
@@ -76,7 +76,7 @@ def edit_image(llm_kwargs, prompt, image_path, resolution="1024x1024", model="da
|
||||
}
|
||||
|
||||
response = requests.post(url, headers=headers, files=files, proxies=proxies)
|
||||
print(response.content)
|
||||
# logger.info(response.content)
|
||||
try:
|
||||
image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
|
||||
except:
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from toolbox import update_ui, trimmed_format_exc, get_conf, get_log_folder, promote_file_to_downloadzone, check_repeat_upload, map_file_to_sha256
|
||||
from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip_result, gen_time_str
|
||||
from functools import partial
|
||||
import glob, os, requests, time, json, tarfile
|
||||
from loguru import logger
|
||||
|
||||
import glob, os, requests, time, json, tarfile, threading
|
||||
|
||||
pj = os.path.join
|
||||
ARXIV_CACHE_DIR = get_conf("ARXIV_CACHE_DIR")
|
||||
@@ -136,25 +138,43 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
|
||||
cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
|
||||
if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id
|
||||
|
||||
url_tar = url_.replace('/abs/', '/e-print/')
|
||||
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
|
||||
extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
|
||||
os.makedirs(translation_dir, exist_ok=True)
|
||||
|
||||
# <-------------- download arxiv source file ------------->
|
||||
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
|
||||
dst = pj(translation_dir, arxiv_id + '.tar')
|
||||
if os.path.exists(dst):
|
||||
yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面
|
||||
os.makedirs(translation_dir, exist_ok=True)
|
||||
# <-------------- download arxiv source file ------------->
|
||||
|
||||
def fix_url_and_download():
|
||||
# for url_tar in [url_.replace('/abs/', '/e-print/'), url_.replace('/abs/', '/src/')]:
|
||||
for url_tar in [url_.replace('/abs/', '/src/'), url_.replace('/abs/', '/e-print/')]:
|
||||
proxies = get_conf('proxies')
|
||||
r = requests.get(url_tar, proxies=proxies)
|
||||
if r.status_code == 200:
|
||||
with open(dst, 'wb+') as f:
|
||||
f.write(r.content)
|
||||
return True
|
||||
return False
|
||||
|
||||
if os.path.exists(dst) and allow_cache:
|
||||
yield from update_ui_lastest_msg(f"调用缓存 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
|
||||
success = True
|
||||
else:
|
||||
yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面
|
||||
proxies = get_conf('proxies')
|
||||
r = requests.get(url_tar, proxies=proxies)
|
||||
with open(dst, 'wb+') as f:
|
||||
f.write(r.content)
|
||||
yield from update_ui_lastest_msg(f"开始下载 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
|
||||
success = fix_url_and_download()
|
||||
yield from update_ui_lastest_msg(f"下载完成 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
|
||||
if not success:
|
||||
yield from update_ui_lastest_msg(f"下载失败 {arxiv_id}", chatbot=chatbot, history=history)
|
||||
raise tarfile.ReadError(f"论文下载失败 {arxiv_id}")
|
||||
|
||||
# <-------------- extract file ------------->
|
||||
yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history) # 刷新界面
|
||||
from toolbox import extract_archive
|
||||
extract_archive(file_path=dst, dest_dir=extract_dst)
|
||||
try:
|
||||
extract_archive(file_path=dst, dest_dir=extract_dst)
|
||||
except tarfile.ReadError:
|
||||
os.remove(dst)
|
||||
raise tarfile.ReadError(f"论文下载失败")
|
||||
return extract_dst, arxiv_id
|
||||
|
||||
|
||||
@@ -178,7 +198,7 @@ def pdf2tex_project(pdf_file_path, plugin_kwargs):
|
||||
|
||||
if response.ok:
|
||||
pdf_id = response.json()["pdf_id"]
|
||||
print(f"PDF processing initiated. PDF ID: {pdf_id}")
|
||||
logger.info(f"PDF processing initiated. PDF ID: {pdf_id}")
|
||||
|
||||
# Step 2: Check processing status
|
||||
while True:
|
||||
@@ -186,12 +206,12 @@ def pdf2tex_project(pdf_file_path, plugin_kwargs):
|
||||
conversion_data = conversion_response.json()
|
||||
|
||||
if conversion_data["status"] == "completed":
|
||||
print("PDF processing completed.")
|
||||
logger.info("PDF processing completed.")
|
||||
break
|
||||
elif conversion_data["status"] == "error":
|
||||
print("Error occurred during processing.")
|
||||
logger.info("Error occurred during processing.")
|
||||
else:
|
||||
print(f"Processing status: {conversion_data['status']}")
|
||||
logger.info(f"Processing status: {conversion_data['status']}")
|
||||
time.sleep(5) # wait for a few seconds before checking again
|
||||
|
||||
# Step 3: Save results to local files
|
||||
@@ -206,7 +226,7 @@ def pdf2tex_project(pdf_file_path, plugin_kwargs):
|
||||
output_path = os.path.join(output_dir, output_name)
|
||||
with open(output_path, "wb") as output_file:
|
||||
output_file.write(response.content)
|
||||
print(f"tex.zip file saved at: {output_path}")
|
||||
logger.info(f"tex.zip file saved at: {output_path}")
|
||||
|
||||
import zipfile
|
||||
unzip_dir = os.path.join(output_dir, file_name_wo_dot)
|
||||
@@ -216,7 +236,7 @@ def pdf2tex_project(pdf_file_path, plugin_kwargs):
|
||||
return unzip_dir
|
||||
|
||||
else:
|
||||
print(f"Error sending PDF for processing. Status code: {response.status_code}")
|
||||
logger.error(f"Error sending PDF for processing. Status code: {response.status_code}")
|
||||
return None
|
||||
else:
|
||||
from crazy_functions.pdf_fns.parse_pdf_via_doc2x import 解析PDF_DOC2X_转Latex
|
||||
@@ -318,11 +338,17 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
|
||||
# <-------------- more requirements ------------->
|
||||
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
|
||||
more_req = plugin_kwargs.get("advanced_arg", "")
|
||||
no_cache = more_req.startswith("--no-cache")
|
||||
if no_cache: more_req.lstrip("--no-cache")
|
||||
|
||||
no_cache = ("--no-cache" in more_req)
|
||||
if no_cache: more_req = more_req.replace("--no-cache", "").strip()
|
||||
|
||||
allow_gptac_cloud_io = ("--allow-cloudio" in more_req) # 从云端下载翻译结果,以及上传翻译结果到云端
|
||||
if allow_gptac_cloud_io: more_req = more_req.replace("--allow-cloudio", "").strip()
|
||||
|
||||
allow_cache = not no_cache
|
||||
_switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
|
||||
|
||||
|
||||
# <-------------- check deps ------------->
|
||||
try:
|
||||
import glob, os, time, subprocess
|
||||
@@ -349,6 +375,20 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
|
||||
# #################################################################
|
||||
if allow_gptac_cloud_io and arxiv_id:
|
||||
# 访问 GPTAC学术云,查询云端是否存在该论文的翻译版本
|
||||
from crazy_functions.latex_fns.latex_actions import check_gptac_cloud
|
||||
success, downloaded = check_gptac_cloud(arxiv_id, chatbot)
|
||||
if success:
|
||||
chatbot.append([
|
||||
f"检测到GPTAC云端存在翻译版本, 如果不满意翻译结果, 请禁用云端分享, 然后重新执行。",
|
||||
None
|
||||
])
|
||||
yield from update_ui(chatbot=chatbot, history=history)
|
||||
return
|
||||
#################################################################
|
||||
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
else:
|
||||
@@ -386,14 +426,21 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
|
||||
# <-------------- zip PDF ------------->
|
||||
zip_res = zip_result(project_folder)
|
||||
if success:
|
||||
if allow_gptac_cloud_io and arxiv_id:
|
||||
# 如果用户允许,我们将翻译好的arxiv论文PDF上传到GPTAC学术云
|
||||
from crazy_functions.latex_fns.latex_actions import upload_to_gptac_cloud_if_user_allow
|
||||
threading.Thread(target=upload_to_gptac_cloud_if_user_allow,
|
||||
args=(chatbot, arxiv_id), daemon=True).start()
|
||||
|
||||
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
|
||||
yield from update_ui(chatbot=chatbot, history=history);
|
||||
yield from update_ui(chatbot=chatbot, history=history)
|
||||
time.sleep(1) # 刷新界面
|
||||
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
|
||||
|
||||
else:
|
||||
chatbot.append((f"失败了",
|
||||
'虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux,请检查系统字体(见Github wiki) ...'))
|
||||
yield from update_ui(chatbot=chatbot, history=history);
|
||||
yield from update_ui(chatbot=chatbot, history=history)
|
||||
time.sleep(1) # 刷新界面
|
||||
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
|
||||
|
||||
|
||||
@@ -30,6 +30,8 @@ class Arxiv_Localize(GptAcademicPluginTemplate):
|
||||
default_value="", type="string").model_dump_json(), # 高级参数输入区,自动同步
|
||||
"allow_cache":
|
||||
ArgProperty(title="是否允许从缓存中调取结果", options=["允许缓存", "从头执行"], default_value="允许缓存", description="无", type="dropdown").model_dump_json(),
|
||||
"allow_cloudio":
|
||||
ArgProperty(title="是否允许从GPTAC学术云下载(或者上传)翻译结果(仅针对Arxiv论文)", options=["允许", "禁止"], default_value="禁止", description="共享文献,互助互利", type="dropdown").model_dump_json(),
|
||||
}
|
||||
return gui_definition
|
||||
|
||||
@@ -38,9 +40,14 @@ class Arxiv_Localize(GptAcademicPluginTemplate):
|
||||
执行插件
|
||||
"""
|
||||
allow_cache = plugin_kwargs["allow_cache"]
|
||||
allow_cloudio = plugin_kwargs["allow_cloudio"]
|
||||
advanced_arg = plugin_kwargs["advanced_arg"]
|
||||
|
||||
if allow_cache == "从头执行": plugin_kwargs["advanced_arg"] = "--no-cache " + plugin_kwargs["advanced_arg"]
|
||||
|
||||
# 从云端下载翻译结果,以及上传翻译结果到云端;人人为我,我为人人。
|
||||
if allow_cloudio == "允许": plugin_kwargs["advanced_arg"] = "--allow-cloudio " + plugin_kwargs["advanced_arg"]
|
||||
|
||||
yield from Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from toolbox import update_ui, trimmed_format_exc, promote_file_to_downloadzone, get_log_folder
|
||||
from toolbox import CatchException, report_exception, write_history_to_file, zip_folder
|
||||
|
||||
from loguru import logger
|
||||
|
||||
class PaperFileGroup():
|
||||
def __init__(self):
|
||||
@@ -33,7 +33,7 @@ class PaperFileGroup():
|
||||
self.sp_file_index.append(index)
|
||||
self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
|
||||
|
||||
print('Segmentation: done')
|
||||
logger.info('Segmentation: done')
|
||||
def merge_result(self):
|
||||
self.file_result = ["" for _ in range(len(self.file_paths))]
|
||||
for r, k in zip(self.sp_file_result, self.sp_file_index):
|
||||
@@ -56,7 +56,7 @@ class PaperFileGroup():
|
||||
|
||||
def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'):
|
||||
import time, os, re
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
|
||||
|
||||
# <-------- 读取Latex文件,删除其中的所有注释 ---------->
|
||||
@@ -122,7 +122,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
pfg.write_result()
|
||||
pfg.zip_result()
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
|
||||
# <-------- 整理结果,退出 ---------->
|
||||
create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
|
||||
@@ -1,6 +1,6 @@
|
||||
from toolbox import update_ui, promote_file_to_downloadzone
|
||||
from toolbox import CatchException, report_exception, write_history_to_file
|
||||
fast_debug = False
|
||||
from loguru import logger
|
||||
|
||||
class PaperFileGroup():
|
||||
def __init__(self):
|
||||
@@ -33,11 +33,11 @@ class PaperFileGroup():
|
||||
self.sp_file_index.append(index)
|
||||
self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
|
||||
|
||||
print('Segmentation: done')
|
||||
logger.info('Segmentation: done')
|
||||
|
||||
def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
|
||||
import time, os, re
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
|
||||
# <-------- 读取Latex文件,删除其中的所有注释 ---------->
|
||||
pfg = PaperFileGroup()
|
||||
@@ -1,4 +1,5 @@
|
||||
import glob, shutil, os, re, logging
|
||||
import glob, shutil, os, re
|
||||
from loguru import logger
|
||||
from toolbox import update_ui, trimmed_format_exc, gen_time_str
|
||||
from toolbox import CatchException, report_exception, get_log_folder
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
||||
@@ -34,7 +35,7 @@ class PaperFileGroup():
|
||||
self.sp_file_contents.append(segment)
|
||||
self.sp_file_index.append(index)
|
||||
self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.md")
|
||||
logging.info('Segmentation: done')
|
||||
logger.info('Segmentation: done')
|
||||
|
||||
def merge_result(self):
|
||||
self.file_result = ["" for _ in range(len(self.file_paths))]
|
||||
@@ -51,7 +52,7 @@ class PaperFileGroup():
|
||||
return manifest
|
||||
|
||||
def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
|
||||
# <-------- 读取Markdown文件,删除其中的所有注释 ---------->
|
||||
pfg = PaperFileGroup()
|
||||
@@ -64,7 +65,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
pfg.file_contents.append(file_content)
|
||||
|
||||
# <-------- 拆分过长的Markdown文件 ---------->
|
||||
pfg.run_file_split(max_token_limit=2048)
|
||||
pfg.run_file_split(max_token_limit=1024)
|
||||
n_split = len(pfg.sp_file_contents)
|
||||
|
||||
# <-------- 多线程翻译开始 ---------->
|
||||
@@ -106,7 +107,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
expected_f_name = plugin_kwargs['markdown_expected_output_path']
|
||||
shutil.copyfile(output_file, expected_f_name)
|
||||
except:
|
||||
logging.error(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
|
||||
# <-------- 整理结果,退出 ---------->
|
||||
create_report_file_name = gen_time_str() + f"-chatgpt.md"
|
||||
@@ -126,7 +127,7 @@ def get_files_from_everything(txt, preference=''):
|
||||
proxies = get_conf('proxies')
|
||||
# 网络的远程文件
|
||||
if preference == 'Github':
|
||||
logging.info('正在从github下载资源 ...')
|
||||
logger.info('正在从github下载资源 ...')
|
||||
if not txt.endswith('.md'):
|
||||
# Make a request to the GitHub API to retrieve the repository information
|
||||
url = txt.replace("https://github.com/", "https://api.github.com/repos/") + '/readme'
|
||||
|
||||
@@ -2,20 +2,7 @@ from toolbox import CatchException, update_ui, get_conf, get_log_folder, update_
|
||||
from crazy_functions.crazy_utils import input_clipping
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
|
||||
VECTOR_STORE_TYPE = "Milvus"
|
||||
|
||||
if VECTOR_STORE_TYPE == "Milvus":
|
||||
try:
|
||||
from crazy_functions.rag_fns.milvus_worker import MilvusRagWorker as LlamaIndexRagWorker
|
||||
except:
|
||||
VECTOR_STORE_TYPE = "Simple"
|
||||
|
||||
if VECTOR_STORE_TYPE == "Simple":
|
||||
from crazy_functions.rag_fns.llama_index_worker import LlamaIndexRagWorker
|
||||
|
||||
|
||||
RAG_WORKER_REGISTER = {}
|
||||
|
||||
MAX_HISTORY_ROUND = 5
|
||||
MAX_CONTEXT_TOKEN_LIMIT = 4096
|
||||
REMEMBER_PREVIEW = 1000
|
||||
@@ -23,6 +10,16 @@ REMEMBER_PREVIEW = 1000
|
||||
@CatchException
|
||||
def Rag问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
|
||||
|
||||
# import vector store lib
|
||||
VECTOR_STORE_TYPE = "Milvus"
|
||||
if VECTOR_STORE_TYPE == "Milvus":
|
||||
try:
|
||||
from crazy_functions.rag_fns.milvus_worker import MilvusRagWorker as LlamaIndexRagWorker
|
||||
except:
|
||||
VECTOR_STORE_TYPE = "Simple"
|
||||
if VECTOR_STORE_TYPE == "Simple":
|
||||
from crazy_functions.rag_fns.llama_index_worker import LlamaIndexRagWorker
|
||||
|
||||
# 1. we retrieve rag worker from global context
|
||||
user_name = chatbot.get_user()
|
||||
checkpoint_dir = get_log_folder(user_name, plugin_name='experimental_rag')
|
||||
|
||||
@@ -1,7 +1,13 @@
|
||||
import pickle, os, random
|
||||
from toolbox import CatchException, update_ui, get_conf, get_log_folder, update_ui_lastest_msg
|
||||
from crazy_functions.crazy_utils import input_clipping
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
import pickle, os
|
||||
from request_llms.bridge_all import predict_no_ui_long_connection
|
||||
from crazy_functions.json_fns.select_tool import structure_output, select_tool
|
||||
from pydantic import BaseModel, Field
|
||||
from loguru import logger
|
||||
from typing import List
|
||||
|
||||
|
||||
SOCIAL_NETWOK_WORKER_REGISTER = {}
|
||||
|
||||
@@ -9,7 +15,7 @@ class SocialNetwork():
|
||||
def __init__(self):
|
||||
self.people = []
|
||||
|
||||
class SocialNetworkWorker():
|
||||
class SaveAndLoad():
|
||||
def __init__(self, user_name, llm_kwargs, auto_load_checkpoint=True, checkpoint_dir=None) -> None:
|
||||
self.user_name = user_name
|
||||
self.checkpoint_dir = checkpoint_dir
|
||||
@@ -41,8 +47,105 @@ class SocialNetworkWorker():
|
||||
return SocialNetwork()
|
||||
|
||||
|
||||
class Friend(BaseModel):
    # A single contact record: name, free-form description, and relationship.
    # Documented with comments only; no class docstring is added.
    friend_name: str = Field(
        description="name of a friend",
    )
    friend_description: str = Field(
        description="description of a friend (everything about this friend)",
    )
    friend_relationship: str = Field(
        description="The relationship with a friend (e.g. friend, family, colleague)",
    )
|
||||
|
||||
class FriendList(BaseModel):
    # Container model holding several Friend records at once.
    friends_list: List[Friend] = Field(
        description="The list of friends",
    )
|
||||
|
||||
|
||||
class SocialNetworkWorker(SaveAndLoad):
    """Route a user request to one of the social-network tools and run it.

    ``run`` asks the LLM (through ``select_tool``) which tool matches the
    request and then drives the matching ``ai_*`` callback.
    """

    def ai_socail_advice(self, prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type):
        """Placeholder for the "SocialAdvice" tool."""
        # TODO: not implemented yet. Returns None, which `run` treats as
        # "nothing to stream".
        pass

    def ai_remove_friend(self, prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type):
        """Placeholder for the "RemoveFriend" tool."""
        # TODO: not implemented yet. Returns None, which `run` treats as
        # "nothing to stream".
        pass

    def ai_list_friends(self, prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type):
        """Placeholder for the "ListFriends" tool."""
        # TODO: not implemented yet. Returns None, which `run` treats as
        # "nothing to stream".
        pass

    def ai_add_multi_friends(self, prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type):
        """Parse contacts out of ``prompt`` and store each one via ``add_friend``.

        Yields:
            Whatever ``update_ui_lastest_msg`` yields while refreshing the UI.
        """
        friend, err_msg = structure_output(
            txt=prompt,
            prompt="根据提示, 解析多个联系人的身份信息\n\n",
            err_msg="不能理解该联系人",
            run_gpt_fn=run_gpt_fn,
            pydantic_cls=FriendList
        )
        # NOTE(review): presumably structure_output hands back None together
        # with an error text when parsing fails — confirm against its source.
        if friend is None:
            yield from update_ui_lastest_msg(lastmsg=str(err_msg), chatbot=chatbot, history=history, delay=0)
            return
        for f in friend.friends_list:
            self.add_friend(f)
        msg = f"成功添加{len(friend.friends_list)}个联系人: {str(friend.friends_list)}"
        yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=0)

    def run(self, txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
        """Let the LLM pick a tool for ``txt`` and execute its callback.

        Yields:
            UI updates from ``update_ui_lastest_msg`` and from the selected
            callback (when that callback produces any).
        """
        def run_gpt_fn(inputs, sys_prompt):
            return predict_no_ui_long_connection(inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])

        self.tools_to_select = {
            "SocialAdvice":{
                "explain_to_llm": "如果用户希望获取社交指导,调用SocialAdvice生成一些社交建议",
                "callback": self.ai_socail_advice,
            },
            "AddFriends":{
                # The tool name in this text must match the dict key, because
                # the LLM is told to answer with one of the keys.
                "explain_to_llm": "如果用户给出了联系人,调用AddFriends把联系人添加到数据库",
                "callback": self.ai_add_multi_friends,
            },
            "RemoveFriend":{
                "explain_to_llm": "如果用户希望移除某个联系人,调用RemoveFriend",
                "callback": self.ai_remove_friend,
            },
            "ListFriends":{
                "explain_to_llm": "如果用户列举联系人,调用ListFriends",
                "callback": self.ai_list_friends,
            }
        }

        # Pre-bind both names so the failure branch below never touches an
        # unbound variable when select_tool itself raises.
        pydantic_cls_instance, err_msg = None, ""
        try:
            explanation = '\n'.join([f'{k}: {v["explain_to_llm"]}' for k, v in self.tools_to_select.items()])
            class UserSociaIntention(BaseModel):
                intention_type: str = Field(
                    description=
                        f"The type of user intention. You must choose from {self.tools_to_select.keys()}.\n\n"
                        f"Explaination:\n{explanation}",
                    default="SocialAdvice"
                )
            pydantic_cls_instance, err_msg = select_tool(
                prompt=txt,
                run_gpt_fn=run_gpt_fn,
                pydantic_cls=UserSociaIntention
            )
        except Exception as e:
            # Boundary with the LLM call: report the failure instead of crashing.
            err_msg = str(e)

        # NOTE(review): presumably select_tool returns None plus an error text
        # when the reply cannot be parsed — confirm against its source.
        if pydantic_cls_instance is None:
            yield from update_ui_lastest_msg(
                lastmsg=f"无法理解用户意图 {err_msg}",
                chatbot=chatbot,
                history=history,
                delay=0
            )
            return

        intention_type = pydantic_cls_instance.intention_type
        tool = self.tools_to_select.get(intention_type)
        if tool is None:
            # The LLM answered with a tool name that is not registered.
            yield from update_ui_lastest_msg(
                lastmsg=f"无法理解用户意图 {intention_type}",
                chatbot=chatbot,
                history=history,
                delay=0
            )
            return

        outcome = tool['callback'](txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, run_gpt_fn, intention_type)
        # Unimplemented callbacks are plain functions returning None; only a
        # generator result can be delegated to.
        if outcome is not None:
            yield from outcome

    def add_friend(self, friend):
        """Insert ``friend`` or, when the name already exists, refresh that entry."""
        # check whether the friend is already in the social network
        for known in self.social_network.people:
            if known.friend_name != friend.friend_name:
                continue
            known.friend_description = friend.friend_description
            known.friend_relationship = friend.friend_relationship
            logger.info(f"Repeated friend, update info: {friend}")
            return
        logger.info(f"Add a new friend: {friend}")
        self.social_network.people.append(friend)
        return
|
||||
|
||||
|
||||
@CatchException
|
||||
def I人助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request, num_day=5):
|
||||
def I人助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
|
||||
|
||||
# 1. we retrieve worker from global context
|
||||
user_name = chatbot.get_user()
|
||||
@@ -58,8 +161,7 @@ def I人助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt,
|
||||
)
|
||||
|
||||
# 2. save
|
||||
social_network_worker.social_network.people.append("张三")
|
||||
yield from social_network_worker.run(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
|
||||
social_network_worker.save_to_checkpoint(checkpoint_dir)
|
||||
chatbot.append(["good", "work"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@ from crazy_functions.crazy_utils import input_clipping
|
||||
|
||||
def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
import os, copy
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
|
||||
summary_batch_isolation = True
|
||||
inputs_array = []
|
||||
|
||||
@@ -6,7 +6,10 @@ from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_ver
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.agent_fns.python_comment_agent import PythonCodeComment
|
||||
from crazy_functions.diagram_fns.file_tree import FileNode
|
||||
from crazy_functions.agent_fns.watchdog import WatchDog
|
||||
from shared_utils.advanced_markdown_format import markdown_convertion_for_file
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
|
||||
@@ -24,12 +27,13 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
file_tree_struct.add_file(file_path, file_path)
|
||||
|
||||
# <第一步,逐个文件分析,多线程>
|
||||
lang = "" if not plugin_kwargs["use_chinese"] else " (you must use Chinese)"
|
||||
for index, fp in enumerate(file_manifest):
|
||||
# 读取文件
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
file_content = f.read()
|
||||
prefix = ""
|
||||
i_say = prefix + f'Please conclude the following source code at {os.path.relpath(fp, project_folder)} with only one sentence, the code is:\n```{file_content}```'
|
||||
i_say = prefix + f'Please conclude the following source code at {os.path.relpath(fp, project_folder)} with only one sentence{lang}, the code is:\n```{file_content}```'
|
||||
i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 请用一句话对下面的程序文件做一个整体概述: {fp}'
|
||||
# 装载请求内容
|
||||
MAX_TOKEN_SINGLE_FILE = 2560
|
||||
@@ -37,7 +41,7 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
inputs_array.append(i_say)
|
||||
inputs_show_user_array.append(i_say_show_user)
|
||||
history_array.append([])
|
||||
sys_prompt_array.append("You are a software architecture analyst analyzing a source code project. Do not dig into details, tell me what the code is doing in general. Your answer must be short, simple and clear.")
|
||||
sys_prompt_array.append(f"You are a software architecture analyst analyzing a source code project. Do not dig into details, tell me what the code is doing in general. Your answer must be short, simple and clear{lang}.")
|
||||
# 文件读取完成,对每一个源代码文件,生成一个请求线程,发送到大模型进行分析
|
||||
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
inputs_array = inputs_array,
|
||||
@@ -50,10 +54,20 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
)
|
||||
|
||||
# <第二步,逐个文件分析,生成带注释文件>
|
||||
tasks = ["" for _ in range(len(file_manifest))]
|
||||
def bark_fn(tasks):
    """Overwrite every slot of ``tasks`` with the "watchdog is dead" marker.

    The list is modified in place so that every holder of the same list
    object observes the marker.

    Args:
        tasks: mutable list of per-file status strings.
    """
    tasks[:] = ["watchdog is dead"] * len(tasks)
|
||||
wd = WatchDog(timeout=10, bark_fn=lambda: bark_fn(tasks), interval=3, msg="ThreadWatcher timeout")
|
||||
wd.begin_watch()
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
executor = ThreadPoolExecutor(max_workers=get_conf('DEFAULT_WORKER_NUM'))
|
||||
def _task_multi_threading(i_say, gpt_say, fp, file_tree_struct):
|
||||
pcc = PythonCodeComment(llm_kwargs, language='English')
|
||||
def _task_multi_threading(i_say, gpt_say, fp, file_tree_struct, index):
|
||||
language = 'Chinese' if plugin_kwargs["use_chinese"] else 'English'
|
||||
def observe_window_update(x):
|
||||
if tasks[index] == "watchdog is dead":
|
||||
raise TimeoutError("ThreadWatcher: watchdog is dead")
|
||||
tasks[index] = x
|
||||
pcc = PythonCodeComment(llm_kwargs, plugin_kwargs, language=language, observe_window_update=observe_window_update)
|
||||
pcc.read_file(path=fp, brief=gpt_say)
|
||||
revised_path, revised_content = pcc.begin_comment_source_code(None, None)
|
||||
file_tree_struct.manifest[fp].revised_path = revised_path
|
||||
@@ -65,7 +79,8 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
with open("crazy_functions/agent_fns/python_comment_compare.html", 'r', encoding='utf-8') as f:
|
||||
html_template = f.read()
|
||||
warp = lambda x: "```python\n\n" + x + "\n\n```"
|
||||
from themes.theme import advanced_css
|
||||
from themes.theme import load_dynamic_theme
|
||||
_, advanced_css, _, _ = load_dynamic_theme("Default")
|
||||
html_template = html_template.replace("ADVANCED_CSS", advanced_css)
|
||||
html_template = html_template.replace("REPLACE_CODE_FILE_LEFT", pcc.get_markdown_block_in_html(markdown_convertion_for_file(warp(pcc.original_content))))
|
||||
html_template = html_template.replace("REPLACE_CODE_FILE_RIGHT", pcc.get_markdown_block_in_html(markdown_convertion_for_file(warp(revised_content))))
|
||||
@@ -73,17 +88,21 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
file_tree_struct.manifest[fp].compare_html = compare_html_path
|
||||
with open(compare_html_path, 'w', encoding='utf-8') as f:
|
||||
f.write(html_template)
|
||||
print('done 1')
|
||||
tasks[index] = ""
|
||||
|
||||
chatbot.append([None, f"正在处理:"])
|
||||
futures = []
|
||||
index = 0
|
||||
for i_say, gpt_say, fp in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], file_manifest):
|
||||
future = executor.submit(_task_multi_threading, i_say, gpt_say, fp, file_tree_struct)
|
||||
future = executor.submit(_task_multi_threading, i_say, gpt_say, fp, file_tree_struct, index)
|
||||
index += 1
|
||||
futures.append(future)
|
||||
|
||||
# <第三步,等待任务完成>
|
||||
cnt = 0
|
||||
while True:
|
||||
cnt += 1
|
||||
wd.feed()
|
||||
time.sleep(3)
|
||||
worker_done = [h.done() for h in futures]
|
||||
remain = len(worker_done) - sum(worker_done)
|
||||
@@ -92,14 +111,18 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
preview_html_list = []
|
||||
for done, fp in zip(worker_done, file_manifest):
|
||||
if not done: continue
|
||||
preview_html_list.append(file_tree_struct.manifest[fp].compare_html)
|
||||
if hasattr(file_tree_struct.manifest[fp], 'compare_html'):
|
||||
preview_html_list.append(file_tree_struct.manifest[fp].compare_html)
|
||||
else:
|
||||
logger.error(f"文件: {fp} 的注释结果未能成功")
|
||||
file_links = generate_file_link(preview_html_list)
|
||||
|
||||
yield from update_ui_lastest_msg(
|
||||
f"剩余源文件数量: {remain}.\n\n" +
|
||||
f"已完成的文件: {sum(worker_done)}.\n\n" +
|
||||
f"当前任务: <br/>{'<br/>'.join(tasks)}.<br/>" +
|
||||
f"剩余源文件数量: {remain}.<br/>" +
|
||||
f"已完成的文件: {sum(worker_done)}.<br/>" +
|
||||
file_links +
|
||||
"\n\n" +
|
||||
"<br/>" +
|
||||
''.join(['.']*(cnt % 10 + 1)
|
||||
), chatbot=chatbot, history=history, delay=0)
|
||||
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
||||
@@ -120,6 +143,7 @@ def 注释源代码(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
|
||||
@CatchException
|
||||
def 注释Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
|
||||
history = [] # 清空历史,以免输入溢出
|
||||
plugin_kwargs["use_chinese"] = plugin_kwargs.get("use_chinese", False)
|
||||
import glob, os
|
||||
if os.path.exists(txt):
|
||||
project_folder = txt
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
|
||||
from toolbox import get_conf, update_ui
|
||||
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
|
||||
from crazy_functions.SourceCode_Comment import 注释Python项目
|
||||
|
||||
class SourceCodeComment_Wrap(GptAcademicPluginTemplate):
    def __init__(self):
        """
        Note: `execute` runs in a different thread, so be extremely careful
        when defining and using class variables.
        """

    def define_arg_selection_menu(self):
        """
        Define the plugin's secondary option menu.
        """
        menu = {}
        # Main input, synchronised automatically from the input box.
        menu["main_input"] = ArgProperty(
            title="路径",
            description="程序路径(上传文件后自动填写)",
            default_value="",
            type="string",
        ).model_dump_json()
        # Language of the generated comments.
        menu["use_chinese"] = ArgProperty(
            title="注释语言",
            options=["英文", "中文"],
            default_value="英文",
            description="无",
            type="dropdown",
        ).model_dump_json()
        # A "use_emoji" dropdown (allow/forbid emoji in comments) is not
        # enabled at the moment.
        return menu

    # NOTE(review): declared without `self`, exactly as in the original;
    # presumably the framework calls it through the class — confirm.
    def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
        """
        Run the plugin.
        """
        # Turn the dropdown label into the boolean flag passed downstream.
        plugin_kwargs["use_chinese"] = plugin_kwargs["use_chinese"] == "中文"

        yield from 注释Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
|
||||
@@ -1,4 +1,5 @@
|
||||
from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
|
||||
from loguru import logger
|
||||
|
||||
class EchoDemo(PluginMultiprocessManager):
|
||||
def subprocess_worker(self, child_conn):
|
||||
@@ -16,4 +17,4 @@ class EchoDemo(PluginMultiprocessManager):
|
||||
elif msg.cmd == "terminate":
|
||||
self.child_conn.send(PipeCom("done", ""))
|
||||
break
|
||||
print('[debug] subprocess_worker terminated')
|
||||
logger.info('[debug] subprocess_worker terminated')
|
||||
@@ -1,5 +1,6 @@
|
||||
from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
|
||||
from crazy_functions.agent_fns.watchdog import WatchDog
|
||||
from loguru import logger
|
||||
import time, os
|
||||
|
||||
class PipeCom:
|
||||
@@ -47,7 +48,7 @@ class PluginMultiprocessManager:
|
||||
def terminate(self):
|
||||
self.p.terminate()
|
||||
self.alive = False
|
||||
print("[debug] instance terminated")
|
||||
logger.info("[debug] instance terminated")
|
||||
|
||||
def subprocess_worker(self, child_conn):
|
||||
# ⭐⭐ run in subprocess
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
from toolbox import CatchException, update_ui
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from request_llms.bridge_all import predict_no_ui_long_connection
|
||||
import datetime
|
||||
import re
|
||||
import os
|
||||
from loguru import logger
|
||||
from textwrap import dedent
|
||||
from toolbox import CatchException, update_ui
|
||||
from request_llms.bridge_all import predict_no_ui_long_connection
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
|
||||
# TODO: 解决缩进问题
|
||||
|
||||
find_function_end_prompt = '''
|
||||
@@ -66,6 +68,7 @@ Be aware:
|
||||
1. You must NOT modify the indent of code.
|
||||
2. You are NOT authorized to change or translate non-comment code, and you are NOT authorized to add empty lines either, toggle qu.
|
||||
3. Use {LANG} to add comments and docstrings. Do NOT translate Chinese that is already in the code.
|
||||
4. Besides adding a docstring, use the ⭐ symbol to annotate the most core and important line of code within the function, explaining its role.
|
||||
|
||||
------------------ Example ------------------
|
||||
INPUT:
|
||||
@@ -114,10 +117,66 @@ def zip_result(folder):
|
||||
'''
|
||||
|
||||
|
||||
revise_funtion_prompt_chinese = '''
|
||||
您需要阅读以下代码,并根据以下说明修订源代码({FILE_BASENAME}):
|
||||
1. 如果源代码中包含函数的话, 你应该分析给定函数实现了什么功能
|
||||
2. 如果源代码中包含函数的话, 你需要为函数添加docstring, docstring必须使用中文
|
||||
|
||||
请注意:
|
||||
1. 你不得修改代码的缩进
|
||||
2. 你无权更改或翻译代码中的非注释部分,也不允许添加空行
|
||||
3. 使用 {LANG} 添加注释和文档字符串。不要翻译代码中已有的中文
|
||||
4. 除了添加docstring之外, 使用⭐符号给该函数中最核心、最重要的一行代码添加注释,并说明其作用
|
||||
|
||||
------------------ 示例 ------------------
|
||||
INPUT:
|
||||
```
|
||||
L0000 |
|
||||
L0001 |def zip_result(folder):
|
||||
L0002 | t = gen_time_str()
|
||||
L0003 | zip_folder(folder, get_log_folder(), f"result.zip")
|
||||
L0004 | return os.path.join(get_log_folder(), f"result.zip")
|
||||
L0005 |
|
||||
L0006 |
|
||||
```
|
||||
|
||||
OUTPUT:
|
||||
|
||||
<instruction_1_purpose>
|
||||
该函数用于压缩指定文件夹,并返回生成的`zip`文件的路径。
|
||||
</instruction_1_purpose>
|
||||
<instruction_2_revised_code>
|
||||
```
|
||||
def zip_result(folder):
|
||||
"""
|
||||
该函数将指定的文件夹压缩成ZIP文件, 并将其存储在日志文件夹中。
|
||||
|
||||
输入参数:
|
||||
folder (str): 需要压缩的文件夹的路径。
|
||||
返回值:
|
||||
str: 日志文件夹中创建的ZIP文件的路径。
|
||||
"""
|
||||
t = gen_time_str()
|
||||
zip_folder(folder, get_log_folder(), f"result.zip") # ⭐ 执行文件夹的压缩
|
||||
return os.path.join(get_log_folder(), f"result.zip")
|
||||
```
|
||||
</instruction_2_revised_code>
|
||||
------------------ End of Example ------------------
|
||||
|
||||
|
||||
------------------ the real INPUT you need to process NOW ({FILE_BASENAME}) ------------------
|
||||
```
|
||||
{THE_CODE}
|
||||
```
|
||||
{INDENT_REMINDER}
|
||||
{BRIEF_REMINDER}
|
||||
{HINT_REMINDER}
|
||||
'''
|
||||
|
||||
|
||||
class PythonCodeComment():
|
||||
|
||||
def __init__(self, llm_kwargs, language) -> None:
|
||||
def __init__(self, llm_kwargs, plugin_kwargs, language, observe_window_update) -> None:
|
||||
self.original_content = ""
|
||||
self.full_context = []
|
||||
self.full_context_with_line_no = []
|
||||
@@ -125,7 +184,13 @@ class PythonCodeComment():
|
||||
self.page_limit = 100 # 100 lines of code each page
|
||||
self.ignore_limit = 20
|
||||
self.llm_kwargs = llm_kwargs
|
||||
self.plugin_kwargs = plugin_kwargs
|
||||
self.language = language
|
||||
self.observe_window_update = observe_window_update
|
||||
if self.language == "chinese":
|
||||
self.core_prompt = revise_funtion_prompt_chinese
|
||||
else:
|
||||
self.core_prompt = revise_funtion_prompt
|
||||
self.path = None
|
||||
self.file_basename = None
|
||||
self.file_brief = ""
|
||||
@@ -256,7 +321,7 @@ class PythonCodeComment():
|
||||
hint_reminder = "" if hint is None else f"(Reminder: do not ignore or modify code such as `{hint}`, provide complete code in the OUTPUT.)"
|
||||
self.llm_kwargs['temperature'] = 0
|
||||
result = predict_no_ui_long_connection(
|
||||
inputs=revise_funtion_prompt.format(
|
||||
inputs=self.core_prompt.format(
|
||||
LANG=self.language,
|
||||
FILE_BASENAME=self.file_basename,
|
||||
THE_CODE=code,
|
||||
@@ -346,6 +411,7 @@ class PythonCodeComment():
|
||||
try:
|
||||
# yield from update_ui_lastest_msg(f"({self.file_basename}) 正在读取下一段代码片段:\n", chatbot=chatbot, history=history, delay=0)
|
||||
next_batch, line_no_start, line_no_end = self.get_next_batch()
|
||||
self.observe_window_update(f"正在处理{self.file_basename} - {line_no_start}/{len(self.full_context)}\n")
|
||||
# yield from update_ui_lastest_msg(f"({self.file_basename}) 处理代码片段:\n\n{next_batch}", chatbot=chatbot, history=history, delay=0)
|
||||
|
||||
hint = None
|
||||
@@ -355,7 +421,7 @@ class PythonCodeComment():
|
||||
try:
|
||||
successful, hint = self.verify_successful(next_batch, result)
|
||||
except Exception as e:
|
||||
print('ignored exception:\n' + str(e))
|
||||
logger.error('ignored exception:\n' + str(e))
|
||||
break
|
||||
if successful:
|
||||
break
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import threading, time
|
||||
from loguru import logger
|
||||
|
||||
class WatchDog():
|
||||
def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
|
||||
@@ -13,7 +14,7 @@ class WatchDog():
|
||||
while True:
|
||||
if self.kill_dog: break
|
||||
if time.time() - self.last_feed > self.timeout:
|
||||
if len(self.msg) > 0: print(self.msg)
|
||||
if len(self.msg) > 0: logger.info(self.msg)
|
||||
self.bark_fn()
|
||||
break
|
||||
time.sleep(self.interval)
|
||||
|
||||
@@ -1,39 +1,47 @@
|
||||
import ast
|
||||
import token
|
||||
import tokenize
|
||||
import copy
|
||||
import io
|
||||
|
||||
class CommentRemover(ast.NodeTransformer):
|
||||
def visit_FunctionDef(self, node):
|
||||
# 移除函数的文档字符串
|
||||
if (node.body and isinstance(node.body[0], ast.Expr) and
|
||||
isinstance(node.body[0].value, ast.Str)):
|
||||
node.body = node.body[1:]
|
||||
self.generic_visit(node)
|
||||
return node
|
||||
|
||||
def visit_ClassDef(self, node):
|
||||
# 移除类的文档字符串
|
||||
if (node.body and isinstance(node.body[0], ast.Expr) and
|
||||
isinstance(node.body[0].value, ast.Str)):
|
||||
node.body = node.body[1:]
|
||||
self.generic_visit(node)
|
||||
return node
|
||||
def remove_python_comments(input_source: str) -> str:
|
||||
source_flag = copy.copy(input_source)
|
||||
source = io.StringIO(input_source)
|
||||
ls = input_source.split('\n')
|
||||
prev_toktype = token.INDENT
|
||||
readline = source.readline
|
||||
|
||||
def visit_Module(self, node):
|
||||
# 移除模块的文档字符串
|
||||
if (node.body and isinstance(node.body[0], ast.Expr) and
|
||||
isinstance(node.body[0].value, ast.Str)):
|
||||
node.body = node.body[1:]
|
||||
self.generic_visit(node)
|
||||
return node
|
||||
|
||||
def get_char_index(lineno, col):
    """Return the offset in the source text of the character at (lineno, col).

    ``lineno`` is 1-based. The offset is the joined length of all preceding
    lines (taken from the enclosing ``ls`` list) plus ``col``, plus one for
    the newline that terminates the previous line; line 1 has no such newline.
    """
    preceding_len = len('\n'.join(ls[:(lineno-1)]))
    newline_before = 0 if lineno == 1 else 1
    return preceding_len + col + newline_before
|
||||
|
||||
def replace_char_between(start_lineno, start_col, end_lineno, end_col, source, replace_char, ls):
    """Mask the text between two (line, column) positions of ``source``.

    Every character in the half-open span is replaced by ``replace_char``,
    except newlines and spaces, which are kept so the layout of the text does
    not change. ``replace_char`` is expected to be a single character so the
    length of ``source`` is preserved.

    The span is rewritten in a single pass; the previous implementation
    rebuilt the whole string once per character, which is quadratic in the
    size of the source.

    Args:
        start_lineno, start_col: 1-based line and column where the span starts.
        end_lineno, end_col: 1-based line and column where the span ends (exclusive).
        source: the text to mask.
        replace_char: the character written over masked positions.
        ls: unused; kept for interface compatibility.

    Returns:
        The masked copy of ``source``.
    """
    b = get_char_index(start_lineno, start_col)
    e = get_char_index(end_lineno, end_col)
    if b >= e:
        # Empty or inverted span: nothing to mask.
        return source
    masked = ''.join(ch if ch in '\n ' else replace_char for ch in source[b:e])
    return source[:b] + masked + source[e:]
|
||||
|
||||
tokgen = tokenize.generate_tokens(readline)
|
||||
for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
|
||||
if toktype == token.STRING and (prev_toktype == token.INDENT):
|
||||
source_flag = replace_char_between(slineno, scol, elineno, ecol, source_flag, ' ', ls)
|
||||
elif toktype == token.STRING and (prev_toktype == token.NEWLINE):
|
||||
source_flag = replace_char_between(slineno, scol, elineno, ecol, source_flag, ' ', ls)
|
||||
elif toktype == tokenize.COMMENT:
|
||||
source_flag = replace_char_between(slineno, scol, elineno, ecol, source_flag, ' ', ls)
|
||||
prev_toktype = toktype
|
||||
return source_flag
|
||||
|
||||
def remove_python_comments(source_code):
|
||||
# 解析源代码为 AST
|
||||
tree = ast.parse(source_code)
|
||||
# 移除注释
|
||||
transformer = CommentRemover()
|
||||
tree = transformer.visit(tree)
|
||||
# 将处理后的 AST 转换回源代码
|
||||
return ast.unparse(tree)
|
||||
|
||||
# 示例使用
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,141 +0,0 @@
|
||||
from toolbox import CatchException, update_ui, promote_file_to_downloadzone
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
import datetime, json
|
||||
|
||||
def fetch_items(list_of_items, batch_size):
    """Yield consecutive slices of ``list_of_items`` with at most ``batch_size`` items each.

    The last slice is shorter when the length is not a multiple of
    ``batch_size``; an empty input yields nothing.
    """
    total = len(list_of_items)
    yield from (
        list_of_items[start:start + batch_size]
        for start in range(0, total, batch_size)
    )
|
||||
|
||||
def string_to_options(arguments):
    """Parse the plugin's advanced-argument string into an ``argparse.Namespace``.

    The string is split with ``shlex`` (so quoted values may contain spaces)
    and parsed like a command line.

    Args:
        arguments: option string such as ``"--batch 10 --llm_to_learn gpt-4"``.

    Returns:
        Namespace with ``llm_to_learn``, ``prompt_prefix``, ``system_prompt``,
        ``batch``, ``pre_seq_len``, ``learning_rate``, ``num_gpus``,
        ``json_dataset`` and ``ptuning_directory``.

    Raises:
        SystemExit: raised by argparse on unknown or malformed options.
    """
    import argparse
    import shlex

    # Create an argparse.ArgumentParser instance
    parser = argparse.ArgumentParser()

    # Add command-line arguments
    parser.add_argument("--llm_to_learn", type=str, help="LLM model to learn", default="gpt-3.5-turbo")
    parser.add_argument("--prompt_prefix", type=str, help="Prompt prefix", default='')
    parser.add_argument("--system_prompt", type=str, help="System prompt", default='')
    # The help text used to be a copy-paste of "System prompt".
    parser.add_argument("--batch", type=int, help="Number of items sent to the LLM per batch", default=50)
    parser.add_argument("--pre_seq_len", type=int, help="pre_seq_len", default=50)
    parser.add_argument("--learning_rate", type=float, help="learning_rate", default=2e-2)
    parser.add_argument("--num_gpus", type=int, help="num_gpus", default=1)
    parser.add_argument("--json_dataset", type=str, help="json_dataset", default="")
    parser.add_argument("--ptuning_directory", type=str, help="ptuning_directory", default="")

    # Parse the arguments
    args = parser.parse_args(shlex.split(arguments))

    return args
|
||||
|
||||
@CatchException
def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    """
    Generate a fine-tuning dataset by asking an LLM to answer every item of a JSON-lines file.

    Each non-blank line of the file at ``txt`` must be a JSON object with a
    "content" key. Items are sent to the model in batches and every
    (content, answer) pair is appended to ``txt + '.generated.json'`` as one
    JSON object per line, which is finally offered for download.

    txt             text typed into the input box; here, the path of the JSON-lines file
    llm_kwargs      LLM parameters such as temperature and top_p; 'llm_model' is overwritten here
    plugin_kwargs   plugin parameters; "advanced_arg" carries the option string
    chatbot         handle of the chat display, used to show messages to the user
    history         chat history (cleared here)
    system_prompt   silent system prompt for the model (not used by this plugin)
    user_request    information about the current user's request (IP address etc.)
    """
    history = []    # clear the history to avoid overflowing the input
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history); return
    else:
        arguments = string_to_options(arguments=args)

    dat = []
    with open(txt, 'r', encoding='utf8') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file),
                # which json.loads would reject.
                continue
            dat.append(json.loads(line)["content"])

    output_path = txt + '.generated.json'
    llm_kwargs['llm_model'] = arguments.llm_to_learn
    for batch in fetch_items(dat, arguments.batch):
        res = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=[f"{arguments.prompt_prefix}\n\n{b}" for b in batch],
            inputs_show_user_array=["Show Nothing" for _ in batch],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[[] for _ in batch],
            sys_prompt_array=[arguments.system_prompt for _ in batch],
            max_workers=10  # maximum parallel load allowed by OpenAI
        )

        # Append after every batch so finished work survives a later failure.
        # res[1::2] takes every second entry starting at index 1 - presumably
        # the model replies; confirm against the helper's return layout.
        with open(output_path, 'a+', encoding='utf8') as f:
            for b, r in zip(batch, res[1::2]):
                f.write(json.dumps({"content":b, "summary":r}, ensure_ascii=False)+'\n')

    promote_file_to_downloadzone(output_path, rename_file='generated.json', chatbot=chatbot)
    return
|
||||
|
||||
|
||||
|
||||
@CatchException
def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    """
    Launch a ``torchrun`` P-tuning job for THUDM/chatglm2-6b and wait for it to finish.

    The job options come from the "advanced_arg" option string in
    ``plugin_kwargs``; the process runs inside ``--ptuning_directory`` and is
    killed if it has not finished after 24 hours.

    txt             text typed into the input box (not used by this plugin)
    llm_kwargs      LLM parameters such as temperature and top_p (not used here)
    plugin_kwargs   plugin parameters; "advanced_arg" carries the option string
    chatbot         handle of the chat display, used to show messages to the user
    history         chat history (cleared here)
    system_prompt   silent system prompt for the model (not used here)
    user_request    information about the current user's request (IP address etc.)
    """
    import subprocess
    history = []    # clear the history to avoid overflowing the input
    # NOTE(review): this message is identical to the dataset-generation plugin's
    # and looks copy-pasted; kept unchanged because it is user-facing text.
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history); return
    else:
        arguments = string_to_options(arguments=args)

    pre_seq_len = arguments.pre_seq_len             # 128
    learning_rate = arguments.learning_rate         # 2e-2
    num_gpus = arguments.num_gpus                   # 1
    json_dataset = arguments.json_dataset           # 't_code.json'
    ptuning_directory = arguments.ptuning_directory # '/home/hmp/ChatGLM2-6B/ptuning'

    # The values above come from user-supplied text, so the command is passed
    # as an argument list and run without a shell: nothing the user typed can
    # be interpreted as shell syntax.
    command = [
        "torchrun", "--standalone", "--nnodes=1", f"--nproc-per-node={num_gpus}", "main.py",
        "--do_train",
        "--train_file", f"AdvertiseGen/{json_dataset}",
        "--validation_file", f"AdvertiseGen/{json_dataset}",
        "--preprocessing_num_workers", "20",
        "--prompt_column", "content",
        "--response_column", "summary",
        "--overwrite_cache",
        "--model_name_or_path", "THUDM/chatglm2-6b",
        "--output_dir", f"output/clothgen-chatglm2-6b-pt-{pre_seq_len}-{learning_rate}",
        "--overwrite_output_dir",
        "--max_source_length", "256",
        "--max_target_length", "256",
        "--per_device_train_batch_size", "1",
        "--per_device_eval_batch_size", "1",
        "--gradient_accumulation_steps", "16",
        "--predict_with_generate",
        "--max_steps", "100",
        "--logging_steps", "10",
        "--save_steps", "20",
        "--learning_rate", f"{learning_rate}",
        "--pre_seq_len", f"{pre_seq_len}",
        "--quantization_bit", "4",
    ]

    process = subprocess.Popen(command, shell=False, cwd=ptuning_directory)
    try:
        process.communicate(timeout=3600*24)
    except subprocess.TimeoutExpired:
        process.kill()
        # Reap the killed child so it does not linger as a zombie.
        process.communicate()
    return
|
||||
@@ -1,8 +1,8 @@
|
||||
from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
|
||||
from shared_utils.char_visual_effect import scolling_visual_effect
|
||||
import threading
|
||||
import os
|
||||
import logging
|
||||
import threading
|
||||
from loguru import logger
|
||||
from shared_utils.char_visual_effect import scolling_visual_effect
|
||||
from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
|
||||
|
||||
def input_clipping(inputs, history, max_token_limit, return_clip_flags=False):
|
||||
"""
|
||||
@@ -133,7 +133,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
|
||||
except:
|
||||
# 【第三种情况】:其他错误:重试几次
|
||||
tb_str = '```\n' + trimmed_format_exc() + '```'
|
||||
print(tb_str)
|
||||
logger.error(tb_str)
|
||||
mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
||||
if retry_op > 0:
|
||||
retry_op -= 1
|
||||
@@ -283,7 +283,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
# 【第三种情况】:其他错误
|
||||
if detect_timeout(): raise RuntimeError("检测到程序终止。")
|
||||
tb_str = '```\n' + trimmed_format_exc() + '```'
|
||||
print(tb_str)
|
||||
logger.error(tb_str)
|
||||
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
|
||||
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
|
||||
if retry_op > 0:
|
||||
@@ -378,7 +378,7 @@ def read_and_clean_pdf_text(fp):
|
||||
import fitz, copy
|
||||
import re
|
||||
import numpy as np
|
||||
from shared_utils.colorful import print亮黄, print亮绿
|
||||
# from shared_utils.colorful import print亮黄, print亮绿
|
||||
fc = 0 # Index 0 文本
|
||||
fs = 1 # Index 1 字体
|
||||
fb = 2 # Index 2 框框
|
||||
@@ -595,7 +595,7 @@ class nougat_interface():
|
||||
def nougat_with_timeout(self, command, cwd, timeout=3600):
|
||||
import subprocess
|
||||
from toolbox import ProxyNetworkActivate
|
||||
logging.info(f'正在执行命令 {command}')
|
||||
logger.info(f'正在执行命令 {command}')
|
||||
with ProxyNetworkActivate("Nougat_Download"):
|
||||
process = subprocess.Popen(command, shell=False, cwd=cwd, env=os.environ)
|
||||
try:
|
||||
@@ -603,7 +603,7 @@ class nougat_interface():
|
||||
except subprocess.TimeoutExpired:
|
||||
process.kill()
|
||||
stdout, stderr = process.communicate()
|
||||
print("Process timed out!")
|
||||
logger.error("Process timed out!")
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
from textwrap import indent
|
||||
from loguru import logger
|
||||
|
||||
class FileNode:
|
||||
def __init__(self, name, build_manifest=False):
|
||||
@@ -60,7 +61,7 @@ class FileNode:
|
||||
current_node.children.append(term)
|
||||
|
||||
def print_files_recursively(self, level=0, code="R0"):
|
||||
print(' '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
|
||||
logger.info(' '*level + self.name + ' ' + str(self.is_leaf) + ' ' + str(self.level))
|
||||
for j, child in enumerate(self.children):
|
||||
child.print_files_recursively(level=level+1, code=code+str(j))
|
||||
self.parenting_ship.extend(child.parenting_ship)
|
||||
@@ -123,4 +124,4 @@ if __name__ == "__main__":
|
||||
"用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器用于加载和分割文件中的文本的通用文件加载器",
|
||||
"包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类包含了用于构建和管理向量数据库的函数和类",
|
||||
]
|
||||
print(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树"))
|
||||
logger.info(build_file_tree_mermaid_diagram(file_manifest, file_comments, "项目文件树"))
|
||||
@@ -24,8 +24,8 @@ class Actor(BaseModel):
|
||||
film_names: List[str] = Field(description="list of names of films they starred in")
|
||||
"""
|
||||
|
||||
import json, re, logging
|
||||
|
||||
import json, re
|
||||
from loguru import logger as logging
|
||||
|
||||
PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
|
||||
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
|
||||
|
||||
def structure_output(txt, prompt, err_msg, run_gpt_fn, pydantic_cls):
    """Ask the model for a reply shaped like ``pydantic_cls`` and parse it.

    The system prompt is ``prompt`` followed by the JSON format instructions
    derived from ``pydantic_cls``. The reply is parsed with automatic repair,
    which may call ``run_gpt_fn`` again.

    Returns:
        ``(parsed_object, "")`` on success, or ``(None, err_msg)`` when the
        reply cannot be turned into valid JSON.
    """
    json_io = GptJsonIO(pydantic_cls)
    system_prompt = prompt + json_io.format_instructions
    raw_reply = run_gpt_fn(txt, sys_prompt=system_prompt)
    try:
        parsed = json_io.generate_output_auto_repair(raw_reply, run_gpt_fn)
    except JsonStringError:
        return None, err_msg
    return parsed, ""
|
||||
|
||||
|
||||
def select_tool(prompt, run_gpt_fn, pydantic_cls):
    """Let the model choose a tool function for ``prompt``.

    Delegates to ``structure_output`` with a fixed system prompt and a fixed
    error message.

    Returns:
        ``(instance_of_pydantic_cls, "")`` on success, or ``(None, error_message)``.
    """
    request = dict(
        txt=prompt,
        prompt="根据提示, 分析应该调用哪个工具函数\n\n",
        err_msg="不能理解该联系人",
        run_gpt_fn=run_gpt_fn,
        pydantic_cls=pydantic_cls,
    )
    chosen_tool, failure_msg = structure_output(**request)
    return chosen_tool, failure_msg
|
||||
@@ -1,15 +1,17 @@
|
||||
from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
|
||||
from toolbox import get_conf, promote_file_to_downloadzone
|
||||
from .latex_toolbox import PRESERVE, TRANSFORM
|
||||
from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
|
||||
from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
|
||||
from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
|
||||
from .latex_toolbox import find_title_and_abs
|
||||
from .latex_pickle_io import objdump, objload
|
||||
|
||||
import os, shutil
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
from toolbox import update_ui, update_ui_lastest_msg, get_log_folder, gen_time_str
|
||||
from toolbox import get_conf, promote_file_to_downloadzone
|
||||
from crazy_functions.latex_fns.latex_toolbox import PRESERVE, TRANSFORM
|
||||
from crazy_functions.latex_fns.latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
|
||||
from crazy_functions.latex_fns.latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
|
||||
from crazy_functions.latex_fns.latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
|
||||
from crazy_functions.latex_fns.latex_toolbox import find_title_and_abs
|
||||
from crazy_functions.latex_fns.latex_pickle_io import objdump, objload
|
||||
|
||||
|
||||
pj = os.path.join
|
||||
|
||||
@@ -323,7 +325,7 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
|
||||
buggy_lines = [int(l) for l in buggy_lines]
|
||||
buggy_lines = sorted(buggy_lines)
|
||||
buggy_line = buggy_lines[0]-1
|
||||
print("reversing tex line that has errors", buggy_line)
|
||||
logger.warning("reversing tex line that has errors", buggy_line)
|
||||
|
||||
# 重组,逆转出错的段落
|
||||
if buggy_line not in fixed_line:
|
||||
@@ -337,7 +339,7 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
|
||||
|
||||
return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
|
||||
except:
|
||||
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
|
||||
logger.error("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
|
||||
return False, -1, [-1]
|
||||
|
||||
|
||||
@@ -380,7 +382,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
||||
|
||||
if mode!='translate_zh':
|
||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
|
||||
print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
|
||||
logger.info( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
|
||||
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())
|
||||
|
||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||
@@ -419,7 +421,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
||||
shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
|
||||
promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
pass
|
||||
return True # 成功啦
|
||||
else:
|
||||
@@ -465,4 +467,71 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
|
||||
promote_file_to_downloadzone(file=res, chatbot=chatbot)
|
||||
except:
|
||||
from toolbox import trimmed_format_exc
|
||||
print('writing html result failed:', trimmed_format_exc())
|
||||
logger.error('writing html result failed:', trimmed_format_exc())
|
||||
|
||||
|
||||
def upload_to_gptac_cloud_if_user_allow(chatbot, arxiv_id):
    """Upload the translated arXiv PDFs of this session to the GPTAC cloud (best effort).

    Nothing is uploaded unless one of the files recorded in
    ``chatbot._cookies["files_to_promote"]`` ends with ``translate_zh.pdf``.
    When it does, every recorded file ending with ``translate_zh.pdf`` or
    ``comparison.pdf`` is posted together with ``arxiv_id`` and the file's
    SHA-256 hash.

    This is a secondary feature: failures are logged and never propagated.
    This function does not itself check the user's consent setting -
    presumably the caller does; verify at the call site.

    Args:
        chatbot: chat handle whose ``_cookies`` dict lists the promoted files.
        arxiv_id: identifier of the paper the files belong to.

    Returns:
        None.
    """
    try:
        promoted_files = chatbot._cookies.get("files_to_promote", [])
        # Skip the upload when the expected translation was not produced.
        if not any(path.endswith('translate_zh.pdf') for path in promoted_files):
            return
        import requests
        from toolbox import map_file_to_sha256
        url = 'https://cloud-2.agent-matrix.com/arxiv_tf_paper_normal_upload'
        for file_path in promoted_files:
            # Normalised name: the last matching suffix wins.
            align_name = None
            for name in ['translate_zh.pdf', 'comparison.pdf']:
                if file_path.endswith(name): align_name = name
            if not align_name:
                continue
            logger.info(f'Uploading to GPTAC cloud as the user has set `allow_cloud_io`: {file_path}')
            with open(file_path, 'rb') as f:
                files = {'file': (align_name, f, 'application/octet-stream')}
                data = {
                    'arxiv_id': arxiv_id,
                    'file_hash': map_file_to_sha256(file_path),
                    'language': 'zh',
                    'trans_prompt': 'to_be_implemented',
                    'llm_model': 'to_be_implemented',
                    'llm_model_param': 'to_be_implemented',
                }
                resp = requests.post(url=url, files=files, data=data, timeout=30)
            logger.info(f'Uploading terminate ({resp.status_code})`: {file_path}')
    except Exception as e:
        # A failed upload must not interrupt the program, but the reason is
        # logged instead of being discarded; KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        logger.warning(f'Uploading to GPTAC cloud failed (ignored): {e!r}')
|
||||
|
||||
def check_gptac_cloud(arxiv_id, chatbot):
    """Fetch cached translation PDFs for ``arxiv_id`` from the GPTAC cloud, if available.

    For ``translate_zh.pdf`` and ``comparison.pdf`` in turn, the cloud is asked
    whether a copy exists. When the reply (with surrounding double quotes
    stripped) starts with ``http`` it is treated as a download URL; the file
    is saved under the ``gptac_cloud`` log folder and promoted to the download
    zone.

    Any failure stops the lookup; whatever was downloaded before it is still
    returned.

    Args:
        arxiv_id: identifier of the paper to look up.
        chatbot: chat handle passed to ``promote_file_to_downloadzone``.

    Returns:
        ``(success, downloaded)``: whether at least one file was fetched, and
        the promoted paths of the fetched files.
    """
    import requests
    success = False
    downloaded = []
    try:
        for pdf_target in ['translate_zh.pdf', 'comparison.pdf']:
            url = 'https://cloud-2.agent-matrix.com/arxiv_tf_paper_normal_exist'
            data = {
                'arxiv_id': arxiv_id,
                'name': pdf_target,
            }
            # Bounded like the other cloud calls so an unreachable server
            # cannot block the plugin forever.
            resp = requests.post(url=url, data=data, timeout=30)
            cache_hit_result = resp.text.strip('"')
            if cache_hit_result.startswith("http"):
                url = cache_hit_result
                logger.info(f'Downloading from GPTAC cloud: {url}')
                resp = requests.get(url=url, timeout=30)
                # Do not save an HTTP error page as if it were the PDF.
                resp.raise_for_status()
                target = os.path.join(get_log_folder(plugin_name='gptac_cloud'), gen_time_str(), pdf_target)
                os.makedirs(os.path.dirname(target), exist_ok=True)
                with open(target, 'wb') as f:
                    f.write(resp.content)
                new_path = promote_file_to_downloadzone(target, chatbot=chatbot)
                success = True
                downloaded.append(new_path)
    except Exception as e:
        # The cloud cache is optional: log the reason and fall through with
        # whatever has been downloaded so far.
        logger.warning(f'Checking GPTAC cloud failed (ignored): {e!r}')
    return success, downloaded
|
||||
|
||||
@@ -6,12 +6,16 @@ class SafeUnpickler(pickle.Unpickler):
|
||||
def get_safe_classes(self):
|
||||
from crazy_functions.latex_fns.latex_actions import LatexPaperFileGroup, LatexPaperSplit
|
||||
from crazy_functions.latex_fns.latex_toolbox import LinkedListNode
|
||||
from numpy.core.multiarray import scalar
|
||||
from numpy import dtype
|
||||
# 定义允许的安全类
|
||||
safe_classes = {
|
||||
# 在这里添加其他安全的类
|
||||
'LatexPaperFileGroup': LatexPaperFileGroup,
|
||||
'LatexPaperSplit': LatexPaperSplit,
|
||||
'LinkedListNode': LinkedListNode,
|
||||
'scalar': scalar,
|
||||
'dtype': dtype,
|
||||
}
|
||||
return safe_classes
|
||||
|
||||
@@ -22,8 +26,6 @@ class SafeUnpickler(pickle.Unpickler):
|
||||
for class_name in self.safe_classes.keys():
|
||||
if (class_name in f'{module}.{name}'):
|
||||
match_class_name = class_name
|
||||
if module == 'numpy' or module.startswith('numpy.'):
|
||||
return super().find_class(module, name)
|
||||
if match_class_name is not None:
|
||||
return self.safe_classes[match_class_name]
|
||||
# 如果尝试加载未授权的类,则抛出异常
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import os, shutil
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
|
||||
PRESERVE = 0
|
||||
TRANSFORM = 1
|
||||
@@ -55,7 +57,7 @@ def post_process(root):
|
||||
str_stack.append("{")
|
||||
elif c == "}":
|
||||
if len(str_stack) == 1:
|
||||
print("stack fix")
|
||||
logger.warning("fixing brace error")
|
||||
return i
|
||||
str_stack.pop(-1)
|
||||
else:
|
||||
@@ -601,7 +603,7 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
|
||||
except subprocess.TimeoutExpired:
|
||||
process.kill()
|
||||
stdout, stderr = process.communicate()
|
||||
print("Process timed out!")
|
||||
logger.error("Process timed out (compile_latex_with_timeout)!")
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -642,6 +644,216 @@ def run_in_subprocess(func):
|
||||
|
||||
|
||||
def _merge_pdfs(pdf1_path, pdf2_path, output_path):
    """Merge two PDFs side by side into ``output_path``.

    Tries ``_merge_pdfs_ng`` first and falls back to ``_merge_pdfs_legacy``
    when it raises. Exceptions from the legacy path propagate to the caller.
    """
    try:
        logger.info("Merging PDFs using _merge_pdfs_ng")
        _merge_pdfs_ng(pdf1_path, pdf2_path, output_path)
    except Exception as e:
        # Only real errors trigger the fallback (not KeyboardInterrupt or
        # SystemExit), and the reason is recorded for diagnosis.
        logger.warning(f"_merge_pdfs_ng failed: {e!r}")
        logger.info("Merging PDFs using _merge_pdfs_legacy")
        _merge_pdfs_legacy(pdf1_path, pdf2_path, output_path)
|
||||
|
||||
|
||||
def _retarget_goto_link(annot_obj, reader, dest, x_offset):
    """Rewrite one /GoTo link annotation so it works inside the merged page.

    The named destination ``dest`` is resolved in ``reader`` (the PDF the
    annotation came from) and replaced by an explicit destination array
    pointing at the same page number. When ``x_offset`` is not None, the
    destination's left coordinate (for /XYZ destinations) and the clickable
    rectangle are shifted right by that amount; with None the coordinates are
    only re-wrapped as FloatObject, unchanged.
    """
    from PyPDF2.generic import NameObject, ArrayObject, FloatObject, NumberObject

    def shifted(value):
        return value if x_offset is None else value + x_offset

    # Look up the jump target in the original file, including its page.
    destination = reader.named_destinations[dest]
    page_number = reader.get_destination_page_number(destination)
    dest_array = destination.dest_array
    # Explicit destination, e.g. "/D": [10, '/XYZ', 100, 100, 0]
    if dest_array[1] == "/XYZ":
        new_dest = [
            NumberObject(page_number),
            dest_array[1],
            FloatObject(shifted(dest_array[2])),
            dest_array[3],
            dest_array[4],
        ]
    else:
        new_dest = [NumberObject(page_number), dest_array[1]]
    # Keys and values must be PdfObject instances.
    annot_obj["/A"].update({NameObject("/D"): ArrayObject(new_dest)})

    # Update the clickable area.
    rect = annot_obj.get("/Rect")
    new_rect = ArrayObject(
        [
            FloatObject(shifted(rect[0])),
            rect[1],
            FloatObject(shifted(rect[2])),
            rect[3],
        ]
    )
    annot_obj.update({NameObject("/Rect"): new_rect})


def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
    """Merge two PDFs page by page, side by side, keeping internal links usable.

    Page ``i`` of the output holds page ``i`` of ``pdf1_path`` on the left and
    page ``i`` of ``pdf2_path`` to its right; when one file is shorter, a blank
    page stands in for it. Internal (/GoTo) link annotations are rewritten to
    point at explicit pages, and those that came from the second PDF are
    shifted right by the width of the left page. External (/URI) links are
    left untouched.

    Args:
        pdf1_path: path of the PDF placed on the left.
        pdf2_path: path of the PDF placed on the right.
        output_path: path the merged PDF is written to.
    """
    import PyPDF2  # PyPDF2 leaks memory badly; run this in a subprocess so the memory can be released

    Percent = 1
    # Open the first PDF file
    with open(pdf1_path, "rb") as pdf1_file:
        pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
        # Open the second PDF file
        with open(pdf2_path, "rb") as pdf2_file:
            pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
            # Create a new PDF file to store the merged pages
            output_writer = PyPDF2.PdfFileWriter()
            # Determine the number of pages in each PDF file
            num_pages = max(pdf1_reader.numPages, pdf2_reader.numPages)
            # Merge the pages from the two PDF files
            for page_num in range(num_pages):
                # Add the page from the first PDF file
                if page_num < pdf1_reader.numPages:
                    page1 = pdf1_reader.getPage(page_num)
                else:
                    page1 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
                # Add the page from the second PDF file
                if page_num < pdf2_reader.numPages:
                    page2 = pdf2_reader.getPage(page_num)
                else:
                    # NOTE(review): the blank stand-in for the second file is
                    # created from pdf1_reader - confirm this is intended.
                    page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)

                page1_width = int(page1.mediaBox.getWidth())
                page2_width = int(page2.mediaBox.getWidth())
                # Create a new empty page with double width
                new_page = PyPDF2.PageObject.createBlankPage(
                    width=int(page1_width + page2_width * Percent),
                    height=max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight()),
                )
                new_page.mergeTranslatedPage(page1, 0, 0)
                new_page.mergeTranslatedPage(
                    page2,
                    int(page1_width - page2_width * (1 - Percent)),
                    0,
                )
                if "/Annots" in new_page:
                    for annot in new_page["/Annots"]:
                        annot_obj = annot.get_object()
                        # Only link annotations need fixing.
                        if annot_obj.get("/Subtype") != "/Link":
                            continue
                        action = annot_obj.get("/A")
                        if not action:
                            continue
                        # Internal jumps (/GoTo) are rewritten; external /URI
                        # links need no change.
                        if "/S" in action and action["/S"] == "/GoTo":
                            dest = action.get("/D")  # target page or position
                            if dest and page2.annotations:
                                if annot in page2.annotations:
                                    _retarget_goto_link(annot_obj, pdf2_reader, dest, page1_width)
                            if dest and page1.annotations:
                                if annot in page1.annotations:
                                    _retarget_goto_link(annot_obj, pdf1_reader, dest, None)
                output_writer.addPage(new_page)
            # Save the merged PDF file while both inputs are still open.
            with open(output_path, "wb") as output_file:
                output_writer.write(output_file)
|
||||
|
||||
|
||||
def _merge_pdfs_legacy(pdf1_path, pdf2_path, output_path):
|
||||
import PyPDF2 # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放
|
||||
|
||||
Percent = 0.95
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import time, logging, json, sys, struct
|
||||
import time, json, sys, struct
|
||||
import numpy as np
|
||||
from loguru import logger as logging
|
||||
from scipy.io.wavfile import WAVE_FORMAT
|
||||
|
||||
def write_numpy_to_wave(filename, rate, data, add_header=False):
|
||||
@@ -106,18 +107,14 @@ def is_speaker_speaking(vad, data, sample_rate):
|
||||
class AliyunASR():
|
||||
|
||||
def test_on_sentence_begin(self, message, *args):
|
||||
# print("test_on_sentence_begin:{}".format(message))
|
||||
pass
|
||||
|
||||
def test_on_sentence_end(self, message, *args):
|
||||
# print("test_on_sentence_end:{}".format(message))
|
||||
message = json.loads(message)
|
||||
self.parsed_sentence = message['payload']['result']
|
||||
self.event_on_entence_end.set()
|
||||
# print(self.parsed_sentence)
|
||||
|
||||
def test_on_start(self, message, *args):
|
||||
# print("test_on_start:{}".format(message))
|
||||
pass
|
||||
|
||||
def test_on_error(self, message, *args):
|
||||
@@ -129,13 +126,11 @@ class AliyunASR():
|
||||
pass
|
||||
|
||||
def test_on_result_chg(self, message, *args):
|
||||
# print("test_on_chg:{}".format(message))
|
||||
message = json.loads(message)
|
||||
self.parsed_text = message['payload']['result']
|
||||
self.event_on_result_chg.set()
|
||||
|
||||
def test_on_completed(self, message, *args):
|
||||
# print("on_completed:args=>{} message=>{}".format(args, message))
|
||||
pass
|
||||
|
||||
def audio_convertion_thread(self, uuid):
|
||||
@@ -248,14 +243,14 @@ class AliyunASR():
|
||||
|
||||
try:
|
||||
response = client.do_action_with_exception(request)
|
||||
print(response)
|
||||
logging.info(response)
|
||||
jss = json.loads(response)
|
||||
if 'Token' in jss and 'Id' in jss['Token']:
|
||||
token = jss['Token']['Id']
|
||||
expireTime = jss['Token']['ExpireTime']
|
||||
print("token = " + token)
|
||||
print("expireTime = " + str(expireTime))
|
||||
logging.info("token = " + token)
|
||||
logging.info("expireTime = " + str(expireTime))
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logging.error(e)
|
||||
|
||||
return token
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
|
||||
from loguru import logger
|
||||
|
||||
def force_breakdown(txt, limit, get_token_fn):
|
||||
""" 当无法用标点、空行分割时,我们用最暴力的方法切割
|
||||
@@ -76,7 +77,7 @@ def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=F
|
||||
remain_txt_to_cut = post
|
||||
remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
|
||||
process = fin_len/total_len
|
||||
print(f'正在文本切分 {int(process*100)}%')
|
||||
logger.info(f'正在文本切分 {int(process*100)}%')
|
||||
if len(remain_txt_to_cut.strip()) == 0:
|
||||
break
|
||||
return res
|
||||
@@ -119,7 +120,7 @@ if __name__ == '__main__':
|
||||
for i in range(5):
|
||||
file_content += file_content
|
||||
|
||||
print(len(file_content))
|
||||
logger.info(len(file_content))
|
||||
TOKEN_LIMIT_PER_FRAGMENT = 2500
|
||||
res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from crazy_functions.crazy_utils import read_and_clean_pdf_text
|
||||
from shared_utils.colorful import *
|
||||
from loguru import logger
|
||||
import os
|
||||
|
||||
def 解析PDF_简单拆解(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
@@ -93,7 +94,7 @@ def 解析PDF_简单拆解(file_manifest, project_folder, llm_kwargs, plugin_kwa
|
||||
generated_html_files.append(ch.save_file(create_report_file_name))
|
||||
except:
|
||||
from toolbox import trimmed_format_exc
|
||||
print('writing html result failed:', trimmed_format_exc())
|
||||
logger.error('writing html result failed:', trimmed_format_exc())
|
||||
|
||||
# 准备文件的下载
|
||||
for pdf_path in generated_conclusion_files:
|
||||
|
||||
@@ -4,7 +4,9 @@ from toolbox import promote_file_to_downloadzone, extract_archive
|
||||
from toolbox import generate_file_link, zip_folder
|
||||
from crazy_functions.crazy_utils import get_files_from_everything
|
||||
from shared_utils.colorful import *
|
||||
from loguru import logger
|
||||
import os
|
||||
import time
|
||||
|
||||
def refresh_key(doc2x_api_key):
|
||||
import requests, json
|
||||
@@ -22,105 +24,140 @@ def refresh_key(doc2x_api_key):
|
||||
raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
|
||||
return doc2x_api_key
|
||||
|
||||
|
||||
|
||||
def 解析PDF_DOC2X_转Latex(pdf_file_path):
|
||||
zip_file_path, unzipped_folder = 解析PDF_DOC2X(pdf_file_path, format='tex')
|
||||
return unzipped_folder
|
||||
|
||||
|
||||
def 解析PDF_DOC2X(pdf_file_path, format='tex'):
|
||||
"""
|
||||
format: 'tex', 'md', 'docx'
|
||||
"""
|
||||
import requests, json, os
|
||||
DOC2X_API_KEY = get_conf('DOC2X_API_KEY')
|
||||
latex_dir = get_log_folder(plugin_name="pdf_ocr_latex")
|
||||
markdown_dir = get_log_folder(plugin_name="pdf_ocr")
|
||||
doc2x_api_key = DOC2X_API_KEY
|
||||
if doc2x_api_key.startswith('sk-'):
|
||||
url = "https://api.doc2x.noedgeai.com/api/v1/pdf"
|
||||
else:
|
||||
doc2x_api_key = refresh_key(doc2x_api_key)
|
||||
url = "https://api.doc2x.noedgeai.com/api/platform/pdf"
|
||||
|
||||
|
||||
# < ------ 第1步:上传 ------ >
|
||||
logger.info("Doc2x 第1步:上传")
|
||||
with open(pdf_file_path, 'rb') as file:
|
||||
res = requests.post(
|
||||
"https://v2.doc2x.noedgeai.com/api/v2/parse/pdf",
|
||||
headers={"Authorization": "Bearer " + doc2x_api_key},
|
||||
data=file
|
||||
)
|
||||
# res_json = []
|
||||
if res.status_code == 200:
|
||||
res_json = res.json()
|
||||
else:
|
||||
raise RuntimeError(f"Doc2x return an error: {res.json()}")
|
||||
uuid = res_json['data']['uid']
|
||||
|
||||
# < ------ 第2步:轮询等待 ------ >
|
||||
logger.info("Doc2x 第2步:轮询等待")
|
||||
params = {'uid': uuid}
|
||||
while True:
|
||||
res = requests.get(
|
||||
'https://v2.doc2x.noedgeai.com/api/v2/parse/status',
|
||||
headers={"Authorization": "Bearer " + doc2x_api_key},
|
||||
params=params
|
||||
)
|
||||
res_json = res.json()
|
||||
if res_json['data']['status'] == "success":
|
||||
break
|
||||
elif res_json['data']['status'] == "processing":
|
||||
time.sleep(3)
|
||||
logger.info(f"Doc2x is processing at {res_json['data']['progress']}%")
|
||||
elif res_json['data']['status'] == "failed":
|
||||
raise RuntimeError(f"Doc2x return an error: {res_json}")
|
||||
|
||||
|
||||
# < ------ 第3步:提交转化 ------ >
|
||||
logger.info("Doc2x 第3步:提交转化")
|
||||
data = {
|
||||
"uid": uuid,
|
||||
"to": format,
|
||||
"formula_mode": "dollar",
|
||||
"filename": "output"
|
||||
}
|
||||
res = requests.post(
|
||||
url,
|
||||
files={"file": open(pdf_file_path, "rb")},
|
||||
data={"ocr": "1"},
|
||||
headers={"Authorization": "Bearer " + doc2x_api_key}
|
||||
'https://v2.doc2x.noedgeai.com/api/v2/convert/parse',
|
||||
headers={"Authorization": "Bearer " + doc2x_api_key},
|
||||
json=data
|
||||
)
|
||||
res_json = []
|
||||
if res.status_code == 200:
|
||||
decoded = res.content.decode("utf-8")
|
||||
for z_decoded in decoded.split('\n'):
|
||||
if len(z_decoded) == 0: continue
|
||||
assert z_decoded.startswith("data: ")
|
||||
z_decoded = z_decoded[len("data: "):]
|
||||
decoded_json = json.loads(z_decoded)
|
||||
res_json.append(decoded_json)
|
||||
res_json = res.json()
|
||||
else:
|
||||
raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
|
||||
raise RuntimeError(f"Doc2x return an error: {res.json()}")
|
||||
|
||||
uuid = res_json[0]['uuid']
|
||||
to = "latex" # latex, md, docx
|
||||
url = "https://api.doc2x.noedgeai.com/api/export"+"?request_id="+uuid+"&to="+to
|
||||
|
||||
res = requests.get(url, headers={"Authorization": "Bearer " + doc2x_api_key})
|
||||
latex_zip_path = os.path.join(latex_dir, gen_time_str() + '.zip')
|
||||
latex_unzip_path = os.path.join(latex_dir, gen_time_str())
|
||||
if res.status_code == 200:
|
||||
with open(latex_zip_path, "wb") as f: f.write(res.content)
|
||||
else:
|
||||
raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
|
||||
# < ------ 第4步:等待结果 ------ >
|
||||
logger.info("Doc2x 第4步:等待结果")
|
||||
params = {'uid': uuid}
|
||||
while True:
|
||||
res = requests.get(
|
||||
'https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result',
|
||||
headers={"Authorization": "Bearer " + doc2x_api_key},
|
||||
params=params
|
||||
)
|
||||
res_json = res.json()
|
||||
if res_json['data']['status'] == "success":
|
||||
break
|
||||
elif res_json['data']['status'] == "processing":
|
||||
time.sleep(3)
|
||||
logger.info(f"Doc2x still processing")
|
||||
elif res_json['data']['status'] == "failed":
|
||||
raise RuntimeError(f"Doc2x return an error: {res_json}")
|
||||
|
||||
|
||||
# < ------ 第5步:最后的处理 ------ >
|
||||
logger.info("Doc2x 第5步:最后的处理")
|
||||
|
||||
if format=='tex':
|
||||
target_path = latex_dir
|
||||
if format=='md':
|
||||
target_path = markdown_dir
|
||||
os.makedirs(target_path, exist_ok=True)
|
||||
|
||||
max_attempt = 3
|
||||
# < ------ 下载 ------ >
|
||||
for attempt in range(max_attempt):
|
||||
try:
|
||||
result_url = res_json['data']['url']
|
||||
res = requests.get(result_url)
|
||||
zip_path = os.path.join(target_path, gen_time_str() + '.zip')
|
||||
unzip_path = os.path.join(target_path, gen_time_str())
|
||||
if res.status_code == 200:
|
||||
with open(zip_path, "wb") as f: f.write(res.content)
|
||||
else:
|
||||
raise RuntimeError(f"Doc2x return an error: {res.json()}")
|
||||
except Exception as e:
|
||||
if attempt < max_attempt - 1:
|
||||
logger.error(f"Failed to download latex file, retrying... {e}")
|
||||
time.sleep(3)
|
||||
continue
|
||||
else:
|
||||
raise e
|
||||
|
||||
# < ------ 解压 ------ >
|
||||
import zipfile
|
||||
with zipfile.ZipFile(latex_zip_path, 'r') as zip_ref:
|
||||
zip_ref.extractall(latex_unzip_path)
|
||||
|
||||
|
||||
return latex_unzip_path
|
||||
|
||||
|
||||
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
|
||||
zip_ref.extractall(unzip_path)
|
||||
return zip_path, unzip_path
|
||||
|
||||
|
||||
def 解析PDF_DOC2X_单文件(fp, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request):
|
||||
|
||||
|
||||
def pdf2markdown(filepath):
|
||||
import requests, json, os
|
||||
markdown_dir = get_log_folder(plugin_name="pdf_ocr")
|
||||
doc2x_api_key = DOC2X_API_KEY
|
||||
if doc2x_api_key.startswith('sk-'):
|
||||
url = "https://api.doc2x.noedgeai.com/api/v1/pdf"
|
||||
else:
|
||||
doc2x_api_key = refresh_key(doc2x_api_key)
|
||||
url = "https://api.doc2x.noedgeai.com/api/platform/pdf"
|
||||
|
||||
chatbot.append((None, "加载PDF文件,发送至DOC2X解析..."))
|
||||
chatbot.append((None, f"Doc2x 解析中"))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
res = requests.post(
|
||||
url,
|
||||
files={"file": open(filepath, "rb")},
|
||||
data={"ocr": "1"},
|
||||
headers={"Authorization": "Bearer " + doc2x_api_key}
|
||||
)
|
||||
res_json = []
|
||||
if res.status_code == 200:
|
||||
decoded = res.content.decode("utf-8")
|
||||
for z_decoded in decoded.split('\n'):
|
||||
if len(z_decoded) == 0: continue
|
||||
assert z_decoded.startswith("data: ")
|
||||
z_decoded = z_decoded[len("data: "):]
|
||||
decoded_json = json.loads(z_decoded)
|
||||
res_json.append(decoded_json)
|
||||
if 'limit exceeded' in decoded_json.get('status', ''):
|
||||
raise RuntimeError("Doc2x API 页数受限,请联系 Doc2x 方面,并更换新的 API 秘钥。")
|
||||
else:
|
||||
raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
|
||||
uuid = res_json[0]['uuid']
|
||||
to = "md" # latex, md, docx
|
||||
url = "https://api.doc2x.noedgeai.com/api/export"+"?request_id="+uuid+"&to="+to
|
||||
md_zip_path, unzipped_folder = 解析PDF_DOC2X(filepath, format='md')
|
||||
|
||||
chatbot.append((None, f"读取解析: {url} ..."))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
res = requests.get(url, headers={"Authorization": "Bearer " + doc2x_api_key})
|
||||
md_zip_path = os.path.join(markdown_dir, gen_time_str() + '.zip')
|
||||
if res.status_code == 200:
|
||||
with open(md_zip_path, "wb") as f: f.write(res.content)
|
||||
else:
|
||||
raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
|
||||
promote_file_to_downloadzone(md_zip_path, chatbot=chatbot)
|
||||
chatbot.append((None, f"完成解析 {md_zip_path} ..."))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import llama_index
|
||||
import os
|
||||
import atexit
|
||||
from loguru import logger
|
||||
from typing import List
|
||||
from llama_index.core import Document
|
||||
from llama_index.core.schema import TextNode
|
||||
@@ -41,14 +42,14 @@ class SaveLoad():
|
||||
return True
|
||||
|
||||
def save_to_checkpoint(self, checkpoint_dir=None):
|
||||
print(f'saving vector store to: {checkpoint_dir}')
|
||||
logger.info(f'saving vector store to: {checkpoint_dir}')
|
||||
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
|
||||
self.vs_index.storage_context.persist(persist_dir=checkpoint_dir)
|
||||
|
||||
def load_from_checkpoint(self, checkpoint_dir=None):
|
||||
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
|
||||
if self.does_checkpoint_exist(checkpoint_dir=checkpoint_dir):
|
||||
print('loading checkpoint from disk')
|
||||
logger.info('loading checkpoint from disk')
|
||||
from llama_index.core import StorageContext, load_index_from_storage
|
||||
storage_context = StorageContext.from_defaults(persist_dir=checkpoint_dir)
|
||||
self.vs_index = load_index_from_storage(storage_context, embed_model=self.embed_model)
|
||||
@@ -85,9 +86,9 @@ class LlamaIndexRagWorker(SaveLoad):
|
||||
self.vs_index.storage_context.index_store.to_dict()
|
||||
docstore = self.vs_index.storage_context.docstore.docs
|
||||
vector_store_preview = "\n".join([ f"{_id} | {tn.text}" for _id, tn in docstore.items() ])
|
||||
print('\n++ --------inspect_vector_store begin--------')
|
||||
print(vector_store_preview)
|
||||
print('oo --------inspect_vector_store end--------')
|
||||
logger.info('\n++ --------inspect_vector_store begin--------')
|
||||
logger.info(vector_store_preview)
|
||||
logger.info('oo --------inspect_vector_store end--------')
|
||||
return vector_store_preview
|
||||
|
||||
def add_documents_to_vector_store(self, document_list):
|
||||
@@ -125,5 +126,5 @@ class LlamaIndexRagWorker(SaveLoad):
|
||||
|
||||
def generate_node_array_preview(self, nodes):
|
||||
buf = "\n".join(([f"(No.{i+1} | score {n.score:.3f}): {n.text}" for i, n in enumerate(nodes)]))
|
||||
if self.debug_mode: print(buf)
|
||||
if self.debug_mode: logger.info(buf)
|
||||
return buf
|
||||
|
||||
@@ -2,6 +2,7 @@ import llama_index
|
||||
import os
|
||||
import atexit
|
||||
from typing import List
|
||||
from loguru import logger
|
||||
from llama_index.core import Document
|
||||
from llama_index.core.schema import TextNode
|
||||
from request_llms.embed_models.openai_embed import OpenAiEmbeddingModel
|
||||
@@ -44,14 +45,14 @@ class MilvusSaveLoad():
|
||||
return True
|
||||
|
||||
def save_to_checkpoint(self, checkpoint_dir=None):
|
||||
print(f'saving vector store to: {checkpoint_dir}')
|
||||
logger.info(f'saving vector store to: {checkpoint_dir}')
|
||||
# if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
|
||||
# self.vs_index.storage_context.persist(persist_dir=checkpoint_dir)
|
||||
|
||||
def load_from_checkpoint(self, checkpoint_dir=None):
|
||||
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
|
||||
if self.does_checkpoint_exist(checkpoint_dir=checkpoint_dir):
|
||||
print('loading checkpoint from disk')
|
||||
logger.info('loading checkpoint from disk')
|
||||
from llama_index.core import StorageContext, load_index_from_storage
|
||||
storage_context = StorageContext.from_defaults(persist_dir=checkpoint_dir)
|
||||
try:
|
||||
@@ -101,7 +102,7 @@ class MilvusRagWorker(MilvusSaveLoad, LlamaIndexRagWorker):
|
||||
vector_store_preview = "\n".join(
|
||||
[f"{node.id_} | {node.text}" for node in dummy_retrieve_res]
|
||||
)
|
||||
print('\n++ --------inspect_vector_store begin--------')
|
||||
print(vector_store_preview)
|
||||
print('oo --------inspect_vector_store end--------')
|
||||
logger.info('\n++ --------inspect_vector_store begin--------')
|
||||
logger.info(vector_store_preview)
|
||||
logger.info('oo --------inspect_vector_store end--------')
|
||||
return vector_store_preview
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
# From project chatglm-langchain
|
||||
|
||||
import threading
|
||||
from toolbox import Singleton
|
||||
import os
|
||||
import shutil
|
||||
import os
|
||||
import uuid
|
||||
import tqdm
|
||||
import shutil
|
||||
import threading
|
||||
import numpy as np
|
||||
from toolbox import Singleton
|
||||
from loguru import logger
|
||||
from langchain.vectorstores import FAISS
|
||||
from langchain.docstore.document import Document
|
||||
from typing import List, Tuple
|
||||
import numpy as np
|
||||
from crazy_functions.vector_fns.general_file_loader import load_file
|
||||
|
||||
embedding_model_dict = {
|
||||
@@ -150,17 +151,17 @@ class LocalDocQA:
|
||||
failed_files = []
|
||||
if isinstance(filepath, str):
|
||||
if not os.path.exists(filepath):
|
||||
print("路径不存在")
|
||||
logger.error("路径不存在")
|
||||
return None
|
||||
elif os.path.isfile(filepath):
|
||||
file = os.path.split(filepath)[-1]
|
||||
try:
|
||||
docs = load_file(filepath, SENTENCE_SIZE)
|
||||
print(f"{file} 已成功加载")
|
||||
logger.info(f"{file} 已成功加载")
|
||||
loaded_files.append(filepath)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print(f"{file} 未能成功加载")
|
||||
logger.error(e)
|
||||
logger.error(f"{file} 未能成功加载")
|
||||
return None
|
||||
elif os.path.isdir(filepath):
|
||||
docs = []
|
||||
@@ -170,23 +171,23 @@ class LocalDocQA:
|
||||
docs += load_file(fullfilepath, SENTENCE_SIZE)
|
||||
loaded_files.append(fullfilepath)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error(e)
|
||||
failed_files.append(file)
|
||||
|
||||
if len(failed_files) > 0:
|
||||
print("以下文件未能成功加载:")
|
||||
logger.error("以下文件未能成功加载:")
|
||||
for file in failed_files:
|
||||
print(f"{file}\n")
|
||||
logger.error(f"{file}\n")
|
||||
|
||||
else:
|
||||
docs = []
|
||||
for file in filepath:
|
||||
docs += load_file(file, SENTENCE_SIZE)
|
||||
print(f"{file} 已成功加载")
|
||||
logger.info(f"{file} 已成功加载")
|
||||
loaded_files.append(file)
|
||||
|
||||
if len(docs) > 0:
|
||||
print("文件加载完毕,正在生成向量库")
|
||||
logger.info("文件加载完毕,正在生成向量库")
|
||||
if vs_path and os.path.isdir(vs_path):
|
||||
try:
|
||||
self.vector_store = FAISS.load_local(vs_path, text2vec)
|
||||
@@ -233,7 +234,7 @@ class LocalDocQA:
|
||||
prompt += "\n\n".join([f"({k}): " + doc.page_content for k, doc in enumerate(related_docs_with_score)])
|
||||
prompt += "\n\n---\n\n"
|
||||
prompt = prompt.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
|
||||
# print(prompt)
|
||||
# logger.info(prompt)
|
||||
response = {"query": query, "source_documents": related_docs_with_score}
|
||||
return response, prompt
|
||||
|
||||
@@ -262,7 +263,7 @@ def construct_vector_store(vs_id, vs_path, files, sentence_size, history, one_co
|
||||
else:
|
||||
pass
|
||||
# file_status = "文件未成功加载,请重新上传文件"
|
||||
# print(file_status)
|
||||
# logger.info(file_status)
|
||||
return local_doc_qa, vs_path
|
||||
|
||||
@Singleton
|
||||
@@ -278,7 +279,7 @@ class knowledge_archive_interface():
|
||||
if self.text2vec_large_chinese is None:
|
||||
# < -------------------预热文本向量化模组--------------- >
|
||||
from toolbox import ProxyNetworkActivate
|
||||
print('Checking Text2vec ...')
|
||||
logger.info('Checking Text2vec ...')
|
||||
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
||||
with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
|
||||
self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
import re, requests, unicodedata, os
|
||||
from toolbox import update_ui, get_log_folder
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
||||
from toolbox import CatchException, report_exception, get_conf
|
||||
import re, requests, unicodedata, os
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from loguru import logger
|
||||
|
||||
def download_arxiv_(url_pdf):
|
||||
if 'arxiv.org' not in url_pdf:
|
||||
if ('.' in url_pdf) and ('/' not in url_pdf):
|
||||
new_url = 'https://arxiv.org/abs/'+url_pdf
|
||||
print('下载编号:', url_pdf, '自动定位:', new_url)
|
||||
logger.info('下载编号:', url_pdf, '自动定位:', new_url)
|
||||
# download_arxiv_(new_url)
|
||||
return download_arxiv_(new_url)
|
||||
else:
|
||||
print('不能识别的URL!')
|
||||
logger.info('不能识别的URL!')
|
||||
return None
|
||||
if 'abs' in url_pdf:
|
||||
url_pdf = url_pdf.replace('abs', 'pdf')
|
||||
@@ -42,15 +44,12 @@ def download_arxiv_(url_pdf):
|
||||
requests_pdf_url = url_pdf
|
||||
file_path = download_dir+title_str
|
||||
|
||||
print('下载中')
|
||||
logger.info('下载中')
|
||||
proxies = get_conf('proxies')
|
||||
r = requests.get(requests_pdf_url, proxies=proxies)
|
||||
with open(file_path, 'wb+') as f:
|
||||
f.write(r.content)
|
||||
print('下载完成')
|
||||
|
||||
# print('输出下载命令:','aria2c -o \"%s\" %s'%(title_str,url_pdf))
|
||||
# subprocess.call('aria2c --all-proxy=\"172.18.116.150:11084\" -o \"%s\" %s'%(download_dir+title_str,url_pdf), shell=True)
|
||||
logger.info('下载完成')
|
||||
|
||||
x = "%s %s %s.bib" % (paper_id, other_info['year'], other_info['authors'])
|
||||
x = x.replace('?', '?')\
|
||||
@@ -63,19 +62,9 @@ def download_arxiv_(url_pdf):
|
||||
|
||||
|
||||
def get_name(_url_):
|
||||
import os
|
||||
from bs4 import BeautifulSoup
|
||||
print('正在获取文献名!')
|
||||
print(_url_)
|
||||
|
||||
# arxiv_recall = {}
|
||||
# if os.path.exists('./arxiv_recall.pkl'):
|
||||
# with open('./arxiv_recall.pkl', 'rb') as f:
|
||||
# arxiv_recall = pickle.load(f)
|
||||
|
||||
# if _url_ in arxiv_recall:
|
||||
# print('在缓存中')
|
||||
# return arxiv_recall[_url_]
|
||||
logger.info('正在获取文献名!')
|
||||
logger.info(_url_)
|
||||
|
||||
proxies = get_conf('proxies')
|
||||
res = requests.get(_url_, proxies=proxies)
|
||||
@@ -92,7 +81,7 @@ def get_name(_url_):
|
||||
other_details['abstract'] = abstract
|
||||
except:
|
||||
other_details['year'] = ''
|
||||
print('年份获取失败')
|
||||
logger.info('年份获取失败')
|
||||
|
||||
# get author
|
||||
try:
|
||||
@@ -101,7 +90,7 @@ def get_name(_url_):
|
||||
other_details['authors'] = authors
|
||||
except:
|
||||
other_details['authors'] = ''
|
||||
print('authors获取失败')
|
||||
logger.info('authors获取失败')
|
||||
|
||||
# get comment
|
||||
try:
|
||||
@@ -116,11 +105,11 @@ def get_name(_url_):
|
||||
other_details['comment'] = ''
|
||||
except:
|
||||
other_details['comment'] = ''
|
||||
print('年份获取失败')
|
||||
logger.info('年份获取失败')
|
||||
|
||||
title_str = BeautifulSoup(
|
||||
res.text, 'html.parser').find('title').contents[0]
|
||||
print('获取成功:', title_str)
|
||||
logger.info('获取成功:', title_str)
|
||||
# arxiv_recall[_url_] = (title_str+'.pdf', other_details)
|
||||
# with open('./arxiv_recall.pkl', 'wb') as f:
|
||||
# pickle.dump(arxiv_recall, f)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
from toolbox import CatchException, update_ui
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
|
||||
@CatchException
|
||||
def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
|
||||
|
||||
@@ -16,8 +16,8 @@ Testing:
|
||||
|
||||
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
|
||||
from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
|
||||
from .crazy_utils import input_clipping, try_install_deps
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
|
||||
from crazy_functions.crazy_utils import input_clipping, try_install_deps
|
||||
from crazy_functions.gen_fns.gen_fns_shared import is_function_successfully_generated
|
||||
from crazy_functions.gen_fns.gen_fns_shared import get_class_name
|
||||
from crazy_functions.gen_fns.gen_fns_shared import subprocess_worker
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from toolbox import CatchException, update_ui, gen_time_str
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from .crazy_utils import input_clipping
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import input_clipping
|
||||
import copy, json
|
||||
|
||||
@CatchException
|
||||
|
||||
@@ -6,13 +6,14 @@
|
||||
"""
|
||||
|
||||
|
||||
import time
|
||||
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
|
||||
from toolbox import get_conf, select_api_key, update_ui_lastest_msg, Singleton
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_plugin_arg
|
||||
from crazy_functions.crazy_utils import input_clipping, try_install_deps
|
||||
from crazy_functions.agent_fns.persistent import GradioMultiuserManagerForPersistentClasses
|
||||
from crazy_functions.agent_fns.auto_agent import AutoGenMath
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
def remove_model_prefix(llm):
|
||||
if llm.startswith('api2d-'): llm = llm.replace('api2d-', '')
|
||||
@@ -80,12 +81,12 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
|
||||
persistent_key = f"{user_uuid}->多智能体终端"
|
||||
if persistent_class_multi_user_manager.already_alive(persistent_key):
|
||||
# 当已经存在一个正在运行的多智能体终端时,直接将用户输入传递给它,而不是再次启动一个新的多智能体终端
|
||||
print('[debug] feed new user input')
|
||||
logger.info('[debug] feed new user input')
|
||||
executor = persistent_class_multi_user_manager.get(persistent_key)
|
||||
exit_reason = yield from executor.main_process_ui_control(txt, create_or_resume="resume")
|
||||
else:
|
||||
# 运行多智能体终端 (首次)
|
||||
print('[debug] create new executor instance')
|
||||
logger.info('[debug] create new executor instance')
|
||||
history = []
|
||||
chatbot.append(["正在启动: 多智能体终端", "插件动态生成, 执行开始, 作者 Microsoft & Binary-Husky."])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, report_exception
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
fast_debug = False
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from toolbox import CatchException, report_exception, select_api_key, update_ui, get_conf
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone, get_log_folder
|
||||
|
||||
def split_audio_file(filename, split_duration=1000):
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
from loguru import logger
|
||||
|
||||
from toolbox import update_ui, promote_file_to_downloadzone, gen_time_str
|
||||
from toolbox import CatchException, report_exception
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from .crazy_utils import read_and_clean_pdf_text
|
||||
from .crazy_utils import input_clipping
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import read_and_clean_pdf_text
|
||||
from crazy_functions.crazy_utils import input_clipping
|
||||
|
||||
|
||||
|
||||
def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
file_write_buffer = []
|
||||
for file_name in file_manifest:
|
||||
print('begin analysis on:', file_name)
|
||||
logger.info('begin analysis on:', file_name)
|
||||
############################## <第 0 步,切割PDF> ##################################
|
||||
# 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割)
|
||||
# 的长度必须小于 2500 个 Token
|
||||
@@ -38,7 +40,7 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
|
||||
last_iteration_result = paper_meta # 初始值是摘要
|
||||
MAX_WORD_TOTAL = 4096 * 0.7
|
||||
n_fragment = len(paper_fragments)
|
||||
if n_fragment >= 20: print('文章极长,不能达到预期效果')
|
||||
if n_fragment >= 20: logger.warning('文章极长,不能达到预期效果')
|
||||
for i in range(n_fragment):
|
||||
NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
|
||||
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} Chinese characters: {paper_fragments[i]}"
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from loguru import logger
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, report_exception
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
||||
|
||||
fast_debug = False
|
||||
@@ -57,7 +58,6 @@ def readPdf(pdfPath):
|
||||
layout = device.get_result()
|
||||
for obj in layout._objs:
|
||||
if isinstance(obj, pdfminer.layout.LTTextBoxHorizontal):
|
||||
# print(obj.get_text())
|
||||
outTextList.append(obj.get_text())
|
||||
|
||||
return outTextList
|
||||
@@ -66,7 +66,7 @@ def readPdf(pdfPath):
|
||||
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
import time, glob, os
|
||||
from bs4 import BeautifulSoup
|
||||
print('begin analysis on:', file_manifest)
|
||||
logger.info('begin analysis on:', file_manifest)
|
||||
for index, fp in enumerate(file_manifest):
|
||||
if ".tex" in fp:
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from toolbox import CatchException, report_exception, get_log_folder, gen_time_str
|
||||
from toolbox import update_ui, promote_file_to_downloadzone, update_ui_lastest_msg, disable_auto_promotion
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from .crazy_utils import read_and_clean_pdf_text
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from crazy_functions.crazy_utils import read_and_clean_pdf_text
|
||||
from .pdf_fns.parse_pdf import parse_pdf, get_avail_grobid_url, translate_pdf
|
||||
from shared_utils.colorful import *
|
||||
import copy
|
||||
@@ -60,7 +60,7 @@ def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
# 清空历史,以免输入溢出
|
||||
history = []
|
||||
|
||||
from .crazy_utils import get_files_from_everything
|
||||
from crazy_functions.crazy_utils import get_files_from_everything
|
||||
success, file_manifest, project_folder = get_files_from_everything(txt, type='.pdf')
|
||||
if len(file_manifest) > 0:
|
||||
# 尝试导入依赖,如果缺少依赖,则给出安装建议
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
from loguru import logger
|
||||
from toolbox import CatchException, update_ui, gen_time_str, promote_file_to_downloadzone
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import input_clipping
|
||||
@@ -34,10 +35,10 @@ def eval_manim(code):
|
||||
return f'gpt_log/{time_str}.mp4'
|
||||
except subprocess.CalledProcessError as e:
|
||||
output = e.output.decode()
|
||||
print(f"Command returned non-zero exit status {e.returncode}: {output}.")
|
||||
logger.error(f"Command returned non-zero exit status {e.returncode}: {output}.")
|
||||
return f"Evaluating python script failed: {e.output}."
|
||||
except:
|
||||
print('generating mp4 failed')
|
||||
logger.error('generating mp4 failed')
|
||||
return "Generating mp4 failed."
|
||||
|
||||
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
from loguru import logger
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, report_exception
|
||||
from .crazy_utils import read_and_clean_pdf_text
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
fast_debug = False
|
||||
from crazy_functions.crazy_utils import read_and_clean_pdf_text
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
|
||||
|
||||
def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
import tiktoken
|
||||
print('begin analysis on:', file_name)
|
||||
logger.info('begin analysis on:', file_name)
|
||||
|
||||
############################## <第 0 步,切割PDF> ##################################
|
||||
# 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割)
|
||||
@@ -36,7 +35,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
|
||||
last_iteration_result = paper_meta # 初始值是摘要
|
||||
MAX_WORD_TOTAL = 4096
|
||||
n_fragment = len(paper_fragments)
|
||||
if n_fragment >= 20: print('文章极长,不能达到预期效果')
|
||||
if n_fragment >= 20: logger.warning('文章极长,不能达到预期效果')
|
||||
for i in range(n_fragment):
|
||||
NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
|
||||
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i]}"
|
||||
@@ -57,7 +56,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
|
||||
chatbot.append([i_say_show_user, gpt_say])
|
||||
|
||||
############################## <第 4 步,设置一个token上限,防止回答时Token溢出> ##################################
|
||||
from .crazy_utils import input_clipping
|
||||
from crazy_functions.crazy_utils import input_clipping
|
||||
_, final_results = input_clipping("", final_results, max_token_limit=3200)
|
||||
yield from update_ui(chatbot=chatbot, history=final_results) # 注意这里的历史记录被替代了
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
from loguru import logger
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, report_exception
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
fast_debug = False
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
|
||||
def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
import time, os
|
||||
print('begin analysis on:', file_manifest)
|
||||
logger.info('begin analysis on:', file_manifest)
|
||||
for index, fp in enumerate(file_manifest):
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
file_content = f.read()
|
||||
@@ -16,22 +16,20 @@ def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
|
||||
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
if not fast_debug:
|
||||
msg = '正常'
|
||||
# ** gpt request **
|
||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
||||
i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
|
||||
msg = '正常'
|
||||
# ** gpt request **
|
||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
||||
i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
|
||||
|
||||
chatbot[-1] = (i_say_show_user, gpt_say)
|
||||
history.append(i_say_show_user); history.append(gpt_say)
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
|
||||
if not fast_debug: time.sleep(2)
|
||||
|
||||
if not fast_debug:
|
||||
res = write_history_to_file(history)
|
||||
promote_file_to_downloadzone(res, chatbot=chatbot)
|
||||
chatbot.append(("完成了吗?", res))
|
||||
chatbot[-1] = (i_say_show_user, gpt_say)
|
||||
history.append(i_say_show_user); history.append(gpt_say)
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
|
||||
time.sleep(2)
|
||||
|
||||
res = write_history_to_file(history)
|
||||
promote_file_to_downloadzone(res, chatbot=chatbot)
|
||||
chatbot.append(("完成了吗?", res))
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from toolbox import CatchException, update_ui, report_exception
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.plugin_template.plugin_class_template import (
|
||||
GptAcademicPluginTemplate,
|
||||
)
|
||||
@@ -201,8 +201,7 @@ def 解析历史输入(history, llm_kwargs, file_manifest, chatbot, plugin_kwarg
|
||||
MAX_WORD_TOTAL = 4096
|
||||
n_txt = len(txt)
|
||||
last_iteration_result = "从以下文本中提取摘要。"
|
||||
if n_txt >= 20:
|
||||
print("文章极长,不能达到预期效果")
|
||||
|
||||
for i in range(n_txt):
|
||||
NUM_OF_WORD = MAX_WORD_TOTAL // n_txt
|
||||
i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words in Chinese: {txt[i]}"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg, get_log_folder, get_user
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
|
||||
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
|
||||
from loguru import logger
|
||||
install_msg ="""
|
||||
|
||||
1. python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
|
||||
@@ -40,7 +40,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
except Exception as e:
|
||||
chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
# from .crazy_utils import try_install_deps
|
||||
# from crazy_functions.crazy_utils import try_install_deps
|
||||
# try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
|
||||
# yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
|
||||
return
|
||||
@@ -60,7 +60,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
# < -------------------预热文本向量化模组--------------- >
|
||||
chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
print('Checking Text2vec ...')
|
||||
logger.info('Checking Text2vec ...')
|
||||
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
||||
with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
|
||||
HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
|
||||
@@ -68,7 +68,7 @@ def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
# < -------------------构建知识库--------------- >
|
||||
chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
print('Establishing knowledge archive ...')
|
||||
logger.info('Establishing knowledge archive ...')
|
||||
with ProxyNetworkActivate('Download_LLM'): # 临时地激活代理网络
|
||||
kai = knowledge_archive_interface()
|
||||
vs_path = get_log_folder(user=get_user(chatbot), plugin_name='vec_store')
|
||||
@@ -93,7 +93,7 @@ def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
||||
except Exception as e:
|
||||
chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
# from .crazy_utils import try_install_deps
|
||||
# from crazy_functions.crazy_utils import try_install_deps
|
||||
# try_install_deps(['zh_langchain==0.2.1', 'pypinyin'], reload_m=['pypinyin', 'zh_langchain'])
|
||||
# yield from update_ui_lastest_msg("安装完成,您可以再次重试。", chatbot, history)
|
||||
return
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from toolbox import CatchException, update_ui
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from request_llms.bridge_all import model_info
|
||||
@@ -23,8 +23,8 @@ def google(query, proxies):
|
||||
item = {'title': title, 'link': link}
|
||||
results.append(item)
|
||||
|
||||
for r in results:
|
||||
print(r['link'])
|
||||
# for r in results:
|
||||
# print(r['link'])
|
||||
return results
|
||||
|
||||
def scrape_text(url, proxies) -> str:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from toolbox import CatchException, update_ui
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from request_llms.bridge_all import model_info
|
||||
@@ -22,8 +22,8 @@ def bing_search(query, proxies=None):
|
||||
item = {'title': title, 'link': link}
|
||||
results.append(item)
|
||||
|
||||
for r in results:
|
||||
print(r['link'])
|
||||
# for r in results:
|
||||
# print(r['link'])
|
||||
return results
|
||||
|
||||
|
||||
|
||||
@@ -64,7 +64,7 @@ def parseNotebook(filename, enable_markdown=1):
|
||||
|
||||
|
||||
def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
|
||||
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
|
||||
enable_markdown = plugin_kwargs.get("advanced_arg", "1")
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from toolbox import CatchException, update_ui, get_conf
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
import datetime
|
||||
@CatchException
|
||||
def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, get_conf, markdown_convertion
|
||||
from request_llms.bridge_all import predict_no_ui_long_connection
|
||||
from crazy_functions.crazy_utils import input_clipping
|
||||
from crazy_functions.agent_fns.watchdog import WatchDog
|
||||
from request_llms.bridge_all import predict_no_ui_long_connection
|
||||
from crazy_functions.live_audio.aliyunASR import AliyunASR
|
||||
from loguru import logger
|
||||
|
||||
import threading, time
|
||||
import numpy as np
|
||||
from .live_audio.aliyunASR import AliyunASR
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -42,9 +44,9 @@ class AsyncGptTask():
|
||||
gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt,
|
||||
observe_window=observe_window[index], console_slience=True)
|
||||
except ConnectionAbortedError as token_exceed_err:
|
||||
print('至少一个线程任务Token溢出而失败', e)
|
||||
logger.error('至少一个线程任务Token溢出而失败', e)
|
||||
except Exception as e:
|
||||
print('至少一个线程任务意外失败', e)
|
||||
logger.error('至少一个线程任务意外失败', e)
|
||||
|
||||
def add_async_gpt_task(self, i_say, chatbot_index, llm_kwargs, history, system_prompt):
|
||||
self.observe_future.append([""])
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
from toolbox import update_ui
|
||||
from toolbox import CatchException, report_exception
|
||||
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
|
||||
|
||||
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
import time, glob, os
|
||||
print('begin analysis on:', file_manifest)
|
||||
for index, fp in enumerate(file_manifest):
|
||||
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
|
||||
file_content = f.read()
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from toolbox import CatchException, report_exception, promote_file_to_downloadzone
|
||||
from toolbox import update_ui, update_ui_lastest_msg, disable_auto_promotion, write_history_to_file
|
||||
import logging
|
||||
|
||||
@@ -180,6 +180,7 @@ version: '3'
|
||||
services:
|
||||
gpt_academic_with_latex:
|
||||
image: ghcr.io/binary-husky/gpt_academic_with_latex:master # (Auto Built by Dockerfile: docs/GithubAction+NoLocal+Latex)
|
||||
# 对于ARM64设备,请将以上镜像名称替换为 ghcr.io/binary-husky/gpt_academic_with_latex_arm:master
|
||||
environment:
|
||||
# 请查阅 `config.py` 以查看所有的配置信息
|
||||
API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx '
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
# 此Dockerfile不再维护,请前往docs/GithubAction+JittorLLMs
|
||||
@@ -1,57 +0,0 @@
|
||||
# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacity --network=host --build-arg http_proxy=http://localhost:10881 --build-arg https_proxy=http://localhost:10881 .
|
||||
# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacityBeta --network=host .
|
||||
# docker run -it --net=host gpt-academic-all-capacity bash
|
||||
|
||||
# 从NVIDIA源,从而支持显卡(检查宿主的nvidia-smi中的cuda版本必须>=11.3)
|
||||
FROM fuqingxu/11.3.1-runtime-ubuntu20.04-with-texlive:latest
|
||||
|
||||
# edge-tts需要的依赖,某些pip包所需的依赖
|
||||
RUN apt update && apt install ffmpeg build-essential -y
|
||||
|
||||
# use python3 as the system default python
|
||||
WORKDIR /gpt
|
||||
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
|
||||
|
||||
# # 非必要步骤,更换pip源 (以下三行,可以删除)
|
||||
# RUN echo '[global]' > /etc/pip.conf && \
|
||||
# echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
|
||||
# echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
|
||||
|
||||
# 下载pytorch
|
||||
RUN python3 -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
|
||||
# 准备pip依赖
|
||||
RUN python3 -m pip install openai numpy arxiv rich
|
||||
RUN python3 -m pip install colorama Markdown pygments pymupdf
|
||||
RUN python3 -m pip install python-docx moviepy pdfminer
|
||||
RUN python3 -m pip install zh_langchain==0.2.1 pypinyin
|
||||
RUN python3 -m pip install rarfile py7zr
|
||||
RUN python3 -m pip install aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
|
||||
# 下载分支
|
||||
WORKDIR /gpt
|
||||
RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
|
||||
WORKDIR /gpt/gpt_academic
|
||||
RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss
|
||||
|
||||
RUN python3 -m pip install -r requirements.txt
|
||||
RUN python3 -m pip install -r request_llms/requirements_moss.txt
|
||||
RUN python3 -m pip install -r request_llms/requirements_qwen.txt
|
||||
RUN python3 -m pip install -r request_llms/requirements_chatglm.txt
|
||||
RUN python3 -m pip install -r request_llms/requirements_newbing.txt
|
||||
RUN python3 -m pip install nougat-ocr
|
||||
|
||||
|
||||
# 预热Tiktoken模块
|
||||
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
|
||||
|
||||
# 安装知识库插件的额外依赖
|
||||
RUN apt-get update && apt-get install libgl1 -y
|
||||
RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
|
||||
RUN pip3 install unstructured[all-docs] --upgrade
|
||||
RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
|
||||
RUN rm -rf /usr/local/lib/python3.8/dist-packages/tests
|
||||
|
||||
|
||||
# COPY .cache /root/.cache
|
||||
# COPY config_private.py config_private.py
|
||||
# 启动
|
||||
CMD ["python3", "-u", "main.py"]
|
||||
@@ -1,35 +1,34 @@
|
||||
# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM
|
||||
# 此Dockerfile适用于"无本地模型"的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM
|
||||
# - 1 修改 `config.py`
|
||||
# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex .
|
||||
# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
|
||||
|
||||
FROM fuqingxu/python311_texlive_ctex:latest
|
||||
ENV PATH "$PATH:/usr/local/texlive/2022/bin/x86_64-linux"
|
||||
ENV PATH "$PATH:/usr/local/texlive/2023/bin/x86_64-linux"
|
||||
ENV PATH "$PATH:/usr/local/texlive/2024/bin/x86_64-linux"
|
||||
ENV PATH "$PATH:/usr/local/texlive/2025/bin/x86_64-linux"
|
||||
ENV PATH "$PATH:/usr/local/texlive/2026/bin/x86_64-linux"
|
||||
|
||||
# 指定路径
|
||||
FROM menghuan1918/ubuntu_uv_ctex:latest
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
WORKDIR /gpt
|
||||
|
||||
RUN pip3 install openai numpy arxiv rich
|
||||
RUN pip3 install colorama Markdown pygments pymupdf
|
||||
RUN pip3 install python-docx pdfminer
|
||||
RUN pip3 install nougat-ocr
|
||||
|
||||
# 装载项目文件
|
||||
COPY . .
|
||||
|
||||
# 先复制依赖文件
|
||||
COPY requirements.txt .
|
||||
|
||||
# 安装依赖
|
||||
RUN pip3 install -r requirements.txt
|
||||
RUN pip install --break-system-packages openai numpy arxiv rich colorama Markdown pygments pymupdf python-docx pdfminer \
|
||||
&& pip install --break-system-packages -r requirements.txt \
|
||||
&& if [ "$(uname -m)" = "x86_64" ]; then \
|
||||
pip install --break-system-packages nougat-ocr; \
|
||||
fi \
|
||||
&& pip cache purge \
|
||||
&& rm -rf /root/.cache/pip/*
|
||||
|
||||
# edge-tts需要的依赖
|
||||
RUN apt update && apt install ffmpeg -y
|
||||
# 创建非root用户
|
||||
RUN useradd -m gptuser && chown -R gptuser /gpt
|
||||
USER gptuser
|
||||
|
||||
# 最后才复制代码文件,这样代码更新时只需重建最后几层,可以大幅减少docker pull所需的大小
|
||||
COPY --chown=gptuser:gptuser . .
|
||||
|
||||
# 可选步骤,用于预热模块
|
||||
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
|
||||
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
|
||||
|
||||
# 启动
|
||||
CMD ["python3", "-u", "main.py"]
|
||||
|
||||
@@ -4,7 +4,7 @@ We currently support fastapi in order to solve sub-path deploy issue.
|
||||
|
||||
1. change CUSTOM_PATH setting in `config.py`
|
||||
|
||||
``` sh
|
||||
```sh
|
||||
nano config.py
|
||||
```
|
||||
|
||||
@@ -35,9 +35,8 @@ if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
|
||||
3. Go!
|
||||
|
||||
``` sh
|
||||
```sh
|
||||
python main.py
|
||||
```
|
||||
|
||||
@@ -149,6 +149,19 @@
|
||||
DEFINE_ARG_INPUT_INTERFACE = json.dumps(define_arg_selection)
|
||||
return base64.b64encode(DEFINE_ARG_INPUT_INTERFACE.encode('utf-8')).decode('utf-8')
|
||||
```
|
||||
1-2. 预留4个动态插件按钮(常规状态隐藏)
|
||||
|
||||
点击 “+插件按钮”:跳转到插件市场
|
||||
|
||||
点击 加载插件:
|
||||
- 下载文件
|
||||
- 注册 exe_dynamic_plugin开始执行
|
||||
- 执行浏览器js函数
|
||||
|
||||
点击动态插件按钮
|
||||
- js: 检查register_advanced_plugin_init_code_arr,如果为空,提示
|
||||
- 如果非空,先跳二级菜单,设定诸元后,执行一个隐藏的按钮 -- 关联 exe_dynamic_plugin
|
||||
- exe_dynamic_plugin开始执行
|
||||
|
||||
|
||||
2. 用户加载阶段(主javascript程序`common.js`中),浏览器加载`register_advanced_plugin_init_code_arr`,存入本地的字典`advanced_plugin_init_code_lib`:
|
||||
|
||||
文件差异内容过多而无法显示
加载差异
@@ -108,5 +108,22 @@
|
||||
"解析PDF_简单拆解": "ParsePDF_simpleDecomposition",
|
||||
"解析PDF_DOC2X_单文件": "ParsePDF_DOC2X_singleFile",
|
||||
"注释Python项目": "CommentPythonProject",
|
||||
"注释源代码": "CommentSourceCode"
|
||||
"注释源代码": "CommentSourceCode",
|
||||
"log亮黄": "log_yellow",
|
||||
"log亮绿": "log_green",
|
||||
"log亮红": "log_red",
|
||||
"log亮紫": "log_purple",
|
||||
"log亮蓝": "log_blue",
|
||||
"Rag问答": "RagQA",
|
||||
"sprint红": "sprint_red",
|
||||
"sprint绿": "sprint_green",
|
||||
"sprint黄": "sprint_yellow",
|
||||
"sprint蓝": "sprint_blue",
|
||||
"sprint紫": "sprint_purple",
|
||||
"sprint靛": "sprint_indigo",
|
||||
"sprint亮红": "sprint_bright_red",
|
||||
"sprint亮绿": "sprint_bright_green",
|
||||
"sprint亮黄": "sprint_bright_yellow",
|
||||
"sprint亮蓝": "sprint_bright_blue",
|
||||
"sprint亮紫": "sprint_bright_purple"
|
||||
}
|
||||
29
main.py
29
main.py
@@ -13,16 +13,10 @@ help_menu_description = \
|
||||
</br></br>如何语音对话: 请阅读Wiki
|
||||
</br></br>如何临时更换API_KEY: 在输入区输入临时API_KEY后提交(网页刷新后失效)"""
|
||||
|
||||
from loguru import logger
|
||||
def enable_log(PATH_LOGGING):
|
||||
import logging
|
||||
admin_log_path = os.path.join(PATH_LOGGING, "admin")
|
||||
os.makedirs(admin_log_path, exist_ok=True)
|
||||
log_dir = os.path.join(admin_log_path, "chat_secrets.log")
|
||||
try:logging.basicConfig(filename=log_dir, level=logging.INFO, encoding="utf-8", format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
|
||||
except:logging.basicConfig(filename=log_dir, level=logging.INFO, format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
|
||||
# Disable logging output from the 'httpx' logger
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
print(f"所有对话记录将自动保存在本地目录{log_dir}, 请注意自我隐私保护哦!")
|
||||
from shared_utils.logging import setup_logging
|
||||
setup_logging(PATH_LOGGING)
|
||||
|
||||
def encode_plugin_info(k, plugin)->str:
|
||||
import copy
|
||||
@@ -42,9 +36,16 @@ def main():
|
||||
import gradio as gr
|
||||
if gr.__version__ not in ['3.32.9', '3.32.10', '3.32.11']:
|
||||
raise ModuleNotFoundError("使用项目内置Gradio获取最优体验! 请运行 `pip install -r requirements.txt` 指令安装内置Gradio及其他依赖, 详情信息见requirements.txt.")
|
||||
from request_llms.bridge_all import predict
|
||||
|
||||
# 一些基础工具
|
||||
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
|
||||
|
||||
# 对话、日志记录
|
||||
enable_log(get_conf("PATH_LOGGING"))
|
||||
|
||||
# 对话句柄
|
||||
from request_llms.bridge_all import predict
|
||||
|
||||
# 读取配置
|
||||
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION = get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION')
|
||||
CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = get_conf('CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
|
||||
@@ -61,8 +62,6 @@ def main():
|
||||
from themes.theme import load_dynamic_theme, to_cookie_str, from_cookie_str, assign_user_uuid
|
||||
title_html = f"<h1 align=\"center\">GPT 学术优化 {get_current_version()}</h1>{theme_declaration}"
|
||||
|
||||
# 对话、日志记录
|
||||
enable_log(PATH_LOGGING)
|
||||
|
||||
# 一些普通功能模块
|
||||
from core_functional import get_core_functions
|
||||
@@ -339,9 +338,9 @@ def main():
|
||||
# Gradio的inbrowser触发不太稳定,回滚代码到原始的浏览器打开函数
|
||||
def run_delayed_tasks():
|
||||
import threading, webbrowser, time
|
||||
print(f"如果浏览器没有自动打开,请复制并转到以下URL:")
|
||||
if DARK_MODE: print(f"\t「暗色主题已启用(支持动态切换主题)」: http://localhost:{PORT}")
|
||||
else: print(f"\t「亮色主题已启用(支持动态切换主题)」: http://localhost:{PORT}")
|
||||
logger.info(f"如果浏览器没有自动打开,请复制并转到以下URL:")
|
||||
if DARK_MODE: logger.info(f"\t「暗色主题已启用(支持动态切换主题)」: http://localhost:{PORT}")
|
||||
else: logger.info(f"\t「亮色主题已启用(支持动态切换主题)」: http://localhost:{PORT}")
|
||||
|
||||
def auto_updates(): time.sleep(0); auto_update()
|
||||
def open_browser(): time.sleep(2); webbrowser.open_new_tab(f"http://localhost:{PORT}")
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
2. predict_no_ui_long_connection(...)
|
||||
"""
|
||||
import tiktoken, copy, re
|
||||
from loguru import logger
|
||||
from functools import lru_cache
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from toolbox import get_conf, trimmed_format_exc, apply_gpt_academic_string_mask, read_one_api_model_name
|
||||
@@ -51,9 +52,9 @@ class LazyloadTiktoken(object):
|
||||
@staticmethod
|
||||
@lru_cache(maxsize=128)
|
||||
def get_encoder(model):
|
||||
print('正在加载tokenizer,如果是第一次运行,可能需要一点时间下载参数')
|
||||
logger.info('正在加载tokenizer,如果是第一次运行,可能需要一点时间下载参数')
|
||||
tmp = tiktoken.encoding_for_model(model)
|
||||
print('加载tokenizer完毕')
|
||||
logger.info('加载tokenizer完毕')
|
||||
return tmp
|
||||
|
||||
def encode(self, *args, **kwargs):
|
||||
@@ -83,7 +84,7 @@ try:
|
||||
API_URL = get_conf("API_URL")
|
||||
if API_URL != "https://api.openai.com/v1/chat/completions":
|
||||
openai_endpoint = API_URL
|
||||
print("警告!API_URL配置选项将被弃用,请更换为API_URL_REDIRECT配置")
|
||||
logger.warning("警告!API_URL配置选项将被弃用,请更换为API_URL_REDIRECT配置")
|
||||
except:
|
||||
pass
|
||||
# 新版配置
|
||||
@@ -248,6 +249,27 @@ model_info = {
|
||||
"token_cnt": get_token_num_gpt4,
|
||||
},
|
||||
|
||||
"o1-preview": {
|
||||
"fn_with_ui": chatgpt_ui,
|
||||
"fn_without_ui": chatgpt_noui,
|
||||
"endpoint": openai_endpoint,
|
||||
"max_token": 128000,
|
||||
"tokenizer": tokenizer_gpt4,
|
||||
"token_cnt": get_token_num_gpt4,
|
||||
"openai_disable_system_prompt": True,
|
||||
"openai_disable_stream": True,
|
||||
},
|
||||
"o1-mini": {
|
||||
"fn_with_ui": chatgpt_ui,
|
||||
"fn_without_ui": chatgpt_noui,
|
||||
"endpoint": openai_endpoint,
|
||||
"max_token": 128000,
|
||||
"tokenizer": tokenizer_gpt4,
|
||||
"token_cnt": get_token_num_gpt4,
|
||||
"openai_disable_system_prompt": True,
|
||||
"openai_disable_stream": True,
|
||||
},
|
||||
|
||||
"gpt-4-turbo": {
|
||||
"fn_with_ui": chatgpt_ui,
|
||||
"fn_without_ui": chatgpt_noui,
|
||||
@@ -363,6 +385,14 @@ model_info = {
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
},
|
||||
"glm-4-plus":{
|
||||
"fn_with_ui": zhipu_ui,
|
||||
"fn_without_ui": zhipu_noui,
|
||||
"endpoint": None,
|
||||
"max_token": 10124 * 8,
|
||||
"tokenizer": tokenizer_gpt35,
|
||||
"token_cnt": get_token_num_gpt35,
|
||||
},
|
||||
|
||||
# api_2d (此后不需要在此处添加api2d的接口了,因为下面的代码会自动添加)
|
||||
"api2d-gpt-4": {
|
||||
@@ -662,7 +692,7 @@ if "newbing" in AVAIL_LLM_MODELS: # same with newbing-free
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
if "chatglmft" in AVAIL_LLM_MODELS: # same with newbing-free
|
||||
try:
|
||||
from .bridge_chatglmft import predict_no_ui_long_connection as chatglmft_noui
|
||||
@@ -678,7 +708,7 @@ if "chatglmft" in AVAIL_LLM_MODELS: # same with newbing-free
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- 上海AI-LAB书生大模型 -=-=-=-=-=-=-
|
||||
if "internlm" in AVAIL_LLM_MODELS:
|
||||
try:
|
||||
@@ -695,7 +725,7 @@ if "internlm" in AVAIL_LLM_MODELS:
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
if "chatglm_onnx" in AVAIL_LLM_MODELS:
|
||||
try:
|
||||
from .bridge_chatglmonnx import predict_no_ui_long_connection as chatglm_onnx_noui
|
||||
@@ -711,7 +741,7 @@ if "chatglm_onnx" in AVAIL_LLM_MODELS:
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- 通义-本地模型 -=-=-=-=-=-=-
|
||||
if "qwen-local" in AVAIL_LLM_MODELS:
|
||||
try:
|
||||
@@ -729,7 +759,7 @@ if "qwen-local" in AVAIL_LLM_MODELS:
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- 通义-在线模型 -=-=-=-=-=-=-
|
||||
if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
|
||||
try:
|
||||
@@ -765,7 +795,7 @@ if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- 零一万物模型 -=-=-=-=-=-=-
|
||||
yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"]
|
||||
if any(item in yi_models for item in AVAIL_LLM_MODELS):
|
||||
@@ -845,7 +875,7 @@ if any(item in yi_models for item in AVAIL_LLM_MODELS):
|
||||
},
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- 讯飞星火认知大模型 -=-=-=-=-=-=-
|
||||
if "spark" in AVAIL_LLM_MODELS:
|
||||
try:
|
||||
@@ -863,7 +893,7 @@ if "spark" in AVAIL_LLM_MODELS:
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
if "sparkv2" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型
|
||||
try:
|
||||
from .bridge_spark import predict_no_ui_long_connection as spark_noui
|
||||
@@ -880,7 +910,7 @@ if "sparkv2" in AVAIL_LLM_MODELS: # 讯飞星火认知大模型
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
if any(x in AVAIL_LLM_MODELS for x in ("sparkv3", "sparkv3.5", "sparkv4")): # 讯飞星火认知大模型
|
||||
try:
|
||||
from .bridge_spark import predict_no_ui_long_connection as spark_noui
|
||||
@@ -915,7 +945,7 @@ if any(x in AVAIL_LLM_MODELS for x in ("sparkv3", "sparkv3.5", "sparkv4")): #
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
if "llama2" in AVAIL_LLM_MODELS: # llama2
|
||||
try:
|
||||
from .bridge_llama2 import predict_no_ui_long_connection as llama2_noui
|
||||
@@ -931,7 +961,7 @@ if "llama2" in AVAIL_LLM_MODELS: # llama2
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- 智谱 -=-=-=-=-=-=-
|
||||
if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai 是glm-4的别名,向后兼容配置
|
||||
try:
|
||||
@@ -946,7 +976,7 @@ if "zhipuai" in AVAIL_LLM_MODELS: # zhipuai 是glm-4的别名,向后兼容
|
||||
},
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- 幻方-深度求索大模型 -=-=-=-=-=-=-
|
||||
if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder
|
||||
try:
|
||||
@@ -963,7 +993,7 @@ if "deepseekcoder" in AVAIL_LLM_MODELS: # deepseekcoder
|
||||
}
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- 幻方-深度求索大模型在线API -=-=-=-=-=-=-
|
||||
if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
|
||||
try:
|
||||
@@ -991,7 +1021,7 @@ if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
|
||||
},
|
||||
})
|
||||
except:
|
||||
print(trimmed_format_exc())
|
||||
logger.error(trimmed_format_exc())
|
||||
# -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=-
|
||||
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
|
||||
# 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
|
||||
@@ -1004,7 +1034,7 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
|
||||
# 如果是已知模型,则尝试获取其信息
|
||||
original_model_info = model_info.get(origin_model_name.replace("one-api-", "", 1), None)
|
||||
except:
|
||||
print(f"one-api模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
|
||||
logger.error(f"one-api模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
|
||||
continue
|
||||
this_model_info = {
|
||||
"fn_with_ui": chatgpt_ui,
|
||||
@@ -1035,7 +1065,7 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]:
|
||||
try:
|
||||
_, max_token_tmp = read_one_api_model_name(model)
|
||||
except:
|
||||
print(f"vllm模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
|
||||
logger.error(f"vllm模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
|
||||
continue
|
||||
model_info.update({
|
||||
model: {
|
||||
@@ -1062,7 +1092,7 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("ollama-")]:
|
||||
try:
|
||||
_, max_token_tmp = read_one_api_model_name(model)
|
||||
except:
|
||||
print(f"ollama模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
|
||||
logger.error(f"ollama模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
|
||||
continue
|
||||
model_info.update({
|
||||
model: {
|
||||
@@ -1098,6 +1128,24 @@ if len(AZURE_CFG_ARRAY) > 0:
|
||||
if azure_model_name not in AVAIL_LLM_MODELS:
|
||||
AVAIL_LLM_MODELS += [azure_model_name]
|
||||
|
||||
# -=-=-=-=-=-=- Openrouter模型对齐支持 -=-=-=-=-=-=-
|
||||
# 为了更灵活地接入Openrouter路由,设计了此接口
|
||||
# Register every configured model whose name starts with "openrouter-".
# The bridge is imported lazily, only when at least one such model is configured.
for model in filter(lambda m: m.startswith("openrouter-"), AVAIL_LLM_MODELS):
    from request_llms.bridge_openrouter import predict as openrouter_ui
    from request_llms.bridge_openrouter import predict_no_ui_long_connection as openrouter_noui

    # The settings below mirror the gpt-4o-mini entry; adjust them to the real model if needed.
    model_info[model] = dict(
        fn_with_ui=openrouter_ui,
        fn_without_ui=openrouter_noui,
        endpoint=openai_endpoint,
        has_multimodal_capacity=True,
        max_token=128000,
        tokenizer=tokenizer_gpt4,
        token_cnt=get_token_num_gpt4,
    )
|
||||
|
||||
|
||||
# -=-=-=-=-=-=--=-=-=-=-=-=--=-=-=-=-=-=--=-=-=-=-=-=-=-=
|
||||
# -=-=-=-=-=-=-=-=-=- ☝️ 以上是模型路由 -=-=-=-=-=-=-=-=-=
|
||||
@@ -1243,5 +1291,5 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot,
|
||||
if additional_fn: # 根据基础功能区 ModelOverride 参数调整模型类型
|
||||
llm_kwargs, additional_fn, method = execute_model_override(llm_kwargs, additional_fn, method)
|
||||
|
||||
# 更新一下llm_kwargs的参数,否则会出现参数不匹配的问题
|
||||
yield from method(inputs, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, stream, additional_fn)
|
||||
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
|
||||
from transformers import AutoModel, AutoTokenizer
|
||||
from loguru import logger
|
||||
from toolbox import update_ui, get_conf
|
||||
from multiprocessing import Process, Pipe
|
||||
import time
|
||||
import os
|
||||
import json
|
||||
import threading
|
||||
import importlib
|
||||
from toolbox import update_ui, get_conf
|
||||
from multiprocessing import Process, Pipe
|
||||
|
||||
load_message = "ChatGLMFT尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,ChatGLMFT消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
|
||||
|
||||
@@ -78,7 +79,7 @@ class GetGLMFTHandle(Process):
|
||||
config.pre_seq_len = model_args['pre_seq_len']
|
||||
config.prefix_projection = model_args['prefix_projection']
|
||||
|
||||
print(f"Loading prefix_encoder weight from {CHATGLM_PTUNING_CHECKPOINT}")
|
||||
logger.info(f"Loading prefix_encoder weight from {CHATGLM_PTUNING_CHECKPOINT}")
|
||||
model = AutoModel.from_pretrained(model_args['model_name_or_path'], config=config, trust_remote_code=True)
|
||||
prefix_state_dict = torch.load(os.path.join(CHATGLM_PTUNING_CHECKPOINT, "pytorch_model.bin"))
|
||||
new_prefix_state_dict = {}
|
||||
@@ -88,7 +89,7 @@ class GetGLMFTHandle(Process):
|
||||
model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
|
||||
|
||||
if model_args['quantization_bit'] is not None and model_args['quantization_bit'] != 0:
|
||||
print(f"Quantized to {model_args['quantization_bit']} bit")
|
||||
logger.info(f"Quantized to {model_args['quantization_bit']} bit")
|
||||
model = model.quantize(model_args['quantization_bit'])
|
||||
model = model.cuda()
|
||||
if model_args['pre_seq_len'] is not None:
|
||||
|
||||
@@ -12,11 +12,12 @@ import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
import traceback
|
||||
import requests
|
||||
import random
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# config_private.py放自己的秘密如API和代理网址
|
||||
# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
|
||||
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history
|
||||
@@ -133,21 +134,32 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
|
||||
observe_window = None:
|
||||
用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗
|
||||
"""
|
||||
from request_llms.bridge_all import model_info
|
||||
|
||||
watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
|
||||
headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
|
||||
|
||||
if model_info[llm_kwargs['llm_model']].get('openai_disable_stream', False): stream = False
|
||||
else: stream = True
|
||||
|
||||
headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=stream)
|
||||
retry = 0
|
||||
while True:
|
||||
try:
|
||||
# make a POST request to the API endpoint, stream=False
|
||||
from .bridge_all import model_info
|
||||
endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
|
||||
response = requests.post(endpoint, headers=headers, proxies=proxies,
|
||||
json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
|
||||
json=payload, stream=stream, timeout=TIMEOUT_SECONDS); break
|
||||
except requests.exceptions.ReadTimeout as e:
|
||||
retry += 1
|
||||
traceback.print_exc()
|
||||
if retry > MAX_RETRY: raise TimeoutError
|
||||
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
|
||||
if not stream:
|
||||
# 该分支仅适用于不支持stream的o1模型,其他情形一律不适用
|
||||
chunkjson = json.loads(response.content.decode())
|
||||
gpt_replying_buffer = chunkjson['choices'][0]["message"]["content"]
|
||||
return gpt_replying_buffer
|
||||
|
||||
stream_response = response.iter_lines()
|
||||
result = ''
|
||||
@@ -190,10 +202,13 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
|
||||
if (time.time()-observe_window[1]) > watch_dog_patience:
|
||||
raise RuntimeError("用户取消了程序。")
|
||||
else: raise RuntimeError("意外Json结构:"+delta)
|
||||
if json_data and json_data['finish_reason'] == 'content_filter':
|
||||
raise RuntimeError("由于提问含不合规内容被Azure过滤。")
|
||||
if json_data and json_data['finish_reason'] == 'length':
|
||||
|
||||
finish_reason = json_data.get('finish_reason', None) if json_data else None
|
||||
if finish_reason == 'content_filter':
|
||||
raise RuntimeError("由于提问含不合规内容被过滤。")
|
||||
if finish_reason == 'length':
|
||||
raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -208,7 +223,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
|
||||
chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容
|
||||
additional_fn代表点击的哪个按钮,按钮见functional.py
|
||||
"""
|
||||
from .bridge_all import model_info
|
||||
from request_llms.bridge_all import model_info
|
||||
if is_any_api_key(inputs):
|
||||
chatbot._cookies['api_key'] = inputs
|
||||
chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
|
||||
@@ -237,6 +252,10 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
|
||||
chatbot.append((_inputs, ""))
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
|
||||
|
||||
# 禁用stream的特殊模型处理
|
||||
if model_info[llm_kwargs['llm_model']].get('openai_disable_stream', False): stream = False
|
||||
else: stream = True
|
||||
|
||||
# check mis-behavior
|
||||
if is_the_upload_folder(user_input):
|
||||
chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。")
|
||||
@@ -270,7 +289,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
|
||||
try:
|
||||
# make a POST request to the API endpoint, stream=True
|
||||
response = requests.post(endpoint, headers=headers, proxies=proxies,
|
||||
json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
|
||||
json=payload, stream=stream, timeout=TIMEOUT_SECONDS);break
|
||||
except:
|
||||
retry += 1
|
||||
chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
|
||||
@@ -278,10 +297,15 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面
|
||||
if retry > MAX_RETRY: raise TimeoutError
|
||||
|
||||
gpt_replying_buffer = ""
|
||||
|
||||
is_head_of_the_stream = True
|
||||
if not stream:
|
||||
# 该分支仅适用于不支持stream的o1模型,其他情形一律不适用
|
||||
yield from handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history)
|
||||
return
|
||||
|
||||
if stream:
|
||||
gpt_replying_buffer = ""
|
||||
is_head_of_the_stream = True
|
||||
stream_response = response.iter_lines()
|
||||
while True:
|
||||
try:
|
||||
@@ -317,7 +341,6 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
|
||||
# 前者是API2D的结束条件,后者是OPENAI的结束条件
|
||||
if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
|
||||
# 判定为数据流的结束,gpt_replying_buffer也写完了
|
||||
# logging.info(f'[response] {gpt_replying_buffer}')
|
||||
log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
|
||||
break
|
||||
# 处理数据流的主体
|
||||
@@ -343,12 +366,24 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
|
||||
chunk_decoded = chunk.decode()
|
||||
error_msg = chunk_decoded
|
||||
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
|
||||
print(error_msg)
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="Json解析异常" + error_msg) # 刷新界面
|
||||
logger.error(error_msg)
|
||||
return
|
||||
return # return from stream-branch
|
||||
|
||||
def handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history):
    """
    Handle a non-streaming reply (used for models that have streaming disabled).

    The whole response body is parsed as one JSON document, the assistant text is
    written to the last history slot and the last chatbot row, and the UI is refreshed.

    Args:
        response: HTTP response object; ``response.content`` is decoded and parsed,
            ``response.text`` is shown in the status message on failure.
        inputs: the user input, forwarded to ``log_chat``.
        llm_kwargs: must contain ``"llm_model"`` (forwarded to ``log_chat``).
        chatbot: conversation list shown in the UI; its last row is overwritten.
        history: flat history list; ``history[-1]`` is overwritten with the reply.

    Yields:
        Whatever ``update_ui`` yields (UI refresh events).
    """
    try:
        chunkjson = json.loads(response.content.decode())
        gpt_replying_buffer = chunkjson['choices'][0]["message"]["content"]
        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
        history[-1] = gpt_replying_buffer
        chatbot[-1] = (history[-2], history[-1])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    except Exception as e:
        # Stay best-effort (never crash the UI generator), but record the failure
        # instead of silently discarding it.
        logger.error(f"{type(e).__name__}: {e}")
        yield from update_ui(chatbot=chatbot, history=history, msg="Json解析异常" + response.text)  # refresh the UI
|
||||
|
||||
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
|
||||
from .bridge_all import model_info
|
||||
from request_llms.bridge_all import model_info
|
||||
openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
|
||||
if "reduce the length" in error_msg:
|
||||
if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出
|
||||
@@ -381,6 +416,8 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
|
||||
"""
|
||||
整合所有信息,选择LLM模型,生成http请求,为发送请求做准备
|
||||
"""
|
||||
from request_llms.bridge_all import model_info
|
||||
|
||||
if not is_any_api_key(llm_kwargs['api_key']):
|
||||
raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")
|
||||
|
||||
@@ -409,10 +446,16 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
|
||||
else:
|
||||
enable_multimodal_capacity = False
|
||||
|
||||
conversation_cnt = len(history) // 2
|
||||
openai_disable_system_prompt = model_info[llm_kwargs['llm_model']].get('openai_disable_system_prompt', False)
|
||||
|
||||
if openai_disable_system_prompt:
|
||||
messages = [{"role": "user", "content": system_prompt}]
|
||||
else:
|
||||
messages = [{"role": "system", "content": system_prompt}]
|
||||
|
||||
if not enable_multimodal_capacity:
|
||||
# 不使用多模态能力
|
||||
conversation_cnt = len(history) // 2
|
||||
messages = [{"role": "system", "content": system_prompt}]
|
||||
if conversation_cnt:
|
||||
for index in range(0, 2*conversation_cnt, 2):
|
||||
what_i_have_asked = {}
|
||||
@@ -434,8 +477,6 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
|
||||
messages.append(what_i_ask_now)
|
||||
else:
|
||||
# 多模态能力
|
||||
conversation_cnt = len(history) // 2
|
||||
messages = [{"role": "system", "content": system_prompt}]
|
||||
if conversation_cnt:
|
||||
for index in range(0, 2*conversation_cnt, 2):
|
||||
what_i_have_asked = {}
|
||||
@@ -486,7 +527,6 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
|
||||
"gpt-3.5-turbo-16k-0613",
|
||||
"gpt-3.5-turbo-0301",
|
||||
])
|
||||
logging.info("Random select model:" + model)
|
||||
|
||||
payload = {
|
||||
"model": model,
|
||||
@@ -496,10 +536,6 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
|
||||
"n": 1,
|
||||
"stream": stream,
|
||||
}
|
||||
# try:
|
||||
# print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
|
||||
# except:
|
||||
# print('输入中可能存在乱码。')
|
||||
|
||||
return headers,payload
|
||||
|
||||
|
||||
|
||||
@@ -8,15 +8,15 @@
|
||||
2. predict_no_ui_long_connection:支持多线程
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import requests
|
||||
import base64
|
||||
import os
|
||||
import glob
|
||||
from loguru import logger
|
||||
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, \
|
||||
update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files
|
||||
update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files, log_chat
|
||||
|
||||
|
||||
proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
|
||||
@@ -100,7 +100,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
|
||||
|
||||
raw_input = inputs
|
||||
logging.info(f'[raw_input] {raw_input}')
|
||||
def make_media_input(inputs, image_paths):
|
||||
for image_path in image_paths:
|
||||
inputs = inputs + f'<br/><br/><div align="center"><img src="file={os.path.abspath(image_path)}"></div>'
|
||||
@@ -185,7 +184,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
# 判定为数据流的结束,gpt_replying_buffer也写完了
|
||||
lastmsg = chatbot[-1][-1] + f"\n\n\n\n「{llm_kwargs['llm_model']}调用结束,该模型不具备上下文对话能力,如需追问,请及时切换模型。」"
|
||||
yield from update_ui_lastest_msg(lastmsg, chatbot, history, delay=1)
|
||||
logging.info(f'[response] {gpt_replying_buffer}')
|
||||
log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
|
||||
break
|
||||
# 处理数据流的主体
|
||||
status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
|
||||
@@ -210,7 +209,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
error_msg = chunk_decoded
|
||||
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key)
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
return
|
||||
|
||||
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key=""):
|
||||
@@ -301,10 +300,7 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths):
|
||||
"presence_penalty": 0,
|
||||
"frequency_penalty": 0,
|
||||
}
|
||||
try:
|
||||
print(f" {llm_kwargs['llm_model']} : {inputs[:100]} ..........")
|
||||
except:
|
||||
print('输入中可能存在乱码。')
|
||||
|
||||
return headers, payload, api_key
|
||||
|
||||
|
||||
|
||||
@@ -1,281 +0,0 @@
|
||||
# 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
|
||||
|
||||
"""
|
||||
该文件中主要包含三个函数
|
||||
|
||||
不具备多线程能力的函数:
|
||||
1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
|
||||
|
||||
具备多线程调用能力的函数
|
||||
2. predict_no_ui_long_connection:支持多线程
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
import gradio as gr
|
||||
import logging
|
||||
import traceback
|
||||
import requests
|
||||
import importlib
|
||||
|
||||
# config_private.py放自己的秘密如API和代理网址
|
||||
# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
|
||||
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc
|
||||
proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG = \
|
||||
get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG')
|
||||
|
||||
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
|
||||
'网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
|
||||
|
||||
def get_full_error(chunk, stream_response):
    """
    Collect the complete error message returned by the OpenAI endpoint.

    Keeps pulling items from ``stream_response`` and appending them to ``chunk``
    until the iterator is exhausted or any ordinary error occurs.

    Args:
        chunk: the part of the message that has already been read.
        stream_response: iterator yielding the remaining pieces.

    Returns:
        ``chunk`` with every piece that could be read appended to it.
    """
    while True:
        try:
            chunk += next(stream_response)
        except Exception:
            # Best effort: StopIteration ends the stream normally, and any other
            # ordinary failure simply stops the collection. KeyboardInterrupt and
            # SystemExit are deliberately NOT swallowed.
            break
    return chunk
|
||||
|
||||
|
||||
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    Send a request to the chat endpoint and return the complete reply in one go.

    No intermediate output is shown in the UI, but the HTTP request itself is made
    with ``stream=True`` and the reply is read chunk by chunk.

    Args:
        inputs: the input of this query.
        llm_kwargs: model parameters; ``llm_kwargs['llm_model']`` selects the endpoint.
        history: list of previous conversation turns (only passed on to
            ``generate_payload`` here).
        sys_prompt: the silent system prompt.
        observe_window: optional list used to pass partial output across threads.
            ``observe_window[0]`` receives the text produced so far and
            ``observe_window[1]`` is a watchdog timestamp; leave as ``None`` if unused.
        console_slience: when true, received text is not echoed to stdout.

    Returns:
        The concatenated reply text.

    Raises:
        TimeoutError: the request timed out more than ``MAX_RETRY`` times.
        ConnectionAbortedError: the endpoint asked to reduce the length, or the
            reply finished with ``finish_reason == 'length'``.
        RuntimeError: the endpoint rejected the request, the JSON structure was
            unexpected, the watchdog expired, or the content was filtered.
    """
    watch_dog_patience = 5  # watchdog patience in seconds
    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            from .bridge_all import model_info
            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS)
            break
        except requests.exceptions.ReadTimeout:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY:
                raise TimeoutError
            if MAX_RETRY != 0:
                print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response = response.iter_lines()
    result = ''
    json_data = None  # stays None when the stream ends before any JSON frame is parsed
    while True:
        try:
            chunk = next(stream_response).decode()
        except StopIteration:
            break
        except requests.exceptions.ConnectionError:
            # Failed once: retry a single time; a second failure is not handled.
            chunk = next(stream_response).decode()
        if len(chunk) == 0:
            continue
        if not chunk.startswith('data:'):
            error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
            if "reduce the length" in error_msg:
                raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
            else:
                raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
        if 'data: [DONE]' in chunk:
            break  # api2d finished normally
        # The chunk is known to start with 'data:' here, so slice the prefix off
        # (str.lstrip would strip a *set of characters*, not a prefix).
        json_data = json.loads(chunk[len('data:'):])['choices'][0]
        delta = json_data["delta"]
        if len(delta) == 0:
            break
        if "role" in delta:
            continue
        if "content" in delta:
            result += delta["content"]
            if not console_slience:
                print(delta["content"], end='')
            if observe_window is not None:
                # observation window: expose the data received so far
                if len(observe_window) >= 1:
                    observe_window[0] += delta["content"]
                # watchdog: abort when it has not been fed within the deadline
                if len(observe_window) >= 2:
                    if (time.time() - observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("用户取消了程序。")
        else:
            # delta is a dict; it must be converted before concatenation
            raise RuntimeError("意外Json结构:" + str(delta))

    finish_reason = json_data.get('finish_reason', None) if json_data else None
    if finish_reason == 'content_filter':
        raise RuntimeError("由于提问含不合规内容被Azure过滤。")
    if finish_reason == 'length':
        raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
    return result
|
||||
|
||||
|
||||
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=None, system_prompt='', stream = True, additional_fn=None):
    """
    Send a request to the chat endpoint and stream the reply into the UI.

    Used for the basic conversation feature.

    Args:
        inputs: the input of this query.
        llm_kwargs: model parameters; ``llm_kwargs['llm_model']`` selects the endpoint.
        plugin_kwargs: not used by this function.
        chatbot: the conversation list shown in the WebUI; it is modified and then
            yielded through ``update_ui`` so the interface refreshes.
        history: list of previous conversation turns. This query and its reply are
            appended to it. Defaults to a fresh list per call.
        system_prompt: the system prompt.
        stream: when false, the reply body is not consumed at all.
        additional_fn: which basic-function button was clicked, if any.

    Yields:
        Whatever ``update_ui`` yields (UI refresh events).

    Raises:
        TimeoutError: the POST failed more than ``MAX_RETRY`` times.
    """
    # A mutable default would be shared between calls, because this function
    # appends to ``history``; create a new list instead.
    if history is None:
        history = []

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = inputs
    logging.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应")  # refresh the UI

    try:
        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
    except RuntimeError:
        chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求")  # refresh the UI
        return

    history.append(inputs)
    history.append("")

    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            from .bridge_all import model_info
            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS)
            break
        except Exception:
            # Any ordinary failure counts as a retry; KeyboardInterrupt/SystemExit propagate.
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg)  # refresh the UI
            if retry > MAX_RETRY:
                raise TimeoutError

    gpt_replying_buffer = ""

    is_head_of_the_stream = True
    if stream:
        stream_response = response.iter_lines()
        chunk = b""  # defined up front so the StopIteration handler works on an empty stream
        while True:
            try:
                chunk = next(stream_response)
            except StopIteration:
                # Per the original note: endpoints other than official OpenAI / API2D end up here.
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode())  # refresh the UI
                return

            if is_head_of_the_stream and (r'"object":"error"' not in chunk.decode()):
                # the first frame of the stream carries no content
                is_head_of_the_stream = False
                continue

            if chunk:
                try:
                    chunk_decoded = chunk.decode()
                    if 'data: [DONE]' in chunk_decoded:
                        # end of the stream; gpt_replying_buffer is complete
                        logging.info(f'[response] {gpt_replying_buffer}')
                        break
                    # main body of the stream; [6:] drops the leading "data: " marker
                    chunkjson = json.loads(chunk_decoded[6:])
                    status_text = f"finish_reason: {chunkjson['choices'][0]['finish_reason']}"
                    delta = chunkjson['choices'][0]["delta"]
                    if "content" in delta:
                        gpt_replying_buffer = gpt_replying_buffer + delta["content"]
                    history[-1] = gpt_replying_buffer
                    chatbot[-1] = (history[-2], history[-1])
                    yield from update_ui(chatbot=chatbot, history=history, msg=status_text)  # refresh the UI
                except Exception:
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规")  # refresh the UI
                    chunk = get_full_error(chunk, stream_response)
                    chunk_decoded = chunk.decode()
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg)  # refresh the UI
                    print(error_msg)
                    return
|
||||
|
||||
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
    """
    Turn an error message from the endpoint into a reply shown in the last chatbot row.

    Known error fragments are matched in a fixed order against ``error_msg``; the
    first match decides the text. Unknown errors show the current traceback and
    the raw decoded chunk.

    Args:
        inputs: the user input (forwarded to ``clip_history`` on length overflow).
        llm_kwargs: ``llm_kwargs['llm_model']`` is used for the tokenizer/limit
            lookup and in the "does not exist" message.
        chatbot: conversation list; its last row is replaced.
        history: history list; partially cleared and clipped on length overflow.
        chunk_decoded: raw decoded response text, shown for unknown errors.
        error_msg: text that is searched for the known error fragments.

    Returns:
        The tuple ``(chatbot, history)``.
    """
    from .bridge_all import model_info
    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
    question = lambda: chatbot[-1][0]

    # Length overflow: blank the current input/output pair, then clip the history.
    if "reduce the length" in error_msg:
        if len(history) >= 2:
            history[-1] = ""
            history[-2] = ""
        current_model = model_info[llm_kwargs['llm_model']]
        history = clip_history(inputs=inputs, history=history, tokenizer=current_model['tokenizer'],
                               max_token_limit=(current_model['max_token']))
        chatbot[-1] = (question(), "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        return chatbot, history

    if "does not exist" in error_msg:
        chatbot[-1] = (question(), f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
        return chatbot, history

    # Remaining known errors, checked in the listed order.
    fixed_replies = (
        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website),
        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website),
        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足."),
        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足."),
    )
    for fragment, reply in fixed_replies:
        if fragment in error_msg:
            chatbot[-1] = (question(), reply)
            return chatbot, history

    # Unknown error: show the traceback together with the raw response.
    from toolbox import regular_txt_to_markdown
    tb_str = '```\n' + trimmed_format_exc() + '```'
    chatbot[-1] = (question(), f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history
|
||||
|
||||
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    Gather all information, select the model and build the HTTP request.

    Args:
        inputs: the current user input; becomes the final "user" message.
        llm_kwargs: must contain ``api_key``, ``llm_model``, ``temperature`` and ``top_p``.
        history: flat list of alternating user / assistant texts.
        system_prompt: content of the leading "system" message.
        stream: value of the ``stream`` field in the payload.

    Returns:
        The tuple ``(headers, payload)``.

    Raises:
        AssertionError: ``llm_kwargs['api_key']`` is not recognised as an API key.
    """
    if not is_any_api_key(llm_kwargs['api_key']):
        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")

    headers = {
        "Content-Type": "application/json",
    }

    conversation_cnt = len(history) // 2

    messages = [{"role": "system", "content": system_prompt}]
    for index in range(0, 2 * conversation_cnt, 2):
        user_text = history[index]
        assistant_text = history[index + 1]
        if user_text != "":
            # Skip turns that never got a real answer (empty or the timeout placeholder).
            if assistant_text == "" or assistant_text == timeout_bot_msg:
                continue
            messages.append({"role": "user", "content": user_text})
            messages.append({"role": "assistant", "content": assistant_text})
        else:
            # An empty question: the answer overwrites the content of the previous message.
            messages[-1]['content'] = assistant_text

    messages.append({"role": "user", "content": inputs})

    # Remove only a leading "api2d-" prefix. str.strip('api2d-') would strip any of
    # the characters a/p/i/2/d/- from BOTH ends and could corrupt the model name.
    model_name = llm_kwargs['llm_model']
    if model_name.startswith('api2d-'):
        model_name = model_name[len('api2d-'):]

    payload = {
        "model": model_name,
        "messages": messages,
        "temperature": llm_kwargs['temperature'],  # 1.0,
        "top_p": llm_kwargs['top_p'],  # 1.0,
        "n": 1,
        "stream": stream,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    try:
        print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
    except Exception:
        # Printing is diagnostic only; it must never block the request.
        print('输入中可能存在乱码。')
    return headers, payload
|
||||
|
||||
|
||||
@@ -9,13 +9,14 @@
|
||||
具备多线程调用能力的函数
|
||||
2. predict_no_ui_long_connection:支持多线程
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
import json
|
||||
import requests
|
||||
from loguru import logger
|
||||
from toolbox import get_conf, update_ui, trimmed_format_exc, encode_image, every_image_file_in_path, log_chat
|
||||
|
||||
picture_system_prompt = "\n当回复图像时,必须说明正在回复哪张图像。所有图像仅在最后一个问题中提供,即使它们在历史记录中被提及。请使用'这是第X张图像:'的格式来指明您正在描述的是哪张图像。"
|
||||
Claude_3_Models = ["claude-3-haiku-20240307", "claude-3-sonnet-20240229", "claude-3-opus-20240229", "claude-3-5-sonnet-20240620"]
|
||||
|
||||
@@ -101,7 +102,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
|
||||
retry += 1
|
||||
traceback.print_exc()
|
||||
if retry > MAX_RETRY: raise TimeoutError
|
||||
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
stream_response = response.iter_lines()
|
||||
result = ''
|
||||
while True:
|
||||
@@ -116,12 +117,11 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
|
||||
if need_to_pass:
|
||||
pass
|
||||
elif is_last_chunk:
|
||||
# logging.info(f'[response] {result}')
|
||||
# logger.info(f'[response] {result}')
|
||||
break
|
||||
else:
|
||||
if chunkjson and chunkjson['type'] == 'content_block_delta':
|
||||
result += chunkjson['delta']['text']
|
||||
print(chunkjson['delta']['text'], end='')
|
||||
if observe_window is not None:
|
||||
# 观测窗,把已经获取的数据显示出去
|
||||
if len(observe_window) >= 1:
|
||||
@@ -134,7 +134,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
|
||||
chunk = get_full_error(chunk, stream_response)
|
||||
chunk_decoded = chunk.decode()
|
||||
error_msg = chunk_decoded
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
raise RuntimeError("Json解析不合常规")
|
||||
|
||||
return result
|
||||
@@ -200,7 +200,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
retry += 1
|
||||
traceback.print_exc()
|
||||
if retry > MAX_RETRY: raise TimeoutError
|
||||
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
stream_response = response.iter_lines()
|
||||
gpt_replying_buffer = ""
|
||||
|
||||
@@ -217,7 +217,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
pass
|
||||
elif is_last_chunk:
|
||||
log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
|
||||
# logging.info(f'[response] {gpt_replying_buffer}')
|
||||
# logger.info(f'[response] {gpt_replying_buffer}')
|
||||
break
|
||||
else:
|
||||
if chunkjson and chunkjson['type'] == 'content_block_delta':
|
||||
@@ -230,7 +230,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
chunk = get_full_error(chunk, stream_response)
|
||||
chunk_decoded = chunk.decode()
|
||||
error_msg = chunk_decoded
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
raise RuntimeError("Json解析不合常规")
|
||||
|
||||
def multiple_picture_types(image_paths):
|
||||
|
||||
@@ -13,11 +13,9 @@
|
||||
import json
|
||||
import time
|
||||
import gradio as gr
|
||||
import logging
|
||||
import traceback
|
||||
import requests
|
||||
import importlib
|
||||
import random
|
||||
from loguru import logger
|
||||
|
||||
# config_private.py放自己的秘密如API和代理网址
|
||||
# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
|
||||
@@ -98,7 +96,7 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
|
||||
retry += 1
|
||||
traceback.print_exc()
|
||||
if retry > MAX_RETRY: raise TimeoutError
|
||||
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
|
||||
stream_response = response.iter_lines()
|
||||
result = ''
|
||||
@@ -153,7 +151,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
|
||||
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
|
||||
|
||||
raw_input = inputs
|
||||
# logging.info(f'[raw_input] {raw_input}')
|
||||
# logger.info(f'[raw_input] {raw_input}')
|
||||
chatbot.append((inputs, ""))
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
|
||||
|
||||
@@ -237,7 +235,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
|
||||
error_msg = chunk_decoded
|
||||
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
return
|
||||
|
||||
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
model_name = "deepseek-coder-6.7b-instruct"
|
||||
cmd_to_install = "未知" # "`pip install -r request_llms/requirements_qwen.txt`"
|
||||
|
||||
import os
|
||||
from toolbox import ProxyNetworkActivate
|
||||
from toolbox import get_conf
|
||||
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
|
||||
from request_llms.local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
|
||||
from threading import Thread
|
||||
from loguru import logger
|
||||
import torch
|
||||
import os
|
||||
|
||||
def download_huggingface_model(model_name, max_retry, local_dir):
|
||||
from huggingface_hub import snapshot_download
|
||||
@@ -15,7 +16,7 @@ def download_huggingface_model(model_name, max_retry, local_dir):
|
||||
snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
|
||||
break
|
||||
except Exception as e:
|
||||
print(f'\n\n下载失败,重试第{i}次中...\n\n')
|
||||
logger.error(f'\n\n下载失败,重试第{i}次中...\n\n')
|
||||
return local_dir
|
||||
# ------------------------------------------------------------------------------------------------------------------------
|
||||
# 🔌💻 Local Model
|
||||
@@ -112,7 +113,6 @@ class GetCoderLMHandle(LocalLLMHandle):
|
||||
generated_text = ""
|
||||
for new_text in self._streamer:
|
||||
generated_text += new_text
|
||||
# print(generated_text)
|
||||
yield generated_text
|
||||
|
||||
|
||||
|
||||
@@ -65,10 +65,10 @@ class GetInternlmHandle(LocalLLMHandle):
|
||||
|
||||
def llm_stream_generator(self, **kwargs):
|
||||
import torch
|
||||
import logging
|
||||
import copy
|
||||
import warnings
|
||||
import torch.nn as nn
|
||||
from loguru import logger as logging
|
||||
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig
|
||||
|
||||
# 🏃♂️🏃♂️🏃♂️ 子进程执行
|
||||
@@ -119,7 +119,7 @@ class GetInternlmHandle(LocalLLMHandle):
|
||||
elif generation_config.max_new_tokens is not None:
|
||||
generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
|
||||
if not has_default_max_length:
|
||||
logging.warn(
|
||||
logging.warning(
|
||||
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
|
||||
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
|
||||
"Please refer to the documentation for more information. "
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
|
||||
from toolbox import get_conf, update_ui, log_chat
|
||||
from toolbox import ChatBotWithCookies
|
||||
|
||||
@@ -13,11 +13,11 @@
|
||||
import json
|
||||
import time
|
||||
import gradio as gr
|
||||
import logging
|
||||
import traceback
|
||||
import requests
|
||||
import importlib
|
||||
import random
|
||||
from loguru import logger
|
||||
|
||||
# config_private.py放自己的秘密如API和代理网址
|
||||
# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
|
||||
@@ -81,7 +81,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
|
||||
retry += 1
|
||||
traceback.print_exc()
|
||||
if retry > MAX_RETRY: raise TimeoutError
|
||||
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
|
||||
|
||||
stream_response = response.iter_lines()
|
||||
result = ''
|
||||
@@ -96,7 +96,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
|
||||
try:
|
||||
if is_last_chunk:
|
||||
# 判定为数据流的结束,gpt_replying_buffer也写完了
|
||||
logging.info(f'[response] {result}')
|
||||
logger.info(f'[response] {result}')
|
||||
break
|
||||
result += chunkjson['message']["content"]
|
||||
if not console_slience: print(chunkjson['message']["content"], end='')
|
||||
@@ -112,7 +112,7 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
|
||||
chunk = get_full_error(chunk, stream_response)
|
||||
chunk_decoded = chunk.decode()
|
||||
error_msg = chunk_decoded
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
raise RuntimeError("Json解析不合常规")
|
||||
return result
|
||||
|
||||
@@ -134,7 +134,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
|
||||
|
||||
raw_input = inputs
|
||||
logging.info(f'[raw_input] {raw_input}')
|
||||
logger.info(f'[raw_input] {raw_input}')
|
||||
chatbot.append((inputs, ""))
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
|
||||
|
||||
@@ -183,7 +183,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
try:
|
||||
if is_last_chunk:
|
||||
# 判定为数据流的结束,gpt_replying_buffer也写完了
|
||||
logging.info(f'[response] {gpt_replying_buffer}')
|
||||
logger.info(f'[response] {gpt_replying_buffer}')
|
||||
break
|
||||
# 处理数据流的主体
|
||||
try:
|
||||
@@ -202,7 +202,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
|
||||
error_msg = chunk_decoded
|
||||
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
|
||||
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
return
|
||||
|
||||
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
|
||||
@@ -265,8 +265,5 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
|
||||
"messages": messages,
|
||||
"options": options,
|
||||
}
|
||||
try:
|
||||
print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
|
||||
except:
|
||||
print('输入中可能存在乱码。')
|
||||
|
||||
return headers,payload
|
||||
|
||||
541
request_llms/bridge_openrouter.py
普通文件
541
request_llms/bridge_openrouter.py
普通文件
@@ -0,0 +1,541 @@
|
||||
"""
|
||||
该文件中主要包含三个函数
|
||||
|
||||
不具备多线程能力的函数:
|
||||
1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
|
||||
|
||||
具备多线程调用能力的函数
|
||||
2. predict_no_ui_long_connection:支持多线程
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import traceback
|
||||
import requests
|
||||
import random
|
||||
from loguru import logger
|
||||
|
||||
# config_private.py放自己的秘密如API和代理网址
|
||||
# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
|
||||
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history
|
||||
from toolbox import trimmed_format_exc, is_the_upload_folder, read_one_api_model_name, log_chat
|
||||
from toolbox import ChatBotWithCookies, have_any_recent_upload_image_files, encode_image
|
||||
# Module-level settings fetched once at import time through toolbox.get_conf.
# They are used below for the HTTP request (proxies, TIMEOUT_SECONDS), the retry
# loops (MAX_RETRY) and the request headers (API_ORG, AZURE_CFG_ARRAY).
proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
    get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY')

# Reply placed in the chat when the HTTP request fails; generate_payload also
# compares history entries against it so failed rounds are not re-sent.
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
                  '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
|
||||
|
||||
def get_full_error(chunk, stream_response):
    """
    Drain the rest of a streamed response and append it to ``chunk``.

    Used to collect the complete error body once a chunk has been recognised
    as something other than a normal data frame.

    Args:
        chunk: The bytes already read from the stream.
        stream_response: Iterator yielding the remaining byte chunks.

    Returns:
        ``chunk`` followed by every piece that could still be read.
    """
    try:
        for piece in stream_response:
            chunk += piece
    except Exception:
        # Best effort: a broken stream just ends the collection. Unlike a bare
        # ``except``, KeyboardInterrupt / SystemExit are allowed to propagate.
        pass
    return chunk
|
||||
|
||||
def make_multimodal_input(inputs, image_paths):
    """
    Append one inline ``<img>`` tag per image to the input text.

    Each image path is made absolute and encoded with ``encode_image``; the
    tag carries both the file path and the encoded payload.

    Returns:
        A tuple ``(inputs_with_tags, encoded_images)`` where ``encoded_images``
        lists the encoded payloads in the order of ``image_paths``.
    """
    encoded_images = []
    for image_path in image_paths:
        absolute_path = os.path.abspath(image_path)
        encoded = encode_image(absolute_path)
        img_tag = f'<br/><br/><div align="center"><img src="file={absolute_path}" base64="{encoded}"></div>'
        inputs = inputs + img_tag
        encoded_images.append(encoded)
    return inputs, encoded_images
|
||||
|
||||
def reverse_base64_from_input(inputs):
    """
    Extract the base64 payloads of all inline image tags found in ``inputs``.

    Only tags of the exact form produced by ``make_multimodal_input``
    (``<br/><br/><div align="center"><img ... base64="..."></div>``) match.

    Returns:
        A list with one base64 string per matching tag, in order of appearance
        (empty when there is none).
    """
    tag_regex = r'<br/><br/><div align="center"><img[^<>]+base64="([^"]+)"></div>'
    return re.findall(tag_regex, inputs)
|
||||
|
||||
def contain_base64(inputs):
    """Return True when ``inputs`` holds at least one inline base64 image tag."""
    return bool(reverse_base64_from_input(inputs))
|
||||
|
||||
def append_image_if_contain_base64(inputs):
    """
    Convert text with inline image tags into multimodal message content.

    Returns:
        ``inputs`` unchanged when it carries no image tag; otherwise a list
        whose first element is a ``text`` part (the input with the tags
        removed) followed by one ``image_url`` part per embedded image.
    """
    if not contain_base64(inputs):
        return inputs

    embedded_images = reverse_base64_from_input(inputs)
    text_without_tags = re.sub(r'<br/><br/><div align="center"><img[^><]+></div>', '', inputs)

    parts = [{"type": "text", "text": text_without_tags}]
    parts.extend(
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{encoded}"
            }
        }
        for encoded in embedded_images
    )
    return parts
|
||||
|
||||
def remove_image_if_contain_base64(inputs):
    """
    Strip inline image tags from ``inputs``.

    Returns:
        The input with every ``<br/><br/><div align="center"><img ...></div>``
        tag removed, or ``inputs`` itself when it carries no base64 image tag.
    """
    if contain_base64(inputs):
        return re.sub(r'<br/><br/><div align="center"><img[^><]+></div>', '', inputs)
    return inputs
|
||||
|
||||
def decode_chunk(chunk):
    """
    Decode one streamed chunk and pre-compute flags describing its shape.

    The first six characters of the decoded text (the ``data: `` prefix of a
    normal frame) are skipped before JSON parsing. Parsing is best effort:
    whatever was determined before a failure is kept and the remaining flags
    stay ``False``.

    Args:
        chunk: Raw bytes of one line of the stream.

    Returns:
        A tuple ``(chunk_decoded, chunkjson, has_choices, choice_valid,
        has_content, has_role)``:
        chunk_decoded -- the chunk decoded to text;
        chunkjson     -- the parsed JSON, or None when parsing failed;
        has_choices   -- the JSON contains a ``choices`` key;
        choice_valid  -- ``choices`` is non-empty;
        has_content   -- the first choice's delta has a non-None ``content``;
        has_role      -- the first choice's delta has a ``role`` key.
    """
    chunk_decoded = chunk.decode()
    chunkjson = None
    has_choices = False
    choice_valid = False
    has_content = False
    has_role = False
    try:
        chunkjson = json.loads(chunk_decoded[6:])
        has_choices = 'choices' in chunkjson
        if has_choices:
            choice_valid = (len(chunkjson['choices']) > 0)
        if has_choices and choice_valid:
            has_content = ("content" in chunkjson['choices'][0]["delta"])
        if has_content:
            has_content = (chunkjson['choices'][0]["delta"]["content"] is not None)
        if has_choices and choice_valid:
            has_role = "role" in chunkjson['choices'][0]["delta"]
    except Exception:
        # Malformed or unexpected frames are reported through the flags rather
        # than raised; KeyboardInterrupt / SystemExit are no longer swallowed.
        pass
    return chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role
|
||||
|
||||
from functools import lru_cache


@lru_cache(maxsize=32)
def verify_endpoint(endpoint):
    """
    Check that the endpoint is usable and return it unchanged.

    Raises:
        ValueError: if the endpoint still contains the configuration
            placeholder text.
    """
    if "你亲手写的api名称" not in endpoint:
        return endpoint
    raise ValueError("Endpoint不正确, 请检查AZURE_ENDPOINT的配置! 当前的Endpoint为:" + endpoint)
|
||||
|
||||
def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[], sys_prompt:str="", observe_window:list=None, console_slience:bool=False):
    """
    Send a request to the model and return the complete reply as one string.

    Intermediate output is not shown in a UI, but unless the model is flagged
    ``openai_disable_stream`` the reply is still fetched as a stream, so a
    dropped connection is noticed early.

    Args:
        inputs: The text of the current query.
        llm_kwargs: Model parameters; ``llm_kwargs['llm_model']`` selects the
            entry in ``model_info``.
        history: Previous conversation, forwarded to ``generate_payload``.
        sys_prompt: The silent system prompt.
        observe_window: Optional list shared with another thread.
            ``observe_window[0]`` accumulates the text received so far;
            ``observe_window[1]`` is a watchdog timestamp -- if it is older
            than 5 seconds the request is aborted.
        console_slience: When True, received text is not echoed to stdout.

    Returns:
        The full reply text.

    Raises:
        TimeoutError: the request timed out more than ``MAX_RETRY`` times.
        ConnectionAbortedError: the input was too long, or the reply was cut
            off because of the token limit.
        RuntimeError: the API rejected the request, the content was filtered,
            the watchdog expired, or the stream had an unexpected structure.
    """
    from request_llms.bridge_all import model_info

    watch_dog_patience = 5  # seconds the watchdog waits before aborting

    stream = not model_info[llm_kwargs['llm_model']].get('openai_disable_stream', False)

    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=stream)
    retry = 0
    while True:
        try:
            endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                     json=payload, stream=stream, timeout=TIMEOUT_SECONDS)
            break
        except requests.exceptions.ReadTimeout:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
            if MAX_RETRY!=0: logger.error(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

    if not stream:
        # Only for models that do not support streaming (e.g. o1).
        chunkjson = json.loads(response.content.decode())
        return chunkjson['choices'][0]["message"]["content"]

    stream_response = response.iter_lines()
    result = ''
    json_data = None
    while True:
        try:
            chunk = next(stream_response)
        except StopIteration:
            break
        except requests.exceptions.ConnectionError:
            chunk = next(stream_response)  # retry once; a second failure propagates
        chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)
        if len(chunk_decoded)==0: continue
        if not chunk_decoded.startswith('data:'):
            # Not a data frame: treat the rest of the stream as an error body.
            error_msg = get_full_error(chunk, stream_response).decode()
            if "reduce the length" in error_msg:
                raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
            elif 'type":"upstream_error","param":"307' in error_msg:
                raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
            else:
                raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
        if ('data: [DONE]' in chunk_decoded): break  # normal end of stream
        if (has_choices and not choice_valid) or ('OPENROUTER PROCESSING' in chunk_decoded):
            # Empty ``choices`` from some third-party gateways, or OpenRouter's
            # "OPENROUTER PROCESSING" frames: nothing to read, skip.
            continue
        json_data = chunkjson['choices'][0]
        delta = json_data["delta"]
        if len(delta) == 0: break
        if (not has_content) and has_role: continue
        if (not has_content) and (not has_role): continue
        if has_content:
            result += delta["content"]
            if not console_slience: print(delta["content"], end='')
            if observe_window is not None:
                # Observation window: publish what has been received so far.
                if len(observe_window) >= 1:
                    observe_window[0] += delta["content"]
                # Watchdog: abort when it has not been fed within the deadline.
                if len(observe_window) >= 2:
                    if (time.time()-observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("用户取消了程序。")
        else:
            # ``delta`` is a dict; convert it so the intended RuntimeError is
            # raised instead of a TypeError from str + dict.
            raise RuntimeError("意外Json结构:"+str(delta))
    # ``finish_reason`` may be missing from a provider's frame; use .get().
    finish_reason = json_data.get('finish_reason') if json_data else None
    if finish_reason == 'content_filter':
        raise RuntimeError("由于提问含不合规内容被Azure过滤。")
    if finish_reason == 'length':
        raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
    return result
|
||||
|
||||
|
||||
def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWithCookies,
            history:list=[], system_prompt:str='', stream:bool=True, additional_fn:str=None):
    """
    Send a request to the model and stream the reply into the chat UI.

    This is a generator: each ``yield`` (through ``update_ui``) refreshes the
    interface with the current ``chatbot`` and ``history``.

    Args:
        inputs: The text of the current query. If it is recognised as an API
            key it is stored in the chatbot cookies instead of being sent.
        llm_kwargs: Model parameters; ``llm_kwargs['llm_model']`` selects the
            entry in ``model_info``.
        plugin_kwargs: Not used by this function.
        chatbot: The conversation shown in the WebUI; it is modified in place
            and yielded to update the interface.
        history: Previous conversation as a flat list; the new question and
            the growing answer are appended to it.
        system_prompt: The system prompt.
        stream: Overwritten from the model's ``openai_disable_stream`` flag.
        additional_fn: Name of the clicked core-function button, if any; the
            input is pre-processed by ``handle_core_functionality``.

    Raises:
        TimeoutError: the HTTP request failed more than ``MAX_RETRY`` times.
    """
    from request_llms.bridge_all import model_info
    if is_any_api_key(inputs):
        chatbot._cookies['api_key'] = inputs
        chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
        yield from update_ui(chatbot=chatbot, history=history, msg="api_key已导入") # refresh UI
        return
    elif not is_any_api_key(chatbot._cookies['api_key']):
        chatbot.append((inputs, "缺少api_key。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。"))
        yield from update_ui(chatbot=chatbot, history=history, msg="缺少api_key") # refresh UI
        return

    user_input = inputs
    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # Multimodal models: pick up recently uploaded images, if any.
    has_multimodal_capacity = model_info[llm_kwargs['llm_model']].get('has_multimodal_capacity', False)
    if has_multimodal_capacity:
        has_recent_image_upload, image_paths = have_any_recent_upload_image_files(chatbot, pop=True)
    else:
        has_recent_image_upload, image_paths = False, []
    if has_recent_image_upload:
        _inputs, image_base64_array = make_multimodal_input(inputs, image_paths)
    else:
        _inputs, image_base64_array = inputs, []
    chatbot.append((_inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh UI

    # Models flagged as not supporting streaming are requested in one piece.
    stream = not model_info[llm_kwargs['llm_model']].get('openai_disable_stream', False)

    # Warn when the raw input is the upload folder (wrong button clicked).
    if is_the_upload_folder(user_input):
        chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。")
        yield from update_ui(chatbot=chatbot, history=history, msg="正常") # refresh UI
        time.sleep(2)

    try:
        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, image_base64_array, has_multimodal_capacity, stream)
    except RuntimeError:
        chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # refresh UI
        return

    # Validate the endpoint; show the traceback in the chat on failure.
    try:
        endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
    except Exception:
        tb_str = '```\n' + trimmed_format_exc() + '```'
        chatbot[-1] = (inputs, tb_str)
        yield from update_ui(chatbot=chatbot, history=history, msg="Endpoint不满足要求") # refresh UI
        return

    # Record the question (with image tags when images were uploaded).
    if has_recent_image_upload:
        history.extend([_inputs, ""])
    else:
        history.extend([inputs, ""])

    retry = 0
    while True:
        try:
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                     json=payload, stream=stream, timeout=TIMEOUT_SECONDS)
            break
        except Exception:
            # Any request failure is retried; ``except Exception`` keeps
            # KeyboardInterrupt / SystemExit from being swallowed.
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh UI
            if retry > MAX_RETRY: raise TimeoutError

    if not stream:
        # Only for models that do not support streaming (e.g. o1).
        yield from handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history)
        return

    gpt_replying_buffer = ""
    is_head_of_the_stream = True
    stream_response = response.iter_lines()
    # Pre-bind ``chunk`` so the StopIteration handler below cannot hit an
    # UnboundLocalError when the stream ends before yielding any line.
    chunk = b""
    while True:
        try:
            chunk = next(stream_response)
        except StopIteration:
            # Stream ended without a DONE frame; ``chunk`` is the last line read.
            chunk_decoded = chunk.decode()
            error_msg = chunk_decoded
            # A reply was received and the last line is empty: a gateway that
            # simply omits the DONE frame. Accept the reply.
            if len(gpt_replying_buffer.strip()) > 0 and len(error_msg) == 0:
                yield from update_ui(chatbot=chatbot, history=history, msg="检测到有缺陷的非OpenAI官方接口,建议选择更稳定的接口。")
                break
            # Otherwise report the last line as an error.
            chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
            yield from update_ui(chatbot=chatbot, history=history, msg="非OpenAI官方接口返回了错误:" + chunk.decode()) # refresh UI
            return

        # Pre-read some information used to detect abnormal frames.
        chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)

        if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r"content" not in chunk_decoded):
            # The first frame of the stream carries no content.
            is_head_of_the_stream = False; continue

        if chunk:
            try:
                if (has_choices and not choice_valid) or ('OPENROUTER PROCESSING' in chunk_decoded):
                    # Empty ``choices`` from some third-party gateways, or
                    # OpenRouter's "OPENROUTER PROCESSING" frames: skip.
                    continue
                if ('data: [DONE]' not in chunk_decoded) and len(chunk_decoded) > 0 and (chunkjson is None):
                    # Something unparseable was received.
                    raise ValueError(f'无法读取以下数据,请检查配置。\n\n{chunk_decoded}')
                # End of stream: explicit DONE frame, or an empty delta.
                if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
                    log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=gpt_replying_buffer)
                    break
                # Body of the stream.
                status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
                if has_content:
                    # Normal case.
                    gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
                elif has_role:
                    # Role-only frame from some third-party gateways: skip.
                    continue
                else:
                    # Outside what a well-behaved API sends; a missing
                    # ``content`` key raises here and is handled below.
                    if chunkjson['choices'][0]["delta"]["content"] is None: continue
                    gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]

                history[-1] = gpt_replying_buffer
                chatbot[-1] = (history[-2], history[-1])
                yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # refresh UI
            except Exception:
                yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # refresh UI
                chunk = get_full_error(chunk, stream_response)
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                yield from update_ui(chatbot=chatbot, history=history, msg="Json解析异常" + error_msg) # refresh UI
                logger.error(error_msg)
                return
    return  # end of the streaming branch
|
||||
|
||||
def handle_o1_model_special(response, inputs, llm_kwargs, chatbot, history):
    """
    Handle a non-streamed response: parse it once and show the whole reply.

    The reply text is logged with ``log_chat``, written to the last history
    slot and to the last chatbot row, then the UI is refreshed. If anything
    in that sequence fails, the UI is refreshed with the raw response text
    in the status message instead.
    """
    try:
        parsed = json.loads(response.content.decode())
        reply_text = parsed['choices'][0]["message"]["content"]
        log_chat(llm_model=llm_kwargs["llm_model"], input_str=inputs, output_str=reply_text)
        history[-1] = reply_text
        chatbot[-1] = (history[-2], history[-1])
        yield from update_ui(chatbot=chatbot, history=history) # refresh UI
    except Exception:
        failure_status = "Json解析异常" + response.text
        yield from update_ui(chatbot=chatbot, history=history, msg=failure_status) # refresh UI
|
||||
|
||||
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
    """
    Turn an API error message into a reply shown in the last chatbot row.

    Known error fragments are matched in a fixed order and mapped to a
    ``[Local Message]`` explanation. For "reduce the length" the current
    round is blanked and the history is shortened with ``clip_history``.
    Unrecognised errors show the current traceback plus the raw chunk.

    Returns:
        The tuple ``(chatbot, history)``; ``history`` may be a new, clipped list.
    """
    from request_llms.bridge_all import model_info
    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'

    def show(reply):
        # Keep the question of the last row, replace only the answer.
        chatbot[-1] = (chatbot[-1][0], reply)

    if "reduce the length" in error_msg:
        if len(history) >= 2:
            # history[-2] is this round's input, history[-1] its output.
            history[-1] = ""
            history[-2] = ""
        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token']))
        show("[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        return chatbot, history

    if "does not exist" in error_msg:
        show(f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
        return chatbot, history

    # Remaining known errors, checked in this order; first match wins.
    known_errors = (
        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website),
        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website),
        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("API key has been deactivated", "[Local Message] API key has been deactivated. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足."),
        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足."),
    )
    for fragment, reply in known_errors:
        if fragment in error_msg:
            show(reply)
            return chatbot, history

    # Unknown error: show the traceback and the raw chunk.
    from toolbox import regular_txt_to_markdown
    tb_str = '```\n' + trimmed_format_exc() + '```'
    show(f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history
|
||||
|
||||
def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:str, image_base64_array:list=[], has_multimodal_capacity:bool=False, stream:bool=True):
    """
    Assemble the HTTP headers and JSON payload for one chat request.

    Args:
        inputs: The text of the current query.
        llm_kwargs: Must provide ``api_key``, ``llm_model``, ``temperature``
            and ``top_p``.
        history: Flat list of alternating question / answer strings.
        system_prompt: Sent as the first message (as a ``user`` message when
            the model is flagged ``openai_disable_system_prompt``).
        image_base64_array: Encoded images attached to the current query.
        has_multimodal_capacity: Whether the model accepts images. Multimodal
            message content is only used when this is True and the query or
            the history actually contains an image.
        stream: Value of the payload's ``stream`` field.

    Returns:
        A tuple ``(headers, payload)``.

    Raises:
        AssertionError: ``llm_kwargs['api_key']`` is not a recognised API key.
    """
    from request_llms.bridge_all import model_info

    if not is_any_api_key(llm_kwargs['api_key']):
        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")

    llm_model = llm_kwargs['llm_model']

    if llm_model.startswith('vllm-'):
        api_key = 'no-api-key'
    else:
        api_key = select_api_key(llm_kwargs['api_key'], llm_model)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    if API_ORG.startswith('org-'): headers.update({"OpenAI-Organization": API_ORG})
    if llm_model.startswith('azure-'):
        headers.update({"api-key": api_key})
        if llm_model in AZURE_CFG_ARRAY.keys():
            # A per-model Azure key overrides the shared one.
            azure_api_key_unshared = AZURE_CFG_ARRAY[llm_model]["AZURE_API_KEY"]
            headers.update({"api-key": azure_api_key_unshared})

    if has_multimodal_capacity:
        # Multimodal content is used when the model supports it AND the
        # current input or any history entry contains an image.
        enable_multimodal_capacity = (len(image_base64_array) > 0) or any([contain_base64(h) for h in history])
    else:
        enable_multimodal_capacity = False

    conversation_cnt = len(history) // 2
    openai_disable_system_prompt = model_info[llm_model].get('openai_disable_system_prompt', False)

    if openai_disable_system_prompt:
        messages = [{"role": "user", "content": system_prompt}]
    else:
        messages = [{"role": "system", "content": system_prompt}]

    # History entries either keep their images (as multimodal parts) or have
    # the inline image tags stripped, depending on the mode.
    if enable_multimodal_capacity:
        to_content = append_image_if_contain_base64
    else:
        to_content = remove_image_if_contain_base64

    for index in range(0, 2*conversation_cnt, 2):
        what_i_have_asked = {"role": "user", "content": to_content(history[index])}
        what_gpt_answer = {"role": "assistant", "content": to_content(history[index+1])}
        if what_i_have_asked["content"] != "":
            # Skip rounds without a real answer (empty or timed out).
            if what_gpt_answer["content"] == "": continue
            if what_gpt_answer["content"] == timeout_bot_msg: continue
            messages.append(what_i_have_asked)
            messages.append(what_gpt_answer)
        else:
            # Empty question: the answer replaces the previous message's content.
            messages[-1]['content'] = what_gpt_answer['content']

    if enable_multimodal_capacity:
        current_content = [{
            "type": "text",
            "text": inputs
        }]
        for image_base64 in image_base64_array:
            current_content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{image_base64}"
                }
            })
    else:
        current_content = inputs
    messages.append({"role": "user", "content": current_content})

    # Strip the routing prefix to obtain the model name sent to the API.
    model = llm_model
    if llm_model.startswith('api2d-'):
        model = llm_model[len('api2d-'):]
    if llm_model.startswith('one-api-'):
        model = llm_model[len('one-api-'):]
        model, _ = read_one_api_model_name(model)
    if llm_model.startswith('vllm-'):
        model = llm_model[len('vllm-'):]
        model, _ = read_one_api_model_name(model)
    if llm_model.startswith('openrouter-'):
        model = llm_model[len('openrouter-'):]
        # Unpack like the branches above: the helper returns a pair, and only
        # its first element is the model name.
        model, _ = read_one_api_model_name(model)
    if model == "gpt-3.5-random":  # pick one at random to spread the rate limit
        model = random.choice([
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-16k",
            "gpt-3.5-turbo-1106",
            "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-16k-0613",
            "gpt-3.5-turbo-0301",
        ])

    payload = {
        "model": model,
        "messages": messages,
        "temperature": llm_kwargs['temperature'],  # 1.0,
        "top_p": llm_kwargs['top_p'],  # 1.0,
        "n": 1,
        "stream": stream,
    }

    return headers,payload
|
||||
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import time
|
||||
import asyncio
|
||||
import threading
|
||||
import importlib
|
||||
|
||||
from .bridge_newbingfree import preprocess_newbing_out, preprocess_newbing_out_simple
|
||||
from multiprocessing import Process, Pipe
|
||||
from toolbox import update_ui, get_conf, trimmed_format_exc
|
||||
import threading
|
||||
import importlib
|
||||
import logging
|
||||
import time
|
||||
from loguru import logger as logging
|
||||
from toolbox import get_conf
|
||||
import asyncio
|
||||
|
||||
load_message = "正在加载Claude组件,请稍候..."
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ import json
|
||||
import random
|
||||
import string
|
||||
import websockets
|
||||
import logging
|
||||
import time
|
||||
import threading
|
||||
import importlib
|
||||
|
||||
@@ -218,5 +218,3 @@ class GoogleChatInit:
|
||||
|
||||
if __name__ == "__main__":
|
||||
google = GoogleChatInit()
|
||||
# print(gootle.generate_message_payload('你好呀', {}, ['123123', '3123123'], ''))
|
||||
# gootle.input_encode_handle('123123[123123](./123123), ')
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from http import HTTPStatus
|
||||
from toolbox import get_conf
|
||||
import threading
|
||||
import logging
|
||||
|
||||
timeout_bot_msg = '[Local Message] Request timeout. Network error.'
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from toolbox import get_conf
|
||||
import threading
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from toolbox import get_conf
|
||||
from loguru import logger as logging
|
||||
|
||||
timeout_bot_msg = '[Local Message] Request timeout. Network error.'
|
||||
#os.environ['VOLC_ACCESSKEY'] = ''
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
from toolbox import get_conf, get_pictures_list, encode_image
|
||||
import base64
|
||||
import datetime
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
from urllib.parse import urlparse
|
||||
import ssl
|
||||
import websocket
|
||||
import threading
|
||||
from toolbox import get_conf, get_pictures_list, encode_image
|
||||
from loguru import logger
|
||||
from urllib.parse import urlparse
|
||||
from datetime import datetime
|
||||
from time import mktime
|
||||
from urllib.parse import urlencode
|
||||
from wsgiref.handlers import format_date_time
|
||||
import websocket
|
||||
import threading, time
|
||||
|
||||
timeout_bot_msg = '[Local Message] Request timeout. Network error.'
|
||||
|
||||
@@ -104,7 +105,7 @@ class SparkRequestInstance():
|
||||
if llm_kwargs['most_recent_uploaded'].get('path'):
|
||||
file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path'])
|
||||
if len(file_manifest) > 0:
|
||||
print('正在使用讯飞图片理解API')
|
||||
logger.info('正在使用讯飞图片理解API')
|
||||
gpt_url = self.gpt_url_img
|
||||
wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url)
|
||||
websocket.enableTrace(False)
|
||||
@@ -123,7 +124,7 @@ class SparkRequestInstance():
|
||||
data = json.loads(message)
|
||||
code = data['header']['code']
|
||||
if code != 0:
|
||||
print(f'请求错误: {code}, {data}')
|
||||
logger.error(f'请求错误: {code}, {data}')
|
||||
self.result_buf += str(data)
|
||||
ws.close()
|
||||
self.time_to_exit_event.set()
|
||||
@@ -140,7 +141,7 @@ class SparkRequestInstance():
|
||||
|
||||
# 收到websocket错误的处理
|
||||
def on_error(ws, error):
|
||||
print("error:", error)
|
||||
logger.error("error:", error)
|
||||
self.time_to_exit_event.set()
|
||||
|
||||
# 收到websocket关闭的处理
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# @Descr : 兼容最新的智谱Ai
|
||||
from toolbox import get_conf
|
||||
from toolbox import get_conf, encode_image, get_pictures_list
|
||||
import logging, os, requests
|
||||
import requests
|
||||
import json
|
||||
class TaichuChatInit:
|
||||
def __init__(self): ...
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
from toolbox import get_conf
|
||||
from zhipuai import ZhipuAI
|
||||
from toolbox import get_conf, encode_image, get_pictures_list
|
||||
import logging, os
|
||||
from loguru import logger
|
||||
import os
|
||||
|
||||
|
||||
def input_encode_handler(inputs:str, llm_kwargs:dict):
|
||||
@@ -24,7 +25,7 @@ class ZhipuChatInit:
|
||||
def __init__(self):
|
||||
ZHIPUAI_API_KEY, ZHIPUAI_MODEL = get_conf("ZHIPUAI_API_KEY", "ZHIPUAI_MODEL")
|
||||
if len(ZHIPUAI_MODEL) > 0:
|
||||
logging.error('ZHIPUAI_MODEL 配置项选项已经弃用,请在LLM_MODEL中配置')
|
||||
logger.error('ZHIPUAI_MODEL 配置项选项已经弃用,请在LLM_MODEL中配置')
|
||||
self.zhipu_bro = ZhipuAI(api_key=ZHIPUAI_API_KEY)
|
||||
self.model = ''
|
||||
|
||||
@@ -37,8 +38,7 @@ class ZhipuChatInit:
|
||||
what_i_have_asked['content'].append({"type": 'text', "text": user_input})
|
||||
if encode_img:
|
||||
if len(encode_img) > 1:
|
||||
logging.warning("glm-4v只支持一张图片,将只取第一张图片进行处理")
|
||||
print("glm-4v只支持一张图片,将只取第一张图片进行处理")
|
||||
logger.warning("glm-4v只支持一张图片,将只取第一张图片进行处理")
|
||||
img_d = {"type": "image_url",
|
||||
"image_url": {
|
||||
"url": encode_img[0]['data']
|
||||
|
||||
@@ -5,6 +5,7 @@ from toolbox import ChatBotWithCookies
|
||||
from multiprocessing import Process, Pipe
|
||||
from contextlib import redirect_stdout
|
||||
from request_llms.queued_pipe import create_queue_pipe
|
||||
from loguru import logger
|
||||
|
||||
class ThreadLock(object):
|
||||
def __init__(self):
|
||||
@@ -51,7 +52,7 @@ def reset_tqdm_output():
|
||||
getattr(sys.stdout, 'flush', lambda: None)()
|
||||
|
||||
def fp_write(s):
|
||||
print(s)
|
||||
logger.info(s)
|
||||
last_len = [0]
|
||||
|
||||
def print_status(s):
|
||||
@@ -199,7 +200,7 @@ class LocalLLMHandle(Process):
|
||||
if res.startswith(self.std_tag):
|
||||
new_output = res[len(self.std_tag):]
|
||||
std_out = std_out[:std_out_clip_len]
|
||||
print(new_output, end='')
|
||||
logger.info(new_output, end='')
|
||||
std_out = new_output + std_out
|
||||
yield self.std_tag + '\n```\n' + std_out + '\n```\n'
|
||||
elif res == '[Finish]':
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import traceback
|
||||
import requests
|
||||
from loguru import logger
|
||||
|
||||
# config_private.py放自己的秘密如API和代理网址
|
||||
# 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
|
||||
@@ -106,10 +106,7 @@ def generate_message(input, model, key, history, max_output_token, system_prompt
|
||||
"stream": True,
|
||||
"max_tokens": max_output_token,
|
||||
}
|
||||
try:
|
||||
print(f" {model} : {conversation_cnt} : {input[:100]} ..........")
|
||||
except:
|
||||
print("输入中可能存在乱码。")
|
||||
|
||||
return headers, playload
|
||||
|
||||
|
||||
@@ -196,7 +193,7 @@ def get_predict_function(
|
||||
if retry > MAX_RETRY:
|
||||
raise TimeoutError
|
||||
if MAX_RETRY != 0:
|
||||
print(f"请求超时,正在重试 ({retry}/{MAX_RETRY}) ……")
|
||||
logger.error(f"请求超时,正在重试 ({retry}/{MAX_RETRY}) ……")
|
||||
|
||||
stream_response = response.iter_lines()
|
||||
result = ""
|
||||
@@ -219,18 +216,17 @@ def get_predict_function(
|
||||
):
|
||||
chunk = get_full_error(chunk, stream_response)
|
||||
chunk_decoded = chunk.decode()
|
||||
print(chunk_decoded)
|
||||
logger.error(chunk_decoded)
|
||||
raise RuntimeError(
|
||||
f"API异常,请检测终端输出。可能的原因是:{finish_reason}"
|
||||
)
|
||||
if chunk:
|
||||
try:
|
||||
if finish_reason == "stop":
|
||||
logging.info(f"[response] {result}")
|
||||
if not console_slience:
|
||||
print(f"[response] {result}")
|
||||
break
|
||||
result += response_text
|
||||
if not console_slience:
|
||||
print(response_text, end="")
|
||||
if observe_window is not None:
|
||||
# 观测窗,把已经获取的数据显示出去
|
||||
if len(observe_window) >= 1:
|
||||
@@ -243,7 +239,7 @@ def get_predict_function(
|
||||
chunk = get_full_error(chunk, stream_response)
|
||||
chunk_decoded = chunk.decode()
|
||||
error_msg = chunk_decoded
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
raise RuntimeError("Json解析不合常规")
|
||||
return result
|
||||
|
||||
@@ -276,7 +272,7 @@ def get_predict_function(
|
||||
inputs, history = handle_core_functionality(
|
||||
additional_fn, inputs, history, chatbot
|
||||
)
|
||||
logging.info(f"[raw_input] {inputs}")
|
||||
logger.info(f"[raw_input] {inputs}")
|
||||
chatbot.append((inputs, ""))
|
||||
yield from update_ui(
|
||||
chatbot=chatbot, history=history, msg="等待响应"
|
||||
@@ -376,11 +372,11 @@ def get_predict_function(
|
||||
history=history,
|
||||
msg="API异常:" + chunk_decoded,
|
||||
) # 刷新界面
|
||||
print(chunk_decoded)
|
||||
logger.error(chunk_decoded)
|
||||
return
|
||||
|
||||
if finish_reason == "stop":
|
||||
logging.info(f"[response] {gpt_replying_buffer}")
|
||||
logger.info(f"[response] {gpt_replying_buffer}")
|
||||
break
|
||||
status_text = f"finish_reason: {finish_reason}"
|
||||
gpt_replying_buffer += response_text
|
||||
@@ -403,7 +399,7 @@ def get_predict_function(
|
||||
yield from update_ui(
|
||||
chatbot=chatbot, history=history, msg="Json异常" + chunk_decoded
|
||||
) # 刷新界面
|
||||
print(chunk_decoded)
|
||||
logger.error(chunk_decoded)
|
||||
return
|
||||
|
||||
return predict_no_ui_long_connection, predict
|
||||
|
||||
@@ -2,14 +2,15 @@ https://public.agent-matrix.com/publish/gradio-3.32.10-py3-none-any.whl
|
||||
fastapi==0.110
|
||||
gradio-client==0.8
|
||||
pypdf2==2.12.1
|
||||
httpx<=0.25.2
|
||||
zhipuai==2.0.1
|
||||
tiktoken>=0.3.3
|
||||
requests[socks]
|
||||
pydantic==2.5.2
|
||||
llama-index==0.10
|
||||
pydantic==2.9.2
|
||||
protobuf==3.20
|
||||
transformers>=4.27.1,<4.42
|
||||
scipdf_parser>=0.52
|
||||
spacy==3.7.4
|
||||
anthropic>=0.18.1
|
||||
python-markdown-math
|
||||
pymdown-extensions
|
||||
@@ -28,6 +29,18 @@ edge-tts
|
||||
pymupdf
|
||||
openai
|
||||
rjsmin
|
||||
loguru
|
||||
arxiv
|
||||
numpy
|
||||
rich
|
||||
|
||||
|
||||
llama-index-core==0.10.68
|
||||
llama-index-legacy==0.9.48
|
||||
llama-index-readers-file==0.1.33
|
||||
llama-index-readers-llama-parse==0.1.6
|
||||
llama-index-embeddings-azure-openai==0.1.10
|
||||
llama-index-embeddings-openai==0.1.10
|
||||
llama-parse==0.4.9
|
||||
mdit-py-plugins>=0.3.3
|
||||
linkify-it-py==2.0.3
|
||||
@@ -2,6 +2,8 @@ import markdown
|
||||
import re
|
||||
import os
|
||||
import math
|
||||
|
||||
from loguru import logger
|
||||
from textwrap import dedent
|
||||
from functools import lru_cache
|
||||
from pymdownx.superfences import fence_code_format
|
||||
@@ -227,14 +229,14 @@ def fix_dollar_sticking_bug(txt):
|
||||
|
||||
if single_stack_height > 0:
|
||||
if txt[:(index+1)].find('\n') > 0 or txt[:(index+1)].find('<td>') > 0 or txt[:(index+1)].find('</td>') > 0:
|
||||
print('公式之中出现了异常 (Unexpect element in equation)')
|
||||
logger.error('公式之中出现了异常 (Unexpect element in equation)')
|
||||
single_stack_height = 0
|
||||
txt_result += ' $'
|
||||
continue
|
||||
|
||||
if double_stack_height > 0:
|
||||
if txt[:(index+1)].find('\n\n') > 0:
|
||||
print('公式之中出现了异常 (Unexpect element in equation)')
|
||||
logger.error('公式之中出现了异常 (Unexpect element in equation)')
|
||||
double_stack_height = 0
|
||||
txt_result += '$$'
|
||||
continue
|
||||
@@ -253,13 +255,13 @@ def fix_dollar_sticking_bug(txt):
|
||||
txt = txt[(index+2):]
|
||||
else:
|
||||
if double_stack_height != 0:
|
||||
# print(txt[:(index)])
|
||||
print('发现异常嵌套公式')
|
||||
# logger.info(txt[:(index)])
|
||||
logger.info('发现异常嵌套公式')
|
||||
if single_stack_height == 0:
|
||||
single_stack_height = 1
|
||||
else:
|
||||
single_stack_height = 0
|
||||
# print(txt[:(index)])
|
||||
# logger.info(txt[:(index)])
|
||||
txt_result += txt[:(index+1)]
|
||||
txt = txt[(index+1):]
|
||||
break
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import platform
|
||||
from sys import stdout
|
||||
from loguru import logger
|
||||
|
||||
if platform.system()=="Linux":
|
||||
pass
|
||||
@@ -59,3 +60,29 @@ def sprint亮紫(*kw):
|
||||
return "\033[1;35m"+' '.join(kw)+"\033[0m"
|
||||
def sprint亮靛(*kw):
|
||||
return "\033[1;36m"+' '.join(kw)+"\033[0m"
|
||||
|
||||
def log红(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint红(*kw))
|
||||
def log绿(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint绿(*kw))
|
||||
def log黄(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint黄(*kw))
|
||||
def log蓝(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint蓝(*kw))
|
||||
def log紫(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint紫(*kw))
|
||||
def log靛(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint靛(*kw))
|
||||
|
||||
def log亮红(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint亮红(*kw))
|
||||
def log亮绿(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint亮绿(*kw))
|
||||
def log亮黄(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint亮黄(*kw))
|
||||
def log亮蓝(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint亮蓝(*kw))
|
||||
def log亮紫(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint亮紫(*kw))
|
||||
def log亮靛(*kw,**kargs):
|
||||
logger.opt(depth=1).info(sprint亮靛(*kw))
|
||||
@@ -2,7 +2,7 @@ import importlib
|
||||
import time
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from shared_utils.colorful import print亮红, print亮绿, print亮蓝
|
||||
from shared_utils.colorful import log亮红, log亮绿, log亮蓝
|
||||
|
||||
pj = os.path.join
|
||||
default_user_name = 'default_user'
|
||||
@@ -30,13 +30,13 @@ def read_env_variable(arg, default_value):
|
||||
env_arg = os.environ[arg]
|
||||
else:
|
||||
raise KeyError
|
||||
print(f"[ENV_VAR] 尝试加载{arg},默认值:{default_value} --> 修正值:{env_arg}")
|
||||
log亮绿(f"[ENV_VAR] 尝试加载{arg},默认值:{default_value} --> 修正值:{env_arg}")
|
||||
try:
|
||||
if isinstance(default_value, bool):
|
||||
env_arg = env_arg.strip()
|
||||
if env_arg == 'True': r = True
|
||||
elif env_arg == 'False': r = False
|
||||
else: print('Enter True or False, but have:', env_arg); r = default_value
|
||||
else: log亮红('Expect `True` or `False`, but have:', env_arg); r = default_value
|
||||
elif isinstance(default_value, int):
|
||||
r = int(env_arg)
|
||||
elif isinstance(default_value, float):
|
||||
@@ -51,13 +51,13 @@ def read_env_variable(arg, default_value):
|
||||
assert arg == "proxies"
|
||||
r = eval(env_arg)
|
||||
else:
|
||||
print亮红(f"[ENV_VAR] 环境变量{arg}不支持通过环境变量设置! ")
|
||||
log亮红(f"[ENV_VAR] 环境变量{arg}不支持通过环境变量设置! ")
|
||||
raise KeyError
|
||||
except:
|
||||
print亮红(f"[ENV_VAR] 环境变量{arg}加载失败! ")
|
||||
log亮红(f"[ENV_VAR] 环境变量{arg}加载失败! ")
|
||||
raise KeyError(f"[ENV_VAR] 环境变量{arg}加载失败! ")
|
||||
|
||||
print亮绿(f"[ENV_VAR] 成功读取环境变量{arg}")
|
||||
log亮绿(f"[ENV_VAR] 成功读取环境变量{arg}")
|
||||
return r
|
||||
|
||||
|
||||
@@ -80,21 +80,21 @@ def read_single_conf_with_lru_cache(arg):
|
||||
if arg == 'API_URL_REDIRECT':
|
||||
oai_rd = r.get("https://api.openai.com/v1/chat/completions", None) # API_URL_REDIRECT填写格式是错误的,请阅读`https://github.com/binary-husky/gpt_academic/wiki/项目配置说明`
|
||||
if oai_rd and not oai_rd.endswith('/completions'):
|
||||
print亮红("\n\n[API_URL_REDIRECT] API_URL_REDIRECT填错了。请阅读`https://github.com/binary-husky/gpt_academic/wiki/项目配置说明`。如果您确信自己没填错,无视此消息即可。")
|
||||
log亮红("\n\n[API_URL_REDIRECT] API_URL_REDIRECT填错了。请阅读`https://github.com/binary-husky/gpt_academic/wiki/项目配置说明`。如果您确信自己没填错,无视此消息即可。")
|
||||
time.sleep(5)
|
||||
if arg == 'API_KEY':
|
||||
print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和Azure的api-key。也支持同时填写多个api-key,如API_KEY=\"openai-key1,openai-key2,azure-key3\"")
|
||||
print亮蓝(f"[API_KEY] 您既可以在config.py中修改api-key(s),也可以在问题输入区输入临时的api-key(s),然后回车键提交后即可生效。")
|
||||
log亮蓝(f"[API_KEY] 本项目现已支持OpenAI和Azure的api-key。也支持同时填写多个api-key,如API_KEY=\"openai-key1,openai-key2,azure-key3\"")
|
||||
log亮蓝(f"[API_KEY] 您既可以在config.py中修改api-key(s),也可以在问题输入区输入临时的api-key(s),然后回车键提交后即可生效。")
|
||||
if is_any_api_key(r):
|
||||
print亮绿(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
|
||||
log亮绿(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
|
||||
else:
|
||||
print亮红(f"[API_KEY] 您的 API_KEY({r[:15]}***)不满足任何一种已知的密钥格式,请在config文件中修改API密钥之后再运行(详见`https://github.com/binary-husky/gpt_academic/wiki/api_key`)。")
|
||||
log亮红(f"[API_KEY] 您的 API_KEY({r[:15]}***)不满足任何一种已知的密钥格式,请在config文件中修改API密钥之后再运行(详见`https://github.com/binary-husky/gpt_academic/wiki/api_key`)。")
|
||||
if arg == 'proxies':
|
||||
if not read_single_conf_with_lru_cache('USE_PROXY'): r = None # 检查USE_PROXY,防止proxies单独起作用
|
||||
if r is None:
|
||||
print亮红('[PROXY] 网络代理状态:未配置。无代理状态下很可能无法访问OpenAI家族的模型。建议:检查USE_PROXY选项是否修改。')
|
||||
log亮红('[PROXY] 网络代理状态:未配置。无代理状态下很可能无法访问OpenAI家族的模型。建议:检查USE_PROXY选项是否修改。')
|
||||
else:
|
||||
print亮绿('[PROXY] 网络代理状态:已配置。配置信息如下:', r)
|
||||
log亮绿('[PROXY] 网络代理状态:已配置。配置信息如下:', str(r))
|
||||
assert isinstance(r, dict), 'proxies格式错误,请注意proxies选项的格式,不要遗漏括号。'
|
||||
return r
|
||||
|
||||
|
||||
@@ -90,23 +90,6 @@ def make_history_cache():
|
||||
|
||||
|
||||
|
||||
# """
|
||||
# with gr.Row():
|
||||
# txt = gr.Textbox(show_label=False, placeholder="Input question here.", elem_id='user_input_main').style(container=False)
|
||||
# txtx = gr.Textbox(show_label=False, placeholder="Input question here.", elem_id='user_input_main').style(container=False)
|
||||
# with gr.Row():
|
||||
# btn_value = "Test"
|
||||
# elem_id = "TestCase"
|
||||
# variant = "primary"
|
||||
# input_list = [txt, txtx]
|
||||
# output_list = [txt, txtx]
|
||||
# input_name_list = ["txt(input)", "txtx(input)"]
|
||||
# output_name_list = ["txt", "txtx"]
|
||||
# js_callback = """(txt, txtx)=>{console.log(txt); console.log(txtx);}"""
|
||||
# def function(txt, txtx):
|
||||
# return "booo", "goooo"
|
||||
# create_button_with_javascript_callback(btn_value, elem_id, variant, js_callback, input_list, output_list, function, input_name_list, output_name_list)
|
||||
# """
|
||||
def create_button_with_javascript_callback(btn_value, elem_id, variant, js_callback, input_list, output_list, function, input_name_list, output_name_list):
|
||||
import gradio as gr
|
||||
middle_ware_component = gr.Textbox(visible=False, elem_id=elem_id+'_buffer')
|
||||
|
||||
@@ -138,7 +138,9 @@ def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SS
|
||||
app_block.is_sagemaker = False
|
||||
|
||||
gradio_app = App.create_app(app_block)
|
||||
|
||||
for route in list(gradio_app.router.routes):
|
||||
if route.path == "/proxy={url_path:path}":
|
||||
gradio_app.router.routes.remove(route)
|
||||
# --- --- replace gradio endpoint to forbid access to sensitive files --- ---
|
||||
if len(AUTHENTICATION) > 0:
|
||||
dependencies = []
|
||||
@@ -154,9 +156,13 @@ def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SS
|
||||
@gradio_app.head("/file={path_or_url:path}", dependencies=dependencies)
|
||||
@gradio_app.get("/file={path_or_url:path}", dependencies=dependencies)
|
||||
async def file(path_or_url: str, request: fastapi.Request):
|
||||
if len(AUTHENTICATION) > 0:
|
||||
if not _authorize_user(path_or_url, request, gradio_app):
|
||||
return "越权访问!"
|
||||
if not _authorize_user(path_or_url, request, gradio_app):
|
||||
return "越权访问!"
|
||||
stripped = path_or_url.lstrip().lower()
|
||||
if stripped.startswith("https://") or stripped.startswith("http://"):
|
||||
return "账户密码授权模式下, 禁止链接!"
|
||||
if '../' in stripped:
|
||||
return "非法路径!"
|
||||
return await endpoint(path_or_url, request)
|
||||
|
||||
from fastapi import Request, status
|
||||
@@ -167,6 +173,26 @@ def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SS
|
||||
response.delete_cookie('access-token')
|
||||
response.delete_cookie('access-token-unsecure')
|
||||
return response
|
||||
else:
|
||||
dependencies = []
|
||||
endpoint = None
|
||||
for route in list(gradio_app.router.routes):
|
||||
if route.path == "/file/{path:path}":
|
||||
gradio_app.router.routes.remove(route)
|
||||
if route.path == "/file={path_or_url:path}":
|
||||
dependencies = route.dependencies
|
||||
endpoint = route.endpoint
|
||||
gradio_app.router.routes.remove(route)
|
||||
@gradio_app.get("/file/{path:path}", dependencies=dependencies)
|
||||
@gradio_app.head("/file={path_or_url:path}", dependencies=dependencies)
|
||||
@gradio_app.get("/file={path_or_url:path}", dependencies=dependencies)
|
||||
async def file(path_or_url: str, request: fastapi.Request):
|
||||
stripped = path_or_url.lstrip().lower()
|
||||
if stripped.startswith("https://") or stripped.startswith("http://"):
|
||||
return "账户密码授权模式下, 禁止链接!"
|
||||
if '../' in stripped:
|
||||
return "非法路径!"
|
||||
return await endpoint(path_or_url, request)
|
||||
|
||||
# --- --- enable TTS (text-to-speech) functionality --- ---
|
||||
TTS_TYPE = get_conf("TTS_TYPE")
|
||||
|
||||
某些文件未显示,因为此 diff 中更改的文件太多 显示更多
在新工单中引用
屏蔽一个用户