From 9f9848c6e95ef9cdce904de2d907198b164fa97b Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 29 Jun 2023 12:54:19 +0800 Subject: [PATCH] again --- .github/workflows/build-with-latex.yml | 44 ++ Dockerfile | 10 +- README.md | 132 ++-- app.py | 6 +- colorful.py | 80 +-- config.py | 8 + crazy_functional.py | 250 +++++--- crazy_functions/Langchain知识库.py | 107 ++++ crazy_functions/Latex全文润色.py | 3 + crazy_functions/Latex输出PDF结果.py | 300 +++++++++ crazy_functions/crazy_functions_test.py | 106 +++- crazy_functions/crazy_utils.py | 140 +++++ crazy_functions/latex_utils.py | 773 ++++++++++++++++++++++++ crazy_functions/对话历史存档.py | 7 +- crazy_functions/数学动画生成manim.py | 2 +- crazy_functions/理解PDF文档内容.py | 4 +- crazy_functions/联网的ChatGPT_bing版.py | 102 ++++ crazy_functions/虚空终端.py | 131 ++++ docker-compose.yml | 27 + docs/Dockerfile+NoLocal+Latex | 27 + docs/GithubAction+NoLocal+Latex | 25 + docs/README.md.Italian.md | 20 +- docs/README.md.Korean.md | 6 +- docs/README.md.Portuguese.md | 12 +- docs/translate_english.json | 2 + docs/use_azure.md | 152 +++++ request_llm/bridge_all.py | 40 ++ request_llm/bridge_azure_test.py | 241 ++++++++ toolbox.py | 91 ++- version | 4 +- 30 files changed, 2614 insertions(+), 238 deletions(-) create mode 100644 .github/workflows/build-with-latex.yml create mode 100644 crazy_functions/Langchain知识库.py create mode 100644 crazy_functions/Latex输出PDF结果.py create mode 100644 crazy_functions/latex_utils.py create mode 100644 crazy_functions/联网的ChatGPT_bing版.py create mode 100644 crazy_functions/虚空终端.py create mode 100644 docs/Dockerfile+NoLocal+Latex create mode 100644 docs/GithubAction+NoLocal+Latex create mode 100644 docs/use_azure.md create mode 100644 request_llm/bridge_azure_test.py diff --git a/.github/workflows/build-with-latex.yml b/.github/workflows/build-with-latex.yml new file mode 100644 index 00000000..fb16d2c1 --- /dev/null +++ b/.github/workflows/build-with-latex.yml @@ -0,0 +1,44 @@ +# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages +name: Create and publish a Docker image for Latex support + +on: + push: + branches: + - 'master' + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}_with_latex + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . + push: true + file: docs/GithubAction+NoLocal+Latex + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/Dockerfile b/Dockerfile index 19d988f6..97ad13d9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,12 +10,16 @@ RUN echo '[global]' > /etc/pip.conf && \ WORKDIR /gpt -# 装载项目文件 -COPY . . + + # 安装依赖 +COPY requirements.txt ./ +COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl +RUN pip3 install -r requirements.txt +# 装载项目文件 +COPY . . 
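The reordered steps above follow the standard Docker layer-caching pattern: only `requirements.txt` (plus the bundled gradio wheel) is copied before `pip install`, so the costly dependency layer is rebuilt only when the requirements change, not on every source edit. A minimal sketch of the pattern, using the same paths as this repo:

```dockerfile
# Dependency layer: cache is invalidated only when requirements.txt changes
COPY requirements.txt ./
RUN pip3 install -r requirements.txt

# Source layer: editing project files no longer forces a re-install
COPY . .
```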
RUN pip3 install -r requirements.txt - # 可选步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' diff --git a/README.md b/README.md index f487f69d..99397738 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ pinned: false # ChatGPT 学术优化 > **Note** > -> 5月27日对gradio依赖进行了较大的修复和调整,fork并解决了官方Gradio的一系列bug。但如果27日当天进行了更新,可能会导致代码报错(依赖缺失,卡在loading界面等),请及时更新到**最新版代码**并重新安装pip依赖即可。若给您带来困扰还请谅解。安装依赖时,请严格选择requirements.txt中**指定的版本**: +> 2023.5.27 对Gradio依赖进行了调整,Fork并解决了官方Gradio的若干Bugs。请及时**更新代码**并重新更新pip依赖。安装依赖时,请严格选择`requirements.txt`中**指定的版本**: > -> `pip install -r requirements.txt -i https://pypi.org/simple` +> `pip install -r requirements.txt` > # GPT 学术优化 (GPT Academic) @@ -28,7 +28,7 @@ To translate this project to arbitary language with GPT, read and run [`multi_la > > 1.请注意只有**红颜色**标识的函数插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR! > -> 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。[安装方法](#installation)。 +> 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/gpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。[安装方法](#installation)。 > > 3.本项目兼容并鼓励尝试国产大语言模型chatglm和RWKV, 盘古等等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,api2d-key3"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 @@ -43,22 +43,23 @@ To translate this project to arbitary language with GPT, read and run [`multi_la 一键中英互译 | 一键中英互译 一键代码解释 | 显示代码、解释代码、生成代码、给代码加注释 [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键 -模块化设计 | 支持自定义强大的[函数插件](https://github.com/binary-husky/chatgpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) -[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码 +模块化设计 | 支持自定义强大的[函数插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) +[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码 [程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] 一键可以剖析其他Python/C/C++/Java/Lua/...项目树 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [函数插件] 一键解读latex/pdf论文全文并生成摘要 Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [函数插件] 一键翻译或润色latex论文 批量注释生成 | [函数插件] 一键批量生成函数注释 -Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [函数插件] 看到上面5种语言的[README](https://github.com/binary-husky/chatgpt_academic/blob/master/docs/README_EN.md)了吗? +Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [函数插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗? 
chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文(多线程) [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/) 互联网信息聚合+GPT | [函数插件] 一键[让GPT先从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck),再回答问题,让信息永不过时 +⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具⭐ 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序 -启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 -[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持,[API2D](https://api2d.com/)接口支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? +启动暗色gradio[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 +[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? 更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama),[RWKV](https://github.com/BlinkDL/ChatRWKV)和[盘古α](https://openi.org.cn/pangu/) 更多新功能展示(图像生成等) …… | 见本文档结尾处 …… @@ -102,13 +103,13 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 1. 下载项目 ```sh -git clone https://github.com/binary-husky/chatgpt_academic.git -cd chatgpt_academic +git clone https://github.com/binary-husky/gpt_academic.git +cd gpt_academic ``` 2. 配置API_KEY -在`config.py`中,配置API KEY等设置,[特殊网络环境设置](https://github.com/binary-husky/gpt_academic/issues/1) 。 +在`config.py`中,配置API KEY等设置,[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。 (P.S. 程序运行时会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。因此,如果您能理解我们的配置读取逻辑,我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件,并把`config.py`中的配置转移(复制)到`config_private.py`中。`config_private.py`不受git管控,可以让您的隐私信息更加安全。P.S.项目同样支持通过`环境变量`配置大多数选项,环境变量的书写格式参考`docker-compose`文件。读取优先级: `环境变量` > `config_private.py` > `config.py`) @@ -124,6 +125,7 @@ conda activate gptac_venv # 激活anaconda环境 python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤 ``` +
<details><summary>如果需要支持清华ChatGLM/复旦MOSS作为后端,请点击展开此处</summary>
<p>
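The P.S. above describes the configuration precedence (`环境变量` > `config_private.py` > `config.py`). As a minimal sketch, a `config_private.py` override could look like the following; every value is an illustrative placeholder, and the proxy address is only an assumed example of the format documented in `config.py`:

```python
# config_private.py: any name defined here shadows the same name in config.py.
# All values below are placeholders, not working credentials.
API_KEY = "sk-openaikey1,sk-openaikey2"    # multiple keys, comma-separated
USE_PROXY = True
proxies = {
    "http":  "socks5h://localhost:11284",  # assumed example address/port
    "https": "socks5h://localhost:11284",
}
```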

@@ -150,19 +152,13 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt- python main.py ``` -5. 测试函数插件 -``` -- 测试函数插件模板函数(要求gpt回答历史上的今天发生了什么),您可以根据此函数为模板,实现更复杂的功能 - 点击 "[函数插件模板Demo] 历史上的今天" -``` - ## 安装-方法2:使用Docker -1. 仅ChatGPT(推荐大多数人选择) +1. 仅ChatGPT(推荐大多数人选择,等价于docker-compose方案1) ``` sh -git clone https://github.com/binary-husky/chatgpt_academic.git # 下载项目 -cd chatgpt_academic # 进入路径 +git clone https://github.com/binary-husky/gpt_academic.git # 下载项目 +cd gpt_academic # 进入路径 nano config.py # 用任意文本编辑器编辑config.py, 配置 “Proxy”, “API_KEY” 以及 “WEB_PORT” (例如50923) 等 docker build -t gpt-academic . # 安装 @@ -171,37 +167,45 @@ docker run --rm -it --net=host gpt-academic #(最后一步-选择2)在macOS/windows环境下,只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口 docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic ``` +P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用docker-compose获取Latex功能(修改docker-compose.yml,保留方案4并删除其他方案)。 2. ChatGPT + ChatGLM + MOSS(需要熟悉Docker) ``` sh -# 修改docker-compose.yml,删除方案1和方案3,保留方案2。修改docker-compose.yml中方案2的配置,参考其中注释即可 +# 修改docker-compose.yml,保留方案2并删除其他方案。修改docker-compose.yml中方案2的配置,参考其中注释即可 docker-compose up ``` 3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉Docker) ``` sh -# 修改docker-compose.yml,删除方案1和方案2,保留方案3。修改docker-compose.yml中方案3的配置,参考其中注释即可 +# 修改docker-compose.yml,保留方案3并删除其他方案。修改docker-compose.yml中方案3的配置,参考其中注释即可 docker-compose up ``` ## 安装-方法3:其他部署姿势 +1. 一键运行脚本。 +完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。 +脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。 -1. 如何使用反代URL/微软云AzureAPI +2. 使用docker-compose运行。 +请阅读docker-compose.yml后,按照其中的提示操作即可 + +3. 如何使用反代URL 按照`config.py`中的说明配置API_URL_REDIRECT即可。 -2. 远程云服务器部署(需要云服务器知识与经验) -请访问[部署wiki-1](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) +4. 微软云AzureAPI +按照`config.py`中的说明配置即可(AZURE_ENDPOINT等四个配置) -3. 使用WSL2(Windows Subsystem for Linux 子系统) -请访问[部署wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) +5. 远程云服务器部署(需要云服务器知识与经验)。 +请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) -4. 如何在二级网址(如`http://localhost/subpath`)下运行 +6. 使用WSL2(Windows Subsystem for Linux 子系统)。 +请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) + +7. 如何在二级网址(如`http://localhost/subpath`)下运行。 请访问[FastAPI运行说明](docs/WithFastapi.md) -5. 使用docker-compose运行 -请阅读docker-compose.yml后,按照其中的提示操作即可 --- # Advanced Usage ## 自定义新的便捷按钮 / 自定义函数插件 @@ -226,7 +230,7 @@ docker-compose up 编写强大的函数插件来执行任何你想得到的和想不到的任务。 本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。 -详情请参考[函数插件指南](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 +详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 --- # Latest Update @@ -234,38 +238,33 @@ docker-compose up 1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件, 另外在函数插件区(下拉菜单)调用 `载入对话历史存档` ,即可还原之前的会话。 -Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存,点击 `删除所有本地对话历史记录` 可以删除所有html存档缓存。 +Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存。
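A note on installation method 4 above (微软云AzureAPI): the "四个配置" are exactly the four settings this patch appends to `config.py` (see the `config.py` hunk below and the new `docs/use_azure.md`). A sketch with placeholder values; the version string is only an assumed example:

```python
# The four Azure settings added to config.py by this patch (placeholders).
AZURE_ENDPOINT = "https://<your-api-name>.openai.azure.com/"
AZURE_API_KEY = "<your Azure OpenAI key>"
AZURE_API_VERSION = "2023-05-15"        # assumed example version string
AZURE_ENGINE = "<your deployment name>"
```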

-2. 生成报告。大部分插件都会在执行结束后,生成工作报告
+2. ⭐Latex/Arxiv论文翻译功能⭐

-3. 模块化功能设计,简单的接口却能支持强大的功能
+3. 生成报告。大部分插件都会在执行结束后,生成工作报告

+4. 模块化功能设计,简单的接口却能支持强大的功能

-4. 这是一个能够“自我译解”的开源项目
+5. 译解其他开源项目

-5. 译解其他开源项目,不在话下
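Items 4 and 5 rest on the plugin interface this patch extends throughout `crazy_functional.py`: every plugin is a generator with one shared call signature, registered in `function_plugins` and wrapped in `HotReload`. A minimal sketch of a new plugin following that convention (the plugin name and reply text are illustrative only):

```python
from toolbox import CatchException, update_ui

@CatchException
def 插件示例(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    # txt is the user input; plugin_kwargs may carry the optional "advanced_arg"
    chatbot.append((txt, "[Local Message] 插件已收到输入, 正在处理..."))
    yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
```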
6. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`) @@ -290,13 +289,15 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex全文校对纠错
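Item 10 is implemented by the new `Latex英文纠错加PDF对比` plugin later in this patch. For each text fragment it builds a tightly constrained prompt; the sketch below mirrors the `proofread_en` branch of `switch_prompt` in `crazy_functions/Latex输出PDF结果.py`, wrapped in a function here only for readability:

```python
def build_proofread_prompt(frag: str, more_requirement: str = "") -> str:
    # Mirrors the 'proofread_en' branch of switch_prompt in this patch.
    return (r"Below is a section from an academic paper, proofread this section."
            + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. "
            + more_requirement
            + r"Answer me only with the revised text:"
            + f"\n\n{frag}")
```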
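The Latex proofreader above and the Arxiv translation plugin from item 2 both read that optional extra instruction from the "高级参数" input box through one small idiom, inlined repeatedly in the plugin bodies and shown here as a helper only for clarity:

```python
def read_advanced_arg(plugin_kwargs: dict, default: str = "") -> str:
    # An empty advanced-args box falls back to the plugin's default behaviour.
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""):
        plugin_kwargs.pop("advanced_arg")
    return plugin_kwargs.get("advanced_arg", default)
```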
+ ## 版本: - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件(高优先级) -- version 3.4(Todo): 完善chatglm本地大模型的多线支持 +- version 3.4: +arxiv论文翻译、latex论文批改功能 - version 3.3: +互联网信息综合功能 - version 3.2: 函数插件支持更多参数接口 (保存对话功能, 解读任意语言代码+同时询问任意的LLM组合) - version 3.1: 支持同时问询多个gpt模型!支持api2d,支持多个apikey负载均衡 @@ -314,29 +315,32 @@ gpt_academic开发者QQ群-2:610599535 - 已知问题 - 某些浏览器翻译插件干扰此软件前端的运行 - - 官方Gradio目前有很多兼容性Bug,请务必使用requirement.txt安装Gradio + - 官方Gradio目前有很多兼容性Bug,请务必使用`requirement.txt`安装Gradio ## 参考与学习 ``` -代码中参考了很多其他优秀项目中的设计,主要包括: +代码中参考了很多其他优秀项目中的设计,顺序不分先后: -# 项目1:清华ChatGLM-6B: +# 清华ChatGLM-6B: https://github.com/THUDM/ChatGLM-6B -# 项目2:清华JittorLLMs: +# 清华JittorLLMs: https://github.com/Jittor/JittorLLMs -# 项目3:Edge-GPT: -https://github.com/acheong08/EdgeGPT - -# 项目4:ChuanhuChatGPT: -https://github.com/GaiZhenbiao/ChuanhuChatGPT - -# 项目5:ChatPaper: +# ChatPaper: https://github.com/kaixindelele/ChatPaper -# 更多: +# Edge-GPT: +https://github.com/acheong08/EdgeGPT + +# ChuanhuChatGPT: +https://github.com/GaiZhenbiao/ChuanhuChatGPT + +# Oobabooga one-click installer: +https://github.com/oobabooga/one-click-installers + +# More: https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo ``` diff --git a/app.py b/app.py index 909f9e66..b4cd22bf 100644 --- a/app.py +++ b/app.py @@ -2,7 +2,7 @@ import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染 def main(): import subprocess, sys - subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt']) + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork']) import gradio as gr if gr.__version__ not in ['3.28.3','3.32.3']: assert False, "请用 pip install -r requirements.txt 安装依赖" from request_llm.bridge_all import predict @@ -158,7 +158,7 @@ def main(): for k in crazy_fns: if not crazy_fns[k].get("AsButton", True): continue click_handle = crazy_fns[k]["Button"].click(ArgsGeneralWrapper(crazy_fns[k]["Function"]), [*input_combo, gr.State(PORT)], output_combo) - click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot]) + click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot]) cancel_handles.append(click_handle) # 函数插件-下拉菜单与随变按钮的互动 def on_dropdown_changed(k): @@ -178,7 +178,7 @@ def main(): if k in [r"打开插件列表", r"请先从插件列表中选择"]: return yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(*args, **kwargs) click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo) - click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot]) + click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot]) cancel_handles.append(click_handle) # 终止按钮的回调函数注册 stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) diff --git a/colorful.py b/colorful.py index d90972bb..9749861f 100644 --- a/colorful.py +++ b/colorful.py @@ -34,58 +34,28 @@ def print亮紫(*kw,**kargs): def print亮靛(*kw,**kargs): print("\033[1;36m",*kw,"\033[0m",**kargs) - - -def print亮红(*kw,**kargs): - print("\033[1;31m",*kw,"\033[0m",**kargs) -def print亮绿(*kw,**kargs): - print("\033[1;32m",*kw,"\033[0m",**kargs) -def print亮黄(*kw,**kargs): - print("\033[1;33m",*kw,"\033[0m",**kargs) -def print亮蓝(*kw,**kargs): - print("\033[1;34m",*kw,"\033[0m",**kargs) -def print亮紫(*kw,**kargs): - print("\033[1;35m",*kw,"\033[0m",**kargs) -def print亮靛(*kw,**kargs): - print("\033[1;36m",*kw,"\033[0m",**kargs) - -print_red = print红 -print_green = print绿 -print_yellow = print黄 -print_blue = print蓝 
-print_purple = print紫 -print_indigo = print靛 - -print_bold_red = print亮红 -print_bold_green = print亮绿 -print_bold_yellow = print亮黄 -print_bold_blue = print亮蓝 -print_bold_purple = print亮紫 -print_bold_indigo = print亮靛 - -if not stdout.isatty(): - # redirection, avoid a fucked up log file - print红 = print - print绿 = print - print黄 = print - print蓝 = print - print紫 = print - print靛 = print - print亮红 = print - print亮绿 = print - print亮黄 = print - print亮蓝 = print - print亮紫 = print - print亮靛 = print - print_red = print - print_green = print - print_yellow = print - print_blue = print - print_purple = print - print_indigo = print - print_bold_red = print - print_bold_green = print - print_bold_yellow = print - print_bold_blue = print - print_bold_purple = print - print_bold_indigo = print \ No newline at end of file +# Do you like the elegance of Chinese characters? +def sprint红(*kw): + return "\033[0;31m"+' '.join(kw)+"\033[0m" +def sprint绿(*kw): + return "\033[0;32m"+' '.join(kw)+"\033[0m" +def sprint黄(*kw): + return "\033[0;33m"+' '.join(kw)+"\033[0m" +def sprint蓝(*kw): + return "\033[0;34m"+' '.join(kw)+"\033[0m" +def sprint紫(*kw): + return "\033[0;35m"+' '.join(kw)+"\033[0m" +def sprint靛(*kw): + return "\033[0;36m"+' '.join(kw)+"\033[0m" +def sprint亮红(*kw): + return "\033[1;31m"+' '.join(kw)+"\033[0m" +def sprint亮绿(*kw): + return "\033[1;32m"+' '.join(kw)+"\033[0m" +def sprint亮黄(*kw): + return "\033[1;33m"+' '.join(kw)+"\033[0m" +def sprint亮蓝(*kw): + return "\033[1;34m"+' '.join(kw)+"\033[0m" +def sprint亮紫(*kw): + return "\033[1;35m"+' '.join(kw)+"\033[0m" +def sprint亮靛(*kw): + return "\033[1;36m"+' '.join(kw)+"\033[0m" diff --git a/config.py b/config.py index c3f0192e..350a99b7 100644 --- a/config.py +++ b/config.py @@ -1,6 +1,7 @@ # [step 1]>> 例如: API_KEY = "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" (此key无效) API_KEY = "sk-此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2" + # [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改 USE_PROXY = False if USE_PROXY: @@ -80,3 +81,10 @@ your bing cookies here # 如果需要使用Slack Claude,使用教程详情见 request_llm/README.md SLACK_CLAUDE_BOT_ID = '' SLACK_CLAUDE_USER_TOKEN = '' + + +# 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md +AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/" +AZURE_API_KEY = "填入azure openai api的密钥" +AZURE_API_VERSION = "填入api版本" +AZURE_ENGINE = "填入ENGINE" diff --git a/crazy_functional.py b/crazy_functional.py index 91c85cf0..03aaaf55 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -112,11 +112,11 @@ def get_crazy_functions(): "AsButton": False, # 加入下拉菜单中 "Function": HotReload(解析项目本身) }, - "[老旧的Demo] 把本项目源代码切换成全英文": { - # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 - "AsButton": False, # 加入下拉菜单中 - "Function": HotReload(全项目切换英文) - }, + # "[老旧的Demo] 把本项目源代码切换成全英文": { + # # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 + # "AsButton": False, # 加入下拉菜单中 + # "Function": HotReload(全项目切换英文) + # }, "[插件demo] 历史上的今天": { # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 "Function": HotReload(高阶功能模板函数) @@ -126,7 +126,7 @@ def get_crazy_functions(): ###################### 第二组插件 ########################### # [第二组插件]: 经过充分测试 from crazy_functions.批量总结PDF文档 import 批量总结PDF文档 - from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer + # from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档 from crazy_functions.谷歌检索小助手 import 谷歌检索小助手 from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入 @@ -152,17 +152,16 @@ def get_crazy_functions(): # 
HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 "Function": HotReload(批量总结PDF文档) }, - "[测试功能] 批量总结PDF文档pdfminer": { - "Color": "stop", - "AsButton": False, # 加入下拉菜单中 - "Function": HotReload(批量总结PDF文档pdfminer) - }, + # "[测试功能] 批量总结PDF文档pdfminer": { + # "Color": "stop", + # "AsButton": False, # 加入下拉菜单中 + # "Function": HotReload(批量总结PDF文档pdfminer) + # }, "谷歌学术检索助手(输入谷歌学术搜索页url)": { "Color": "stop", "AsButton": False, # 加入下拉菜单中 "Function": HotReload(谷歌检索小助手) }, - "理解PDF文档内容 (模仿ChatPDF)": { # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 "Color": "stop", @@ -181,7 +180,7 @@ def get_crazy_functions(): "AsButton": False, # 加入下拉菜单中 "Function": HotReload(Latex英文纠错) }, - "[测试功能] 中文Latex项目全文润色(输入路径或上传压缩包)": { + "中文Latex项目全文润色(输入路径或上传压缩包)": { # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 "Color": "stop", "AsButton": False, # 加入下拉菜单中 @@ -210,65 +209,96 @@ def get_crazy_functions(): }) ###################### 第三组插件 ########################### - # [第三组插件]: 尚未充分测试的函数插件,放在这里 - from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要 - function_plugins.update({ - "一键下载arxiv论文并翻译摘要(先在input输入编号,如1812.10695)": { - "Color": "stop", - "AsButton": False, # 加入下拉菜单中 - "Function": HotReload(下载arxiv论文并翻译摘要) - } - }) + # [第三组插件]: 尚未充分测试的函数插件 - from crazy_functions.联网的ChatGPT import 连接网络回答问题 - function_plugins.update({ - "连接网络回答问题(先输入问题,再点击按钮,需要访问谷歌)": { - "Color": "stop", - "AsButton": False, # 加入下拉菜单中 - "Function": HotReload(连接网络回答问题) - } - }) + try: + from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要 + function_plugins.update({ + "一键下载arxiv论文并翻译摘要(先在input输入编号,如1812.10695)": { + "Color": "stop", + "AsButton": False, # 加入下拉菜单中 + "Function": HotReload(下载arxiv论文并翻译摘要) + } + }) + except: + print('Load function plugin failed') + + try: + from crazy_functions.联网的ChatGPT import 连接网络回答问题 + function_plugins.update({ + "连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": { + "Color": "stop", + "AsButton": False, # 加入下拉菜单中 + "Function": HotReload(连接网络回答问题) + } + }) + from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题 + function_plugins.update({ + "连接网络回答问题(中文Bing版,输入问题后点击该插件)": { + "Color": "stop", + "AsButton": False, # 加入下拉菜单中 + "Function": HotReload(连接bing搜索回答问题) + } + }) + except: + print('Load function plugin failed') + + try: + from crazy_functions.解析项目源代码 import 解析任意code项目 + function_plugins.update({ + "解析项目源代码(手动指定和筛选源代码文件类型)": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) + "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示 + "Function": HotReload(解析任意code项目) + }, + }) + except: + print('Load function plugin failed') + + try: + from crazy_functions.询问多个大语言模型 import 同时问询_指定模型 + function_plugins.update({ + "询问多个GPT模型(手动指定询问哪些模型)": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) + "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示 + "Function": HotReload(同时问询_指定模型) + }, + }) + except: + print('Load function plugin failed') + + try: + from crazy_functions.图片生成 import 图片生成 + function_plugins.update({ + "图片生成(先切换模型到openai或api2d)": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) + "ArgsReminder": "在这里输入分辨率, 如256x256(默认)", # 高级参数输入区的显示提示 + "Function": HotReload(图片生成) + }, + }) + except: + print('Load function plugin failed') + + try: + from crazy_functions.总结音视频 import 总结音视频 + function_plugins.update({ + "批量总结音视频(输入路径或上传压缩包)": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": 
"调用openai api 使用whisper-1模型, 目前支持的格式:mp4, m4a, wav, mpga, mpeg, mp3。此处可以输入解析提示,例如:解析为简体中文(默认)。", + "Function": HotReload(总结音视频) + } + }) + except: + print('Load function plugin failed') - from crazy_functions.解析项目源代码 import 解析任意code项目 - function_plugins.update({ - "解析项目源代码(手动指定和筛选源代码文件类型)": { - "Color": "stop", - "AsButton": False, - "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) - "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示 - "Function": HotReload(解析任意code项目) - }, - }) - from crazy_functions.询问多个大语言模型 import 同时问询_指定模型 - function_plugins.update({ - "询问多个GPT模型(手动指定询问哪些模型)": { - "Color": "stop", - "AsButton": False, - "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) - "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示 - "Function": HotReload(同时问询_指定模型) - }, - }) - from crazy_functions.图片生成 import 图片生成 - function_plugins.update({ - "图片生成(先切换模型到openai或api2d)": { - "Color": "stop", - "AsButton": False, - "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False) - "ArgsReminder": "在这里输入分辨率, 如256x256(默认)", # 高级参数输入区的显示提示 - "Function": HotReload(图片生成) - }, - }) - from crazy_functions.总结音视频 import 总结音视频 - function_plugins.update({ - "批量总结音视频(输入路径或上传压缩包)": { - "Color": "stop", - "AsButton": False, - "AdvancedArgs": True, - "ArgsReminder": "调用openai api 使用whisper-1模型, 目前支持的格式:mp4, m4a, wav, mpga, mpeg, mp3。此处可以输入解析提示,例如:解析为简体中文(默认)。", - "Function": HotReload(总结音视频) - } - }) try: from crazy_functions.数学动画生成manim import 动画生成 function_plugins.update({ @@ -295,5 +325,83 @@ def get_crazy_functions(): except: print('Load function plugin failed') - ###################### 第n组插件 ########################### + try: + from crazy_functions.Langchain知识库 import 知识库问答 + function_plugins.update({ + "[功能尚不稳定] 构建知识库(请先上传文件素材)": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": "待注入的知识库名称id, 默认为default", + "Function": HotReload(知识库问答) + } + }) + except: + print('Load function plugin failed') + + try: + from crazy_functions.Langchain知识库 import 读取知识库作答 + function_plugins.update({ + "[功能尚不稳定] 知识库问答": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": "待提取的知识库名称id, 默认为default, 您需要首先调用构建知识库", + "Function": HotReload(读取知识库作答) + } + }) + except: + print('Load function plugin failed') + + try: + from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比 + function_plugins.update({ + "Latex英文纠错+高亮修正位置 [需Latex]": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。", + "Function": HotReload(Latex英文纠错加PDF对比) + } + }) + from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF + function_plugins.update({ + "Arixv翻译(输入arxivID)[需Latex]": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": + "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ + "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ', + "Function": HotReload(Latex翻译中文并重新编译PDF) + } + }) + function_plugins.update({ + "本地论文翻译(上传Latex压缩包)[需Latex]": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": + "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ + "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". 
', + "Function": HotReload(Latex翻译中文并重新编译PDF) + } + }) + except: + print('Load function plugin failed') + + # try: + # from crazy_functions.虚空终端 import 终端 + # function_plugins.update({ + # "超级终端": { + # "Color": "stop", + # "AsButton": False, + # # "AdvancedArgs": True, + # # "ArgsReminder": "", + # "Function": HotReload(终端) + # } + # }) + # except: + # print('Load function plugin failed') + return function_plugins diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/Langchain知识库.py new file mode 100644 index 00000000..31c459aa --- /dev/null +++ b/crazy_functions/Langchain知识库.py @@ -0,0 +1,107 @@ +from toolbox import CatchException, update_ui, ProxyNetworkActivate +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything + + + +@CatchException +def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行 + plugin_kwargs 插件模型的参数,暂时没有用武之地 + chatbot 聊天显示框的句柄,用于显示给用户 + history 聊天历史,前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + history = [] # 清空历史,以免输入溢出 + chatbot.append(("这是什么功能?", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # resolve deps + try: + from zh_langchain import construct_vector_store + from langchain.embeddings.huggingface import HuggingFaceEmbeddings + from .crazy_utils import knowledge_archive_interface + except Exception as e: + chatbot.append( + ["依赖不足", + "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."] + ) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + from .crazy_utils import try_install_deps + try_install_deps(['zh_langchain==0.2.1']) + + # < --------------------读取参数--------------- > + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + kai_id = plugin_kwargs.get("advanced_arg", 'default') + + # < --------------------读取文件--------------- > + file_manifest = [] + spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"] + for sp in spl: + _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}') + file_manifest += file_manifest_tmp + + if len(file_manifest) == 0: + chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + # < -------------------预热文本向量化模组--------------- > + chatbot.append(['
<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+    print('Checking Text2vec ...')
+    from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+    with ProxyNetworkActivate(): # 临时地激活代理网络
+        HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
+
+    # < -------------------构建知识库--------------- >
+    chatbot.append(['<br/>
'.join(file_manifest), "正在构建知识库..."])
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+    print('Establishing knowledge archive ...')
+    with ProxyNetworkActivate(): # 临时地激活代理网络
+        kai = knowledge_archive_interface()
+        kai.feed_archive(file_manifest=file_manifest, id=kai_id)
+    kai_files = kai.get_loaded_file()
+    kai_files = '<br/>
'.join(kai_files) + # chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"]) + # yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + # chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id() + # chatbot._cookies['lock_plugin'] = 'crazy_functions.Langchain知识库->读取知识库作答' + # chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"]) + chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + +@CatchException +def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1): + # resolve deps + try: + from zh_langchain import construct_vector_store + from langchain.embeddings.huggingface import HuggingFaceEmbeddings + from .crazy_utils import knowledge_archive_interface + except Exception as e: + chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + from .crazy_utils import try_install_deps + try_install_deps(['zh_langchain==0.2.1']) + + # < ------------------- --------------- > + kai = knowledge_archive_interface() + + if 'langchain_plugin_embedding' in chatbot._cookies: + resp, prompt = kai.answer_with_archive_by_id(txt, chatbot._cookies['langchain_plugin_embedding']) + else: + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + kai_id = plugin_kwargs.get("advanced_arg", 'default') + resp, prompt = kai.answer_with_archive_by_id(txt, kai_id) + + chatbot.append((txt, '[Local Message] ' + prompt)) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=prompt, inputs_show_user=txt, + llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], + sys_prompt=system_prompt + ) + history.extend((prompt, gpt_say)) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 diff --git a/crazy_functions/Latex全文润色.py b/crazy_functions/Latex全文润色.py index 8d3f97b5..9e1d4b66 100644 --- a/crazy_functions/Latex全文润色.py +++ b/crazy_functions/Latex全文润色.py @@ -238,3 +238,6 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread') + + + diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py new file mode 100644 index 00000000..810d8024 --- /dev/null +++ b/crazy_functions/Latex输出PDF结果.py @@ -0,0 +1,300 @@ +from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone +from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str +from functools import partial +import glob, os, requests, time +pj = os.path.join +ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/") + +# =================================== 工具函数 =============================================== +专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". ' +def switch_prompt(pfg, mode, more_requirement): + """ + Generate prompts and system prompts based on the mode for proofreading or translating. + Args: + - pfg: Proofreader or Translator instance. 
+ - mode: A string specifying the mode, either 'proofread' or 'translate_zh'. + + Returns: + - inputs_array: A list of strings containing prompts for users to respond to. + - sys_prompt_array: A list of strings containing prompts for system prompts. + """ + n_split = len(pfg.sp_file_contents) + if mode == 'proofread_en': + inputs_array = [r"Below is a section from an academic paper, proofread this section." + + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement + + r"Answer me only with the revised text:" + + f"\n\n{frag}" for frag in pfg.sp_file_contents] + sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)] + elif mode == 'translate_zh': + inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement + + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + + r"Answer me only with the translated text:" + + f"\n\n{frag}" for frag in pfg.sp_file_contents] + sys_prompt_array = ["You are a professional translator." for _ in range(n_split)] + else: + assert False, "未知指令" + return inputs_array, sys_prompt_array + +def desend_to_extracted_folder_if_exist(project_folder): + """ + Descend into the extracted folder if it exists, otherwise return the original folder. + + Args: + - project_folder: A string specifying the folder path. + + Returns: + - A string specifying the path to the extracted folder, or the original folder if there is no extracted folder. + """ + maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)] + if len(maybe_dir) == 0: return project_folder + if maybe_dir[0].endswith('.extract'): return maybe_dir[0] + return project_folder + +def move_project(project_folder, arxiv_id=None): + """ + Create a new work folder and copy the project folder to it. + + Args: + - project_folder: A string specifying the folder path of the project. + + Returns: + - A string specifying the path to the new work folder. + """ + import shutil, time + time.sleep(2) # avoid time string conflict + if arxiv_id is not None: + new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder') + else: + new_workfolder = f'gpt_log/{gen_time_str()}' + try: + shutil.rmtree(new_workfolder) + except: + pass + + # align subfolder if there is a folder wrapper + items = glob.glob(pj(project_folder,'*')) + if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1: + if os.path.isdir(items[0]): project_folder = items[0] + + shutil.copytree(src=project_folder, dst=new_workfolder) + return new_workfolder + +def arxiv_download(chatbot, history, txt): + def check_cached_translation_pdf(arxiv_id): + translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation') + if not os.path.exists(translation_dir): + os.makedirs(translation_dir) + target_file = pj(translation_dir, 'translate_zh.pdf') + if os.path.exists(target_file): + promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot) + return target_file + return False + def is_float(s): + try: + float(s) + return True + except ValueError: + return False + if ('.' in txt) and ('/' not in txt) and is_float(txt): # is arxiv ID + txt = 'https://arxiv.org/abs/' + txt.strip() + if ('.' 
in txt) and ('/' not in txt) and is_float(txt[:10]): # is arxiv ID + txt = 'https://arxiv.org/abs/' + txt[:10] + if not txt.startswith('https://arxiv.org'): + return txt, None + + # <-------------- inspect format -------------> + chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...']) + yield from update_ui(chatbot=chatbot, history=history) + time.sleep(1) # 刷新界面 + + url_ = txt # https://arxiv.org/abs/1707.06690 + if not txt.startswith('https://arxiv.org/abs/'): + msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}" + yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history) # 刷新界面 + return msg, None + # <-------------- set format -------------> + arxiv_id = url_.split('/abs/')[-1] + if 'v' in arxiv_id: arxiv_id = arxiv_id[:10] + cached_translation_pdf = check_cached_translation_pdf(arxiv_id) + if cached_translation_pdf: return cached_translation_pdf, arxiv_id + + url_tar = url_.replace('/abs/', '/e-print/') + translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print') + extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract') + os.makedirs(translation_dir, exist_ok=True) + + # <-------------- download arxiv source file -------------> + dst = pj(translation_dir, arxiv_id+'.tar') + if os.path.exists(dst): + yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面 + else: + yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面 + proxies, = get_conf('proxies') + r = requests.get(url_tar, proxies=proxies) + with open(dst, 'wb+') as f: + f.write(r.content) + # <-------------- extract file -------------> + yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history) # 刷新界面 + from toolbox import extract_archive + extract_archive(file_path=dst, dest_dir=extract_dst) + return extract_dst, arxiv_id +# ========================================= 插件主程序1 ===================================================== + + +@CatchException +def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + # <-------------- information about this plugin -------------> + chatbot.append([ "函数插件功能?", + "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # <-------------- more requirements -------------> + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + more_req = plugin_kwargs.get("advanced_arg", "") + _switch_prompt_ = partial(switch_prompt, more_requirement=more_req) + + # <-------------- check deps -------------> + try: + import glob, os, time, subprocess + subprocess.Popen(['pdflatex', '-version']) + from .latex_utils import Latex精细分解与转化, 编译Latex + except Exception as e: + chatbot.append([ f"解析项目: {txt}", + f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + # <-------------- clear history and read input -------------> + history = [] + if os.path.exists(txt): + project_folder = txt + else: + if txt == "": txt = '空空如也的输入栏' + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + if len(file_manifest) == 0: + 
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + # <-------------- if is a zip/tar file -------------> + project_folder = desend_to_extracted_folder_if_exist(project_folder) + + + # <-------------- move latex project away from temp folder -------------> + project_folder = move_project(project_folder, arxiv_id=None) + + + # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> + if not os.path.exists(project_folder + '/merge_proofread_en.tex'): + yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, + chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_) + + + # <-------------- compile PDF -------------> + success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en', + work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) + + + # <-------------- zip PDF -------------> + zip_res = zip_result(project_folder) + if success: + chatbot.append((f"成功啦", '请查收结果(压缩包)...')) + yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) + else: + chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) + yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) + + # <-------------- we are done -------------> + return success + + +# ========================================= 插件主程序2 ===================================================== + +@CatchException +def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + # <-------------- information about this plugin -------------> + chatbot.append([ + "函数插件功能?", + "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # <-------------- more requirements -------------> + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + more_req = plugin_kwargs.get("advanced_arg", "") + _switch_prompt_ = partial(switch_prompt, more_requirement=more_req) + + # <-------------- check deps -------------> + try: + import glob, os, time, subprocess + subprocess.Popen(['pdflatex', '-version']) + from .latex_utils import Latex精细分解与转化, 编译Latex + except Exception as e: + chatbot.append([ f"解析项目: {txt}", + f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + # <-------------- clear history and read input -------------> + history = [] + txt, arxiv_id = yield from arxiv_download(chatbot, history, txt) + if txt.endswith('.pdf'): + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + if os.path.exists(txt): + project_folder = txt + else: + if txt == "": txt = '空空如也的输入栏' + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + file_manifest = [f for f 
in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + if len(file_manifest) == 0: + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + # <-------------- if is a zip/tar file -------------> + project_folder = desend_to_extracted_folder_if_exist(project_folder) + + + # <-------------- move latex project away from temp folder -------------> + project_folder = move_project(project_folder, arxiv_id) + + + # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> + if not os.path.exists(project_folder + '/merge_translate_zh.tex'): + yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, + chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_) + + + # <-------------- compile PDF -------------> + success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh', + work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) + + # <-------------- zip PDF -------------> + zip_res = zip_result(project_folder) + if success: + chatbot.append((f"成功啦", '请查收结果(压缩包)...')) + yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) + else: + chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) + yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) + + + # <-------------- we are done -------------> + return success diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index a9bfbf80..0c623b8e 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -3,6 +3,8 @@ 这个文件用于函数插件的单元测试 运行方法 python crazy_functions/crazy_functions_test.py """ + +# ============================================================================================================================== def validate_path(): import os, sys @@ -10,10 +12,16 @@ def validate_path(): root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..') os.chdir(root_dir_assume) sys.path.append(root_dir_assume) - validate_path() # validate path so you can run from base directory + +# ============================================================================================================================== + from colorful import * from toolbox import get_conf, ChatBotWithCookies +import contextlib +import os +import sys +from functools import wraps proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \ get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY') @@ -30,7 +38,43 @@ history = [] system_prompt = "Serve me as a writing and programming assistant." 
web_port = 1024 +# ============================================================================================================================== +def silence_stdout(func): + @wraps(func) + def wrapper(*args, **kwargs): + _original_stdout = sys.stdout + sys.stdout = open(os.devnull, 'w') + for q in func(*args, **kwargs): + sys.stdout = _original_stdout + yield q + sys.stdout = open(os.devnull, 'w') + sys.stdout.close() + sys.stdout = _original_stdout + return wrapper + +class CLI_Printer(): + def __init__(self) -> None: + self.pre_buf = "" + + def print(self, buf): + bufp = "" + for index, chat in enumerate(buf): + a, b = chat + bufp += sprint亮靛('[Me]:' + a) + '\n' + bufp += '[GPT]:' + b + if index < len(buf)-1: + bufp += '\n' + + if self.pre_buf!="" and bufp.startswith(self.pre_buf): + print(bufp[len(self.pre_buf):], end='') + else: + print('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'+bufp, end='') + self.pre_buf = bufp + return + +cli_printer = CLI_Printer() +# ============================================================================================================================== def test_解析一个Python项目(): from crazy_functions.解析项目源代码 import 解析一个Python项目 txt = "crazy_functions/test_project/python/dqn" @@ -116,6 +160,56 @@ def test_Markdown多语言(): for cookies, cb, hist, msg in Markdown翻译指定语言(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): print(cb) +def test_Langchain知识库(): + from crazy_functions.Langchain知识库 import 知识库问答 + txt = "./" + chatbot = ChatBotWithCookies(llm_kwargs) + for cookies, cb, hist, msg in silence_stdout(知识库问答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + cli_printer.print(cb) # print(cb) + + chatbot = ChatBotWithCookies(cookies) + from crazy_functions.Langchain知识库 import 读取知识库作答 + txt = "What is the installation method?" + for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + cli_printer.print(cb) # print(cb) + +def test_Langchain知识库读取(): + from crazy_functions.Langchain知识库 import 读取知识库作答 + txt = "远程云服务器部署?" 
+ for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + cli_printer.print(cb) # print(cb) + +def test_Latex(): + from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF + + # txt = r"https://arxiv.org/abs/1706.03762" + # txt = r"https://arxiv.org/abs/1902.03185" + # txt = r"https://arxiv.org/abs/2305.18290" + # txt = r"https://arxiv.org/abs/2305.17608" + # txt = r"https://arxiv.org/abs/2211.16068" # ACE + # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE + # txt = r"https://arxiv.org/abs/2002.09253" + # txt = r"https://arxiv.org/abs/2306.07831" + # txt = r"https://arxiv.org/abs/2212.10156" + # txt = r"https://arxiv.org/abs/2211.11559" + # txt = r"https://arxiv.org/abs/2303.08774" + txt = r"https://arxiv.org/abs/2303.12712" + # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder" + + + for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + cli_printer.print(cb) # print(cb) + + + + # txt = "2302.02948.tar" + # print(txt) + # main_tex, work_folder = Latex预处理(txt) + # print('main tex:', main_tex) + # res = 编译Latex(main_tex, work_folder) + # # for cookies, cb, hist, msg in silence_stdout(编译Latex)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + # cli_printer.print(cb) # print(cb) + # test_解析一个Python项目() @@ -129,7 +223,9 @@ def test_Markdown多语言(): # test_联网回答问题() # test_解析ipynb文件() # test_数学动画生成manim() -test_Markdown多语言() - -input("程序完成,回车退出。") -print("退出。") \ No newline at end of file +# test_Langchain知识库() +# test_Langchain知识库读取() +if __name__ == "__main__": + test_Latex() + input("程序完成,回车退出。") + print("退出。") \ No newline at end of file diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index de205d73..a1b1493c 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -1,4 +1,5 @@ from toolbox import update_ui, get_conf, trimmed_format_exc +import threading def input_clipping(inputs, history, max_token_limit): import numpy as np @@ -606,3 +607,142 @@ def get_files_from_everything(txt, type): # type='.md' success = False return success, file_manifest, project_folder + + + + +def Singleton(cls): + _instance = {} + + def _singleton(*args, **kargs): + if cls not in _instance: + _instance[cls] = cls(*args, **kargs) + return _instance[cls] + + return _singleton + + +@Singleton +class knowledge_archive_interface(): + def __init__(self) -> None: + self.threadLock = threading.Lock() + self.current_id = "" + self.kai_path = None + self.qa_handle = None + self.text2vec_large_chinese = None + + def get_chinese_text2vec(self): + if self.text2vec_large_chinese is None: + # < -------------------预热文本向量化模组--------------- > + from toolbox import ProxyNetworkActivate + print('Checking Text2vec ...') + from langchain.embeddings.huggingface import HuggingFaceEmbeddings + with ProxyNetworkActivate(): # 临时地激活代理网络 + self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese") + + return self.text2vec_large_chinese + + + def feed_archive(self, file_manifest, id="default"): + self.threadLock.acquire() + # import uuid + self.current_id = id + from zh_langchain import construct_vector_store + self.qa_handle, self.kai_path = construct_vector_store( + vs_id=self.current_id, + files=file_manifest, + sentence_size=100, + history=[], + one_conent="", + one_content_segmentation="", + text2vec = 
self.get_chinese_text2vec(), + ) + self.threadLock.release() + + def get_current_archive_id(self): + return self.current_id + + def get_loaded_file(self): + return self.qa_handle.get_loaded_file() + + def answer_with_archive_by_id(self, txt, id): + self.threadLock.acquire() + if not self.current_id == id: + self.current_id = id + from zh_langchain import construct_vector_store + self.qa_handle, self.kai_path = construct_vector_store( + vs_id=self.current_id, + files=[], + sentence_size=100, + history=[], + one_conent="", + one_content_segmentation="", + text2vec = self.get_chinese_text2vec(), + ) + VECTOR_SEARCH_SCORE_THRESHOLD = 0 + VECTOR_SEARCH_TOP_K = 4 + CHUNK_SIZE = 512 + resp, prompt = self.qa_handle.get_knowledge_based_conent_test( + query = txt, + vs_path = self.kai_path, + score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD, + vector_search_top_k=VECTOR_SEARCH_TOP_K, + chunk_conent=True, + chunk_size=CHUNK_SIZE, + text2vec = self.get_chinese_text2vec(), + ) + self.threadLock.release() + return resp, prompt + +def try_install_deps(deps): + for dep in deps: + import subprocess, sys + subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep]) + + +class construct_html(): + def __init__(self) -> None: + self.css = """ +.row { + display: flex; + flex-wrap: wrap; +} + +.column { + flex: 1; + padding: 10px; +} + +.table-header { + font-weight: bold; + border-bottom: 1px solid black; +} + +.table-row { + border-bottom: 1px solid lightgray; +} + +.table-cell { + padding: 5px; +} + """ + self.html_string = f'翻译结果' + + + def add_row(self, a, b): + tmp = """ +
+<div class="row table-row">
+<div class="column table-cell">
+REPLACE_A
+</div>
+<div class="column table-cell">
+REPLACE_B
+</div>
+</div>
+ """ + from toolbox import markdown_convertion + tmp = tmp.replace('REPLACE_A', markdown_convertion(a)) + tmp = tmp.replace('REPLACE_B', markdown_convertion(b)) + self.html_string += tmp + + + def save_file(self, file_name): + with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f: + f.write(self.html_string.encode('utf-8', 'ignore').decode()) + diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py new file mode 100644 index 00000000..69f05ff9 --- /dev/null +++ b/crazy_functions/latex_utils.py @@ -0,0 +1,773 @@ +from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面 +from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone +import os, shutil +import re +import numpy as np +pj = os.path.join + +""" +======================================================================== +Part One +Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1) +======================================================================== +""" +PRESERVE = 0 +TRANSFORM = 1 + +def set_forbidden_text(text, mask, pattern, flags=0): + """ + Add a preserve text area in this paper + e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}" + you can mask out (mask = PRESERVE so that text become untouchable for GPT) + everything between "\begin{equation}" and "\end{equation}" + """ + if isinstance(pattern, list): pattern = '|'.join(pattern) + pattern_compile = re.compile(pattern, flags) + for res in pattern_compile.finditer(text): + mask[res.span()[0]:res.span()[1]] = PRESERVE + return text, mask + +def set_forbidden_text_careful_brace(text, mask, pattern, flags=0): + """ + Add a preserve text area in this paper (text become untouchable for GPT). + count the number of the braces so as to catch compelete text area. + e.g. + \caption{blablablablabla\texbf{blablabla}blablabla.} + """ + pattern_compile = re.compile(pattern, flags) + for res in pattern_compile.finditer(text): + brace_level = -1 + p = begin = end = res.regs[0][0] + for _ in range(1024*16): + if text[p] == '}' and brace_level == 0: break + elif text[p] == '}': brace_level -= 1 + elif text[p] == '{': brace_level += 1 + p += 1 + end = p+1 + mask[begin:end] = PRESERVE + return text, mask + +def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True): + """ + Move area out of preserve area (make text editable for GPT) + count the number of the braces so as to catch compelete text area. + e.g. + \caption{blablablablabla\texbf{blablabla}blablabla.} + """ + pattern_compile = re.compile(pattern, flags) + for res in pattern_compile.finditer(text): + brace_level = 0 + p = begin = end = res.regs[1][0] + for _ in range(1024*16): + if text[p] == '}' and brace_level == 0: break + elif text[p] == '}': brace_level -= 1 + elif text[p] == '{': brace_level += 1 + p += 1 + end = p + mask[begin:end] = TRANSFORM + if forbid_wrapper: + mask[res.regs[0][0]:begin] = PRESERVE + mask[end:res.regs[0][1]] = PRESERVE + return text, mask + +def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): + """ + Find all \begin{} ... \end{} text block that with less than limit_n_lines lines. 
+ Add it to preserve area + """ + pattern_compile = re.compile(pattern, flags) + def search_with_line_limit(text, mask): + for res in pattern_compile.finditer(text): + cmd = res.group(1) # begin{what} + this = res.group(2) # content between begin and end + this_mask = mask[res.regs[2][0]:res.regs[2][1]] + white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof', + 'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate'] + if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42 + this, this_mask = search_with_line_limit(this, this_mask) + mask[res.regs[2][0]:res.regs[2][1]] = this_mask + else: + mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE + return text, mask + return search_with_line_limit(text, mask) + +class LinkedListNode(): + """ + Linked List Node + """ + def __init__(self, string, preserve=True) -> None: + self.string = string + self.preserve = preserve + self.next = None + # self.begin_line = 0 + # self.begin_char = 0 + +def convert_to_linklist(text, mask): + root = LinkedListNode("", preserve=True) + current_node = root + for c, m, i in zip(text, mask, range(len(text))): + if (m==PRESERVE and current_node.preserve) \ + or (m==TRANSFORM and not current_node.preserve): + # add + current_node.string += c + else: + current_node.next = LinkedListNode(c, preserve=(m==PRESERVE)) + current_node = current_node.next + return root +""" +======================================================================== +Latex Merge File +======================================================================== +""" + +def 寻找Latex主文件(file_manifest, mode): + """ + 在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。 + P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码) + """ + canidates = [] + for texf in file_manifest: + if os.path.basename(texf).startswith('merge'): + continue + with open(texf, 'r', encoding='utf8') as f: + file_content = f.read() + if r'\documentclass' in file_content: + canidates.append(texf) + else: + continue + + if len(canidates) == 0: + raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)') + elif len(canidates) == 1: + return canidates[0] + else: # if len(canidates) >= 2 通过一些Latex模板中常见(但通常不会出现在正文)的单词,对不同latex源文件扣分,取评分最高者返回 + canidates_score = [] + # 给出一些判定模板文档的词作为扣分项 + unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers'] + expected_words = ['\input', '\ref', '\cite'] + for texf in canidates: + canidates_score.append(0) + with open(texf, 'r', encoding='utf8') as f: + file_content = f.read() + for uw in unexpected_words: + if uw in file_content: + canidates_score[-1] -= 1 + for uw in expected_words: + if uw in file_content: + canidates_score[-1] += 1 + select = np.argmax(canidates_score) # 取评分最高者返回 + return canidates[select] + +def rm_comments(main_file): + new_file_remove_comment_lines = [] + for l in main_file.splitlines(): + # 删除整行的空注释 + if l.lstrip().startswith("%"): + pass + else: + new_file_remove_comment_lines.append(l) + main_file = '\n'.join(new_file_remove_comment_lines) + # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令 + main_file = re.sub(r'(? 0 and node_string.count('\_') > final_tex.count('\_'): + # walk and replace any _ without \ + final_tex = re.sub(r"(?') + if not node.preserve: + segment_parts_for_gpt.append(node.string) + f.write(f'
<p style="color:red;">#{show_html}#</p>')
+            else:
+                f.write(f'<p style="color:gray;">{show_html}</p>
') + node = node.next + if node is None: break + + for n in nodes: n.next = None # break + return_dict['nodes'] = nodes + return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt + return return_dict + + + +class LatexPaperSplit(): + """ + break down latex file to a linked list, + each node use a preserve flag to indicate whether it should + be proccessed by GPT. + """ + def __init__(self) -> None: + self.nodes = None + self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ + "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \ + "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" + # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者) + self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" + + def merge_result(self, arr, mode, msg): + """ + Merge the result after the GPT process completed + """ + result_string = "" + p = 0 + for node in self.nodes: + if node.preserve: + result_string += node.string + else: + result_string += fix_content(arr[p], node.string) + p += 1 + if mode == 'translate_zh': + pattern = re.compile(r'\\begin\{abstract\}.*\n') + match = pattern.search(result_string) + if not match: + # match \abstract{xxxx} + pattern_compile = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL) + match = pattern_compile.search(result_string) + position = match.regs[1][0] + else: + # match \begin{abstract}xxxx\end{abstract} + position = match.end() + result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:] + return result_string + + def split(self, txt, project_folder, opts): + """ + break down latex file to a linked list, + each node use a preserve flag to indicate whether it should + be proccessed by GPT. + P.S. use multiprocessing to avoid timeout error + """ + import multiprocessing + manager = multiprocessing.Manager() + return_dict = manager.dict() + p = multiprocessing.Process( + target=split_subprocess, + args=(txt, project_folder, return_dict, opts)) + p.start() + p.join() + p.close() + self.nodes = return_dict['nodes'] + self.sp = return_dict['segment_parts_for_gpt'] + return self.sp + + + +class LatexPaperFileGroup(): + """ + use tokenizer to break down text according to max_token_limit + """ + def __init__(self): + self.file_paths = [] + self.file_contents = [] + self.sp_file_contents = [] + self.sp_file_index = [] + self.sp_file_tag = [] + + # count_token + from request_llm.bridge_all import model_info + enc = model_info["gpt-3.5-turbo"]['tokenizer'] + def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) + self.get_token_num = get_token_num + + def run_file_split(self, max_token_limit=1900): + """ + use tokenizer to break down text according to max_token_limit + """ + for index, file_content in enumerate(self.file_contents): + if self.get_token_num(file_content) < max_token_limit: + self.sp_file_contents.append(file_content) + self.sp_file_index.append(index) + self.sp_file_tag.append(self.file_paths[index]) + else: + from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + for j, segment in enumerate(segments): + self.sp_file_contents.append(segment) + self.sp_file_index.append(index) + self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex") + print('Segmentation: done') + + def merge_result(self): + self.file_result = ["" for _ in range(len(self.file_paths))] + for r, k in zip(self.sp_file_result, self.sp_file_index): + 
self.file_result[k] += r + + def write_result(self): + manifest = [] + for path, res in zip(self.file_paths, self.file_result): + with open(path + '.polish.tex', 'w', encoding='utf8') as f: + manifest.append(path + '.polish.tex') + f.write(res) + return manifest + +def write_html(sp_file_contents, sp_file_result, chatbot, project_folder): + + # write html + try: + import shutil + from .crazy_utils import construct_html + from toolbox import gen_time_str + ch = construct_html() + orig = "" + trans = "" + final = [] + for c,r in zip(sp_file_contents, sp_file_result): + final.append(c) + final.append(r) + for i, k in enumerate(final): + if i%2==0: + orig = k + if i%2==1: + trans = k + ch.add_row(a=orig, b=trans) + create_report_file_name = f"{gen_time_str()}.trans.html" + ch.save_file(create_report_file_name) + shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name)) + promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot) + except: + from toolbox import trimmed_format_exc + print('writing html result failed:', trimmed_format_exc()) + +def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]): + import time, os, re + from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency + from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件 + + # <-------- 寻找主tex文件 ----------> + maintex = 寻找Latex主文件(file_manifest, mode) + chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。')) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + time.sleep(3) + + # <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ----------> + main_tex_basename = os.path.basename(maintex) + assert main_tex_basename.endswith('.tex') + main_tex_basename_bare = main_tex_basename[:-4] + may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl') + if os.path.exists(may_exist_bbl): + shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl')) + shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl')) + shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl')) + + with open(maintex, 'r', encoding='utf-8', errors='replace') as f: + content = f.read() + merged_content = merge_tex_files(project_folder, content, mode) + + with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f: + f.write(merged_content) + + # <-------- 精细切分latex文件 ----------> + chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。')) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + lps = LatexPaperSplit() + res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数 + + # <-------- 拆分过长的latex片段 ----------> + pfg = LatexPaperFileGroup() + for index, r in enumerate(res): + pfg.file_paths.append('segment-' + str(index)) + pfg.file_contents.append(r) + + pfg.run_file_split(max_token_limit=1024) + n_split = len(pfg.sp_file_contents) + + # <-------- 根据需要切换prompt ----------> + inputs_array, sys_prompt_array = switch_prompt(pfg, mode) + inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag] + + if os.path.exists(pj(project_folder,'temp.pkl')): + + # <-------- 【仅调试】如果存在调试缓存文件,则跳过GPT请求环节 ----------> + pfg = objload(file=pj(project_folder,'temp.pkl')) + + else: + # <-------- gpt 多线程请求 ----------> + gpt_response_collection = 
yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( + inputs_array=inputs_array, + inputs_show_user_array=inputs_show_user_array, + llm_kwargs=llm_kwargs, + chatbot=chatbot, + history_array=[[""] for _ in range(n_split)], + sys_prompt_array=sys_prompt_array, + # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待 + scroller_max_len = 40 + ) + + # <-------- 文本碎片重组为完整的tex片段 ----------> + pfg.sp_file_result = [] + for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents): + pfg.sp_file_result.append(gpt_say) + pfg.merge_result() + + # <-------- 临时存储用于调试 ----------> + pfg.get_token_num = None + objdump(pfg, file=pj(project_folder,'temp.pkl')) + + write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder) + + # <-------- 写出文件 ----------> + msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。" + final_tex = lps.merge_result(pfg.file_result, mode, msg) + with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f: + if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex) + + + # <-------- 整理结果, 退出 ----------> + chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF')) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # <-------- 返回 ----------> + return project_folder + f'/merge_{mode}.tex' + + + +def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified): + try: + with open(log_path, 'r', encoding='utf-8', errors='replace') as f: + log = f.read() + with open(file_path, 'r', encoding='utf-8', errors='replace') as f: + file_lines = f.readlines() + import re + buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log) + buggy_lines = [int(l) for l in buggy_lines] + buggy_lines = sorted(buggy_lines) + print("removing lines that has errors", buggy_lines) + file_lines.pop(buggy_lines[0]-1) + with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f: + f.writelines(file_lines) + return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines + except: + print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.") + return False, -1, [-1] + + +def compile_latex_with_timeout(command, timeout=60): + import subprocess + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + try: + stdout, stderr = process.communicate(timeout=timeout) + except subprocess.TimeoutExpired: + process.kill() + stdout, stderr = process.communicate() + print("Process timed out!") + return False + return True + +def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'): + import os, time + current_dir = os.getcwd() + n_fix = 1 + max_try = 32 + chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history) + chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面 + yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面 + + while True: + import os + + # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF 
...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) + + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) + + if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')): + # 只有第二步成功,才能继续下面的步骤 + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面 + if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')): + os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux'); os.chdir(current_dir) + if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')): + os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux'); os.chdir(current_dir) + + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) + os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) + os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) + os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) + + if mode!='translate_zh': + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 + print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') + ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') + + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + + # <---------------------> + os.chdir(current_dir) + + # <---------- 检查结果 -----------> + results_ = "" + original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf')) + modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')) + diff_pdf_success = os.path.exists(pj(work_folder, f'merge_diff.pdf')) + results_ += f"原始PDF编译是否成功: 
{original_pdf_success};" + results_ += f"转化PDF编译是否成功: {modified_pdf_success};" + results_ += f"对比PDF编译是否成功: {diff_pdf_success};" + yield from update_ui_lastest_msg(f'第{n_fix}编译结束:
{results_}...', chatbot, history) # 刷新Gradio前端界面 + + if diff_pdf_success: + result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path + promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI + if modified_pdf_success: + yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面 + result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path + if os.path.exists(pj(work_folder, '..', 'translation')): + shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf')) + promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI + return True # 成功啦 + else: + if n_fix>=max_try: break + n_fix += 1 + can_retry, main_file_modified, buggy_lines = remove_buggy_lines( + file_path=pj(work_folder_modified, f'{main_file_modified}.tex'), + log_path=pj(work_folder_modified, f'{main_file_modified}.log'), + tex_name=f'{main_file_modified}.tex', + tex_name_pure=f'{main_file_modified}', + n_fix=n_fix, + work_folder_modified=work_folder_modified, + ) + yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面 + if not can_retry: break + + os.chdir(current_dir) + return False # 失败啦 + + + diff --git a/crazy_functions/对话历史存档.py b/crazy_functions/对话历史存档.py index c638d1bd..fed0f8f2 100644 --- a/crazy_functions/对话历史存档.py +++ b/crazy_functions/对话历史存档.py @@ -1,4 +1,4 @@ -from toolbox import CatchException, update_ui +from toolbox import CatchException, update_ui, promote_file_to_downloadzone from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive import re @@ -29,9 +29,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None): for h in history: f.write("\n>>>" + h) f.write('') - res = '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}') - print(res) - return res + promote_file_to_downloadzone(f'./gpt_log/{file_name}', rename_file=file_name, chatbot=chatbot) + return '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}') def gen_file_preview(file_name): try: diff --git a/crazy_functions/数学动画生成manim.py b/crazy_functions/数学动画生成manim.py index 5851b9c6..26e61b1b 100644 --- a/crazy_functions/数学动画生成manim.py +++ b/crazy_functions/数学动画生成manim.py @@ -8,7 +8,7 @@ def inspect_dependency(chatbot, history): import manim return True except: - chatbot.append(["导入依赖失败", "使用该模块需要额外依赖,安装方法:```pip install manimgl```"]) + chatbot.append(["导入依赖失败", "使用该模块需要额外依赖,安装方法:```pip install manim manimgl```"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return False diff --git a/crazy_functions/理解PDF文档内容.py b/crazy_functions/理解PDF文档内容.py index 50508645..f1a89a7e 100644 --- a/crazy_functions/理解PDF文档内容.py +++ b/crazy_functions/理解PDF文档内容.py @@ -13,7 +13,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro # 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割) # 的长度必须小于 2500 个 Token file_content, page_one = read_and_clean_pdf_text(file_name) # (尝试)按照章节切割PDF - + file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + TOKEN_LIMIT_PER_FRAGMENT = 2500 from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf diff --git a/crazy_functions/联网的ChatGPT_bing版.py b/crazy_functions/联网的ChatGPT_bing版.py new file mode 100644 index 00000000..93a84a0c --- /dev/null +++ 
b/crazy_functions/联网的ChatGPT_bing版.py @@ -0,0 +1,102 @@ +from toolbox import CatchException, update_ui +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping +import requests +from bs4 import BeautifulSoup +from request_llm.bridge_all import model_info + + +def bing_search(query, proxies=None): + query = query + url = f"https://cn.bing.com/search?q={query}" + headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'} + response = requests.get(url, headers=headers, proxies=proxies) + soup = BeautifulSoup(response.content, 'html.parser') + results = [] + for g in soup.find_all('li', class_='b_algo'): + anchors = g.find_all('a') + if anchors: + link = anchors[0]['href'] + if not link.startswith('http'): + continue + title = g.find('h2').text + item = {'title': title, 'link': link} + results.append(item) + + for r in results: + print(r['link']) + return results + + +def scrape_text(url, proxies) -> str: + """Scrape text from a webpage + + Args: + url (str): The URL to scrape text from + + Returns: + str: The scraped text + """ + headers = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36', + 'Content-Type': 'text/plain', + } + try: + response = requests.get(url, headers=headers, proxies=proxies, timeout=8) + if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding + except: + return "无法连接到该网页" + soup = BeautifulSoup(response.text, "html.parser") + for script in soup(["script", "style"]): + script.extract() + text = soup.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + text = "\n".join(chunk for chunk in chunks if chunk) + return text + +@CatchException +def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行 + plugin_kwargs 插件模型的参数,暂时没有用武之地 + chatbot 聊天显示框的句柄,用于显示给用户 + history 聊天历史,前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + history = [] # 清空历史,以免输入溢出 + chatbot.append((f"请结合互联网信息回答以下问题:{txt}", + "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + + # ------------- < 第1步:爬取搜索引擎的结果 > ------------- + from toolbox import get_conf + proxies, = get_conf('proxies') + urls = bing_search(txt, proxies) + history = [] + + # ------------- < 第2步:依次访问网页 > ------------- + max_search_result = 8 # 最多收纳多少个网页的结果 + for index, url in enumerate(urls[:max_search_result]): + res = scrape_text(url['link'], proxies) + history.extend([f"第{index}份搜索结果:", res]) + chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + + # ------------- < 第3步:ChatGPT综合 > ------------- + i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}" + i_say, history = input_clipping( # 裁剪输入,从最长的条目开始裁剪,防止爆token + inputs=i_say, + history=history, + max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4 + ) + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=i_say, inputs_show_user=i_say, + llm_kwargs=llm_kwargs, chatbot=chatbot, history=history, + 
sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。" + ) + chatbot[-1] = (i_say, gpt_say) + history.append(i_say);history.append(gpt_say) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新 + diff --git a/crazy_functions/虚空终端.py b/crazy_functions/虚空终端.py new file mode 100644 index 00000000..fe71a463 --- /dev/null +++ b/crazy_functions/虚空终端.py @@ -0,0 +1,131 @@ +from toolbox import CatchException, update_ui, gen_time_str +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from .crazy_utils import input_clipping + + +prompt = """ +I have to achieve some functionalities by calling one of the functions below. +Your job is to find the correct funtion to use to satisfy my requirement, +and then write python code to call this function with correct parameters. + +These are functions you are allowed to choose from: +1. + 功能描述: 总结音视频内容 + 调用函数: ConcludeAudioContent(txt, llm_kwargs) + 参数说明: + txt: 音频文件的路径 + llm_kwargs: 模型参数, 永远给定None +2. + 功能描述: 将每次对话记录写入Markdown格式的文件中 + 调用函数: WriteMarkdown() +3. + 功能描述: 将指定目录下的PDF文件从英文翻译成中文 + 调用函数: BatchTranslatePDFDocuments_MultiThreaded(txt, llm_kwargs) + 参数说明: + txt: PDF文件所在的路径 + llm_kwargs: 模型参数, 永远给定None +4. + 功能描述: 根据文本使用GPT模型生成相应的图像 + 调用函数: ImageGeneration(txt, llm_kwargs) + 参数说明: + txt: 图像生成所用到的提示文本 + llm_kwargs: 模型参数, 永远给定None +5. + 功能描述: 对输入的word文档进行摘要生成 + 调用函数: SummarizingWordDocuments(input_path, output_path) + 参数说明: + input_path: 待处理的word文档路径 + output_path: 摘要生成后的文档路径 + + +You should always anwser with following format: +---------------- +Code: +``` +class AutoAcademic(object): + def __init__(self): + self.selected_function = "FILL_CORRECT_FUNCTION_HERE" # e.g., "GenerateImage" + self.txt = "FILL_MAIN_PARAMETER_HERE" # e.g., "荷叶上的蜻蜓" + self.llm_kwargs = None +``` +Explanation: +只有GenerateImage和生成图像相关, 因此选择GenerateImage函数。 +---------------- + +Now, this is my requirement: + +""" +def get_fn_lib(): + return { + "BatchTranslatePDFDocuments_MultiThreaded": ("crazy_functions.批量翻译PDF文档_多线程", "批量翻译PDF文档"), + "SummarizingWordDocuments": ("crazy_functions.总结word文档", "总结word文档"), + "ImageGeneration": ("crazy_functions.图片生成", "图片生成"), + "TranslateMarkdownFromEnglishToChinese": ("crazy_functions.批量Markdown翻译", "Markdown中译英"), + "SummaryAudioVideo": ("crazy_functions.总结音视频", "总结音视频"), + } + +def inspect_dependency(chatbot, history): + return True + +def eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + import subprocess, sys, os, shutil, importlib + + with open('gpt_log/void_terminal_runtime.py', 'w', encoding='utf8') as f: + f.write(code) + + try: + AutoAcademic = getattr(importlib.import_module('gpt_log.void_terminal_runtime', 'AutoAcademic'), 'AutoAcademic') + # importlib.reload(AutoAcademic) + auto_dict = AutoAcademic() + selected_function = auto_dict.selected_function + txt = auto_dict.txt + fp, fn = get_fn_lib()[selected_function] + fn_plugin = getattr(importlib.import_module(fp, fn), fn) + yield from fn_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port) + except: + from toolbox import trimmed_format_exc + chatbot.append(["执行错误", f"\n```\n{trimmed_format_exc()}\n```\n"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + +def get_code_block(reply): + import re + pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks + matches = re.findall(pattern, reply) # find all code blocks in text + if len(matches) != 1: + raise RuntimeError("GPT is not generating proper code.") + return matches[0].strip('python') # code block + 
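+# 用法示意(假设 GPT 严格按上文 prompt 给出的格式返回, 示例字符串为虚构):
+#   reply = 'Code:\n```\nclass AutoAcademic(object): ...\n```\nExplanation: ...'
+#   get_code_block(reply)  ->  '\nclass AutoAcademic(object): ...\n'
+# 注意: strip('python') 按字符集剥除首尾的 p/y/t/h/o/n 字符; 回复中代码块数量不为 1 时抛出 RuntimeError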
+@CatchException +def 终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本, 例如需要翻译的一段话, 再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行 + plugin_kwargs 插件模型的参数, 暂时没有用武之地 + chatbot 聊天显示框的句柄, 用于显示给用户 + history 聊天历史, 前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + # 清空历史, 以免输入溢出 + history = [] + + # 基本信息:功能、贡献者 + chatbot.append(["函数插件功能?", "根据自然语言执行插件命令, 作者: binary-husky, 插件初始化中 ..."]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # # 尝试导入依赖, 如果缺少依赖, 则给出安装建议 + # dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history) # 刷新界面 + # if not dep_ok: return + + # 输入 + i_say = prompt + txt + # 开始 + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=i_say, inputs_show_user=txt, + llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], + sys_prompt="" + ) + + # 将代码转为动画 + code = get_code_block(gpt_say) + yield from eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port) diff --git a/docker-compose.yml b/docker-compose.yml index 07f1c9fe..0a0dcda9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -103,3 +103,30 @@ services: echo '[jittorllms] 正在从github拉取最新代码...' && git --git-dir=request_llm/jittorllms/.git --work-tree=request_llm/jittorllms pull --force && python3 -u main.py" + + +## =================================================== +## 【方案四】 chatgpt + Latex +## =================================================== +version: '3' +services: + gpt_academic_with_latex: + image: ghcr.io/binary-husky/gpt_academic_with_latex:master + environment: + # 请查阅 `config.py` 以查看所有的配置信息 + API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ' + USE_PROXY: ' True ' + proxies: ' { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' + LLM_MODEL: ' gpt-3.5-turbo ' + AVAIL_LLM_MODELS: ' ["gpt-3.5-turbo", "gpt-4"] ' + LOCAL_MODEL_DEVICE: ' cuda ' + DEFAULT_WORKER_NUM: ' 10 ' + WEB_PORT: ' 12303 ' + + # 与宿主的网络融合 + network_mode: "host" + + # 不使用代理网络拉取最新代码 + command: > + bash -c "python3 -u main.py" + diff --git a/docs/Dockerfile+NoLocal+Latex b/docs/Dockerfile+NoLocal+Latex new file mode 100644 index 00000000..0f9ac8a1 --- /dev/null +++ b/docs/Dockerfile+NoLocal+Latex @@ -0,0 +1,27 @@ +# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM +# - 1 修改 `config.py` +# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/Dockerfile+NoLocal+Latex . +# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex + +FROM fuqingxu/python311_texlive_ctex:latest + +# 指定路径 +WORKDIR /gpt + +ARG useProxyNetwork='' + +RUN $useProxyNetwork pip3 install gradio openai numpy arxiv rich -i https://pypi.douban.com/simple/ +RUN $useProxyNetwork pip3 install colorama Markdown pygments pymupdf -i https://pypi.douban.com/simple/ + +# 装载项目文件 +COPY . . + + +# 安装依赖 +RUN $useProxyNetwork pip3 install -r requirements.txt -i https://pypi.douban.com/simple/ + +# 可选步骤,用于预热模块 +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + +# 启动 +CMD ["python3", "-u", "main.py"] diff --git a/docs/GithubAction+NoLocal+Latex b/docs/GithubAction+NoLocal+Latex new file mode 100644 index 00000000..5ff9bb82 --- /dev/null +++ b/docs/GithubAction+NoLocal+Latex @@ -0,0 +1,25 @@ +# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM +# - 1 修改 `config.py` +# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/Dockerfile+NoLocal+Latex . 
+# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex + +FROM fuqingxu/python311_texlive_ctex:latest + +# 指定路径 +WORKDIR /gpt + +RUN pip3 install gradio openai numpy arxiv rich +RUN pip3 install colorama Markdown pygments pymupdf + +# 装载项目文件 +COPY . . + + +# 安装依赖 +RUN pip3 install -r requirements.txt + +# 可选步骤,用于预热模块 +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + +# 启动 +CMD ["python3", "-u", "main.py"] diff --git a/docs/README.md.Italian.md b/docs/README.md.Italian.md index 04c5ed6b..301ce697 100644 --- a/docs/README.md.Italian.md +++ b/docs/README.md.Italian.md @@ -2,11 +2,11 @@ > > Durante l'installazione delle dipendenze, selezionare rigorosamente le **versioni specificate** nel file requirements.txt. > -> ` pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/` +> ` pip install -r requirements.txt` -# GPT Ottimizzazione Accademica (GPT Academic) +# GPT Ottimizzazione Accademica (GPT Academic) -**Se ti piace questo progetto, ti preghiamo di dargli una stella. Se hai sviluppato scorciatoie accademiche o plugin funzionali più utili, non esitare ad aprire una issue o pull request. Abbiamo anche una README in [Inglese|](docs/README_EN.md)[Giapponese|](docs/README_JP.md)[Coreano|](https://github.com/mldljyh/ko_gpt_academic)[Russo|](docs/README_RS.md)[Francese](docs/README_FR.md) tradotta da questo stesso progetto. +**Se ti piace questo progetto, ti preghiamo di dargli una stella. Se hai sviluppato scorciatoie accademiche o plugin funzionali più utili, non esitare ad aprire una issue o pull request. Abbiamo anche una README in [Inglese|](README_EN.md)[Giapponese|](README_JP.md)[Coreano|](https://github.com/mldljyh/ko_gpt_academic)[Russo|](README_RS.md)[Francese](README_FR.md) tradotta da questo stesso progetto. Per tradurre questo progetto in qualsiasi lingua con GPT, leggere e eseguire [`multi_language.py`](multi_language.py) (sperimentale). > **Nota** @@ -17,7 +17,9 @@ Per tradurre questo progetto in qualsiasi lingua con GPT, leggere e eseguire [`m > > 3. Questo progetto è compatibile e incoraggia l'utilizzo di grandi modelli di linguaggio di produzione nazionale come chatglm, RWKV, Pangu ecc. Supporta la coesistenza di più api-key e può essere compilato nel file di configurazione come `API_KEY="openai-key1,openai-key2,api2d-key3"`. Per sostituire temporaneamente `API_KEY`, inserire `API_KEY` temporaneo nell'area di input e premere Invio per renderlo effettivo. -
<div align="center">Funzione | Descrizione
+<div align="center">
+ +Funzione | Descrizione --- | --- Correzione immediata | Supporta correzione immediata e ricerca degli errori di grammatica del documento con un solo clic Traduzione cinese-inglese immediata | Traduzione cinese-inglese immediata con un solo clic @@ -41,6 +43,8 @@ Avvia il tema di gradio [scuro](https://github.com/binary-husky/chatgpt_academic Supporto per maggiori modelli LLM, supporto API2D | Sentirsi serviti simultaneamente da GPT3.5, GPT4, [Tsinghua ChatGLM](https://github.com/THUDM/ChatGLM-6B), [Fudan MOSS](https://github.com/OpenLMLab/MOSS) deve essere una grande sensazione, giusto? Ulteriori modelli LLM supportat,i supporto per l'implementazione di Huggingface | Aggiunta di un'interfaccia Newbing (Nuovo Bing), introdotta la compatibilità con Tsinghua [Jittorllms](https://github.com/Jittor/JittorLLMs), [LLaMA](https://github.com/facebookresearch/llama), [RWKV](https://github.com/BlinkDL/ChatRWKV) e [PanGu-α](https://openi.org.cn/pangu/) Ulteriori dimostrazioni di nuove funzionalità (generazione di immagini, ecc.)... | Vedere la fine di questo documento... +
</div>
+
+- Nuova interfaccia (modificare l'opzione LAYOUT in `config.py` per passare dal layout a sinistra e a destra al layout superiore e inferiore)
@@ -202,11 +206,13 @@ ad esempio 2. Plugin di funzione personalizzati Scrivi plugin di funzione personalizzati e esegui tutte le attività che desideri o non hai mai pensato di fare. -La difficoltà di scrittura e debug dei plugin del nostro progetto è molto bassa. Se si dispone di una certa conoscenza di base di Python, è possibile realizzare la propria funzione del plugin seguendo il nostro modello. Per maggiori dettagli, consultare la [guida al plugin per funzioni] (https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97). +La difficoltà di scrittura e debug dei plugin del nostro progetto è molto bassa. Se si dispone di una certa conoscenza di base di Python, è possibile realizzare la propria funzione del plugin seguendo il nostro modello. Per maggiori dettagli, consultare la [guida al plugin per funzioni](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97). --- # Ultimo aggiornamento -## Nuove funzionalità dinamiche1. Funzionalità di salvataggio della conversazione. Nell'area dei plugin della funzione, fare clic su "Salva la conversazione corrente" per salvare la conversazione corrente come file html leggibile e ripristinabile, inoltre, nell'area dei plugin della funzione (menu a discesa), fare clic su "Carica la cronologia della conversazione archiviata" per ripristinare la conversazione precedente. Suggerimento: fare clic su "Carica la cronologia della conversazione archiviata" senza specificare il file consente di visualizzare la cache degli archivi html di cronologia, fare clic su "Elimina tutti i record di cronologia delle conversazioni locali" per eliminare tutte le cache degli archivi html. +## Nuove funzionalità dinamiche + +1. Funzionalità di salvataggio della conversazione. Nell'area dei plugin della funzione, fare clic su "Salva la conversazione corrente" per salvare la conversazione corrente come file html leggibile e ripristinabile, inoltre, nell'area dei plugin della funzione (menu a discesa), fare clic su "Carica la cronologia della conversazione archiviata" per ripristinare la conversazione precedente. Suggerimento: fare clic su "Carica la cronologia della conversazione archiviata" senza specificare il file consente di visualizzare la cache degli archivi html di cronologia, fare clic su "Elimina tutti i record di cronologia delle conversazioni locali" per eliminare tutte le cache degli archivi html.
@@ -307,4 +313,4 @@ https://github.com/kaixindelele/ChatPaper # Altro: https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo -``` \ No newline at end of file +``` diff --git a/docs/README.md.Korean.md b/docs/README.md.Korean.md index d94aaf1a..f15d477a 100644 --- a/docs/README.md.Korean.md +++ b/docs/README.md.Korean.md @@ -17,7 +17,9 @@ GPT를 이용하여 프로젝트를 임의의 언어로 번역하려면 [`multi_ > > 3. 이 프로젝트는 국내 언어 모델 chatglm과 RWKV, 판고 등의 시도와 호환 가능합니다. 여러 개의 api-key를 지원하며 설정 파일에 "API_KEY="openai-key1,openai-key2,api2d-key3""와 같이 작성할 수 있습니다. `API_KEY`를 임시로 변경해야하는 경우 입력 영역에 임시 `API_KEY`를 입력 한 후 엔터 키를 누르면 즉시 적용됩니다. -
<div align="center">기능 | 설명
+<div align="center">
+ +기능 | 설명 --- | --- 원 키워드 | 원 키워드 및 논문 문법 오류를 찾는 기능 지원 한-영 키워드 | 한-영 키워드 지원 @@ -265,4 +267,4 @@ https://github.com/kaixindelele/ChatPaper # 더 많은 : https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo -``` \ No newline at end of file +``` diff --git a/docs/README.md.Portuguese.md b/docs/README.md.Portuguese.md index 816ced19..98f60543 100644 --- a/docs/README.md.Portuguese.md +++ b/docs/README.md.Portuguese.md @@ -2,7 +2,7 @@ > > Ao instalar as dependências, por favor, selecione rigorosamente as versões **especificadas** no arquivo requirements.txt. > -> `pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/` +> `pip install -r requirements.txt` > # Otimização acadêmica GPT (GPT Academic) @@ -18,7 +18,9 @@ Para traduzir este projeto para qualquer idioma com o GPT, leia e execute [`mult > > 3. Este projeto é compatível com e incentiva o uso de modelos de linguagem nacionais, como chatglm e RWKV, Pangolin, etc. Suporta a coexistência de várias chaves de API e pode ser preenchido no arquivo de configuração como `API_KEY="openai-key1,openai-key2,api2d-key3"`. Quando precisar alterar temporariamente o `API_KEY`, basta digitar o `API_KEY` temporário na área de entrada e pressionar Enter para que ele entre em vigor. -
<div align="center">Funcionalidade | Descrição
+<div align="center">
+ +Funcionalidade | Descrição --- | --- Um clique de polimento | Suporte a um clique polimento, um clique encontrar erros de gramática no artigo Tradução chinês-inglês de um clique | Tradução chinês-inglês de um clique @@ -216,7 +218,9 @@ Para mais detalhes, consulte o [Guia do plug-in de função.](https://github.com --- # Última atualização -## Novas funções dinâmicas.1. Função de salvamento de diálogo. Ao chamar o plug-in de função "Salvar diálogo atual", é possível salvar o diálogo atual em um arquivo html legível e reversível. Além disso, ao chamar o plug-in de função "Carregar arquivo de histórico de diálogo" no menu suspenso da área de plug-in, é possível restaurar uma conversa anterior. Dica: clicar em "Carregar arquivo de histórico de diálogo" sem especificar um arquivo permite visualizar o cache do arquivo html de histórico. Clicar em "Excluir todo o registro de histórico de diálogo local" permite excluir todo o cache de arquivo html. +## Novas funções dinâmicas. + +1. Função de salvamento de diálogo. Ao chamar o plug-in de função "Salvar diálogo atual", é possível salvar o diálogo atual em um arquivo html legível e reversível. Além disso, ao chamar o plug-in de função "Carregar arquivo de histórico de diálogo" no menu suspenso da área de plug-in, é possível restaurar uma conversa anterior. Dica: clicar em "Carregar arquivo de histórico de diálogo" sem especificar um arquivo permite visualizar o cache do arquivo html de histórico. Clicar em "Excluir todo o registro de histórico de diálogo local" permite excluir todo o cache de arquivo html.
@@ -317,4 +321,4 @@ https://github.com/kaixindelele/ChatPaper # Mais: https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo -``` \ No newline at end of file +``` diff --git a/docs/translate_english.json b/docs/translate_english.json index d9968c61..57e008b1 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -58,6 +58,8 @@ "连接网络回答问题": "ConnectToNetworkToAnswerQuestions", "联网的ChatGPT": "ChatGPTConnectedToNetwork", "解析任意code项目": "ParseAnyCodeProject", + "读取知识库作答": "ReadKnowledgeArchiveAnswerQuestions", + "知识库问答": "UpdateKnowledgeArchive", "同时问询_指定模型": "InquireSimultaneously_SpecifiedModel", "图片生成": "ImageGeneration", "test_解析ipynb文件": "Test_ParseIpynbFile", diff --git a/docs/use_azure.md b/docs/use_azure.md new file mode 100644 index 00000000..f1c27ef3 --- /dev/null +++ b/docs/use_azure.md @@ -0,0 +1,152 @@ +# 通过微软Azure云服务申请 Openai API + +由于Openai和微软的关系,现在是可以通过微软的Azure云计算服务直接访问openai的api,免去了注册和网络的问题。 + +快速入门的官方文档的链接是:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python) + +# 申请API + +按文档中的“先决条件”的介绍,出了编程的环境以外,还需要以下三个条件: + +1.  Azure账号并创建订阅 + +2.  为订阅添加Azure OpenAI 服务 + +3.  部署模型 + +## Azure账号并创建订阅 + +### Azure账号 + +创建Azure的账号时最好是有微软的账号,这样似乎更容易获得免费额度(第一个月的200美元,实测了一下,如果用一个刚注册的微软账号登录Azure的话,并没有这一个月的免费额度)。 + +创建Azure账号的网址是:[立即创建 Azure 免费帐户 | Microsoft Azure](https://azure.microsoft.com/zh-cn/free/) + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_944786_iH6AECuZ_tY0EaBd_1685327219?w=1327\&h=695\&type=image/png) + +打开网页后,点击 “免费开始使用” 会跳转到登录或注册页面,如果有微软的账户,直接登录即可,如果没有微软账户,那就需要到微软的网页再另行注册一个。 + +注意,Azure的页面和政策时不时会变化,已实际最新显示的为准就好。 + +### 创建订阅 + +注册好Azure后便可进入主页: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_444847_tk-9S-pxOYuaLs_K_1685327675?w=1865\&h=969\&type=image/png) + +首先需要在订阅里进行添加操作,点开后即可进入订阅的页面: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_612820_z_1AlaEgnJR-rUl0_1685327892?w=1865\&h=969\&type=image/png) + +第一次进来应该是空的,点添加即可创建新的订阅(可以是“免费”或者“即付即用”的订阅),其中订阅ID是后面申请Azure OpenAI需要使用的。 + +## 为订阅添加Azure OpenAI服务 + +之后回到首页,点Azure OpenAI即可进入OpenAI服务的页面(如果不显示的话,则在首页上方的搜索栏里搜索“openai”即可)。 + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_269759_nExkGcPC0EuAR5cp_1685328130?w=1865\&h=969\&type=image/png) + +不过现在这个服务还不能用。在使用前,还需要在这个网址申请一下: + +[Request Access to Azure OpenAI Service (microsoft.com)](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu) + +这里有二十来个问题,按照要求和自己的实际情况填写即可。 + +其中需要注意的是 + +1.  千万记得填对"订阅ID" + +2.  
需要填一个公司邮箱(可以不是注册用的邮箱)和公司网址 + +之后,在回到上面那个页面,点创建,就会进入创建页面了: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_72708_9d9JYhylPVz3dFWL_1685328372?w=824\&h=590\&type=image/png) + +需要填入“资源组”和“名称”,按照自己的需要填入即可。 + +完成后,在主页的“资源”里就可以看到刚才创建的“资源”了,点击进入后,就可以进行最后的部署了。 + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_871541_CGCnbgtV9Uk1Jccy_1685329861?w=1217\&h=628\&type=image/png) + +## 部署模型 + +进入资源页面后,在部署模型前,可以先点击“开发”,把密钥和终结点记下来。 + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_852567_dxCZOrkMlWDSLH0d_1685330736?w=856\&h=568\&type=image/png) + +之后,就可以去部署模型了,点击“部署”即可,会跳转到 Azure OpenAI Stuido 进行下面的操作: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_169225_uWs1gMhpNbnwW4h2_1685329901?w=1865\&h=969\&type=image/png) + +进入 Azure OpenAi Studio 后,点击新建部署,会弹出如下对话框: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_391255_iXUSZAzoud5qlxjJ_1685330224?w=656\&h=641\&type=image/png) + +在这里选 gpt-35-turbo 或需要的模型并按需要填入“部署名”即可完成模型的部署。 + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_724099_vBaHcUilsm1EtPgK_1685330396?w=1869\&h=482\&type=image/png) + +这个部署名需要记下来。 + +到现在为止,申请操作就完成了,需要记下来的有下面几个东西: + +● 密钥(1或2都可以) + +● 终结点 + +● 部署名(不是模型名) + +# 修改 config.py + +``` +AZURE_ENDPOINT = "填入终结点" +AZURE_API_KEY = "填入azure openai api的密钥" +AZURE_API_VERSION = "2023-05-15" # 默认使用 2023-05-15 版本,无需修改 +AZURE_ENGINE = "填入部署名" + +``` +# API的使用 + +接下来就是具体怎么使用API了,还是可以参考官方文档:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python) + +和openai自己的api调用有点类似,都需要安装openai库,不同的是调用方式 + +``` +import openai +openai.api_type = "azure" #固定格式,无需修改 +openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") #这里填入“终结点” +openai.api_version = "2023-05-15" #固定格式,无需修改 +openai.api_key = os.getenv("AZURE_OPENAI_KEY") #这里填入“密钥1”或“密钥2” + +response = openai.ChatCompletion.create( + engine="gpt-35-turbo", #这里填入的不是模型名,是部署名 + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"}, + {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."}, + {"role": "user", "content": "Do other Azure Cognitive Services support this too?"} + ] +) + +print(response) +print(response['choices'][0]['message']['content']) + +``` + +需要注意的是: + +1.  engine那里填入的是部署名,不是模型名 + +2.  
通过openai库获得的这个 response 和通过 request 库访问 url 获得的 response 不同,不需要 decode,已经是解析好的 json 了,直接根据键值读取即可。 + +更细节的使用方法,详见官方API文档。 + +# 关于费用 + +Azure OpenAI API 还是需要一些费用的(免费订阅只有1个月有效期),费用如下: + +![image.png](https://note.youdao.com/yws/res/18095/WEBRESOURCEeba0ab6d3127b79e143ef2d5627c0e44) + +具体可以可以看这个网址 :[Azure OpenAI 服务 - 定价| Microsoft Azure](https://azure.microsoft.com/zh-cn/pricing/details/cognitive-services/openai-service/?cdn=disable) + +并非网上说的什么“一年白嫖”,但注册方法以及网络问题都比直接使用openai的api要简单一些。 diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index b6efe21a..02cfe988 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -16,6 +16,9 @@ from toolbox import get_conf, trimmed_format_exc from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui from .bridge_chatgpt import predict as chatgpt_ui +from .bridge_azure_test import predict_no_ui_long_connection as azure_noui +from .bridge_azure_test import predict as azure_ui + from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui from .bridge_chatglm import predict as chatglm_ui @@ -83,6 +86,33 @@ model_info = { "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, + + "gpt-3.5-turbo-16k": { + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": openai_endpoint, + "max_token": 1024*16, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + + "gpt-3.5-turbo-0613": { + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": openai_endpoint, + "max_token": 4096, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + + "gpt-3.5-turbo-16k-0613": { + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": openai_endpoint, + "max_token": 1024 * 16, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, "gpt-4": { "fn_with_ui": chatgpt_ui, @@ -93,6 +123,16 @@ model_info = { "token_cnt": get_token_num_gpt4, }, + # azure openai + "azure-gpt35":{ + "fn_with_ui": azure_ui, + "fn_without_ui": azure_noui, + "endpoint": get_conf("AZURE_ENDPOINT"), + "max_token": 4096, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + # api_2d "api2d-gpt-3.5-turbo": { "fn_with_ui": chatgpt_ui, diff --git a/request_llm/bridge_azure_test.py b/request_llm/bridge_azure_test.py new file mode 100644 index 00000000..edc68f74 --- /dev/null +++ b/request_llm/bridge_azure_test.py @@ -0,0 +1,241 @@ +""" + 该文件中主要包含三个函数 + + 不具备多线程能力的函数: + 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程 + + 具备多线程调用能力的函数 + 2. predict_no_ui:高级实验性功能模块调用,不会实时显示在界面上,参数简单,可以多线程并行,方便实现复杂的功能逻辑 + 3. 
predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程 +""" + +import logging +import traceback +import importlib +import openai +import time + + +# 读取config.py文件中关于AZURE OPENAI API的信息 +from toolbox import get_conf, update_ui, clip_history, trimmed_format_exc +TIMEOUT_SECONDS, MAX_RETRY, AZURE_ENGINE, AZURE_ENDPOINT, AZURE_API_VERSION, AZURE_API_KEY = \ + get_conf('TIMEOUT_SECONDS', 'MAX_RETRY',"AZURE_ENGINE","AZURE_ENDPOINT", "AZURE_API_VERSION", "AZURE_API_KEY") + + +def get_full_error(chunk, stream_response): + """ + 获取完整的从Openai返回的报错 + """ + while True: + try: + chunk += next(stream_response) + except: + break + return chunk + +def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): + """ + 发送至azure openai api,流式获取输出。 + 用于基础的对话功能。 + inputs 是本次问询的输入 + top_p, temperature是chatGPT的内部调优参数 + history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误) + chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 + additional_fn代表点击的哪个按钮,按钮见functional.py + """ + print(llm_kwargs["llm_model"]) + + if additional_fn is not None: + import core_functional + importlib.reload(core_functional) # 热更新prompt + core_functional = core_functional.get_core_functions() + if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) + inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"] + + raw_input = inputs + logging.info(f'[raw_input] {raw_input}') + chatbot.append((inputs, "")) + yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 + + + payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream) + + history.append(inputs); history.append("") + + retry = 0 + while True: + try: + + openai.api_type = "azure" + openai.api_version = AZURE_API_VERSION + openai.api_base = AZURE_ENDPOINT + openai.api_key = AZURE_API_KEY + response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break + + except: + retry += 1 + chatbot[-1] = ((chatbot[-1][0], "获取response失败,重试中。。。")) + retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else "" + yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面 + if retry > MAX_RETRY: raise TimeoutError + + gpt_replying_buffer = "" + is_head_of_the_stream = True + if stream: + + stream_response = response + + while True: + try: + chunk = next(stream_response) + + except StopIteration: + from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```' + chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk)}") + yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk) # 刷新界面 + return + + if is_head_of_the_stream and (r'"object":"error"' not in chunk): + # 数据流的第一帧不携带content + is_head_of_the_stream = False; continue + + if chunk: + #print(chunk) + try: + if "delta" in chunk["choices"][0]: + if chunk["choices"][0]["finish_reason"] == "stop": + logging.info(f'[response] {gpt_replying_buffer}') + break + status_text = f"finish_reason: {chunk['choices'][0]['finish_reason']}" + gpt_replying_buffer = gpt_replying_buffer + chunk["choices"][0]["delta"]["content"] + + history[-1] = gpt_replying_buffer + chatbot[-1] = (history[-2], history[-1]) + yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面 + + except Exception as e: + traceback.print_exc() + yield 
from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面 + chunk = get_full_error(chunk, stream_response) + + error_msg = chunk + yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 + return + + +def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): + """ + 发送至AZURE OPENAI API,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。 + inputs: + 是本次问询的输入 + sys_prompt: + 系统静默prompt + llm_kwargs: + chatGPT的内部调优参数 + history: + 是之前的对话列表 + observe_window = None: + 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 + """ + watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 + payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True) + retry = 0 + while True: + + try: + openai.api_type = "azure" + openai.api_version = AZURE_API_VERSION + openai.api_base = AZURE_ENDPOINT + openai.api_key = AZURE_API_KEY + response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break + + except: + retry += 1 + traceback.print_exc() + if retry > MAX_RETRY: raise TimeoutError + if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') + + + stream_response = response + result = '' + while True: + try: chunk = next(stream_response) + except StopIteration: + break + except: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + + if len(chunk)==0: continue + if not chunk.startswith('data:'): + error_msg = get_full_error(chunk, stream_response) + if "reduce the length" in error_msg: + raise ConnectionAbortedError("AZURE OPENAI API拒绝了请求:" + error_msg) + else: + raise RuntimeError("AZURE OPENAI API拒绝了请求:" + error_msg) + if ('data: [DONE]' in chunk): break + + delta = chunk["delta"] + if len(delta) == 0: break + if "role" in delta: continue + if "content" in delta: + result += delta["content"] + if not console_slience: print(delta["content"], end='') + if observe_window is not None: + # 观测窗,把已经获取的数据显示出去 + if len(observe_window) >= 1: observe_window[0] += delta["content"] + # 看门狗,如果超过期限没有喂狗,则终止 + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: + raise RuntimeError("用户取消了程序。") + else: raise RuntimeError("意外Json结构:"+delta) + if chunk['finish_reason'] == 'length': + raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。") + return result + + +def generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream): + """ + 整合所有信息,选择LLM模型,生成 azure openai api请求,为发送请求做准备 + """ + + conversation_cnt = len(history) // 2 + + messages = [{"role": "system", "content": system_prompt}] + if conversation_cnt: + for index in range(0, 2*conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index+1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]['content'] = what_gpt_answer['content'] + + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = inputs + messages.append(what_i_ask_now) + + payload = { + "model": llm_kwargs['llm_model'], + "messages": messages, + "temperature": llm_kwargs['temperature'], # 1.0, + "top_p": llm_kwargs['top_p'], # 1.0, + "n": 1, + "stream": stream, + "presence_penalty": 0, + "frequency_penalty": 0, + "engine": AZURE_ENGINE 
+ } + try: + print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........") + except: + print('输入中可能存在乱码。') + return payload + + diff --git a/toolbox.py b/toolbox.py index 10e5a875..c585db59 100644 --- a/toolbox.py +++ b/toolbox.py @@ -1,11 +1,12 @@ import markdown import importlib -import traceback +import time import inspect import re import os from latex2mathml.converter import convert as tex2mathml from functools import wraps, lru_cache +pj = os.path.join """ ======================================================================== @@ -70,6 +71,17 @@ def update_ui(chatbot, history, msg='正常', **kwargs): # 刷新界面 assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时,可用clear将其清空,然后用for+append循环重新赋值。" yield chatbot.get_cookies(), chatbot, history, msg +def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面 + """ + 刷新用户界面 + """ + if len(chatbot) == 0: chatbot.append(["update_ui_last_msg", lastmsg]) + chatbot[-1] = list(chatbot[-1]) + chatbot[-1][-1] = lastmsg + yield from update_ui(chatbot=chatbot, history=history) + time.sleep(delay) + + def trimmed_format_exc(): import os, traceback str = traceback.format_exc() @@ -83,7 +95,7 @@ def CatchException(f): """ @wraps(f) - def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT): + def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT=-1): try: yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT) except Exception as e: @@ -210,16 +222,21 @@ def text_divide_paragraph(text): """ 将文本按照段落分隔符分割开,生成带有段落标签的HTML代码。 """ + pre = '
+    suf = '</div>'
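+    # if the text is already wrapped, return it unchanged to avoid nesting a second div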
+    if text.startswith(pre) and text.endswith(suf):
+        return text
+
     if '```' in text:
         # careful input
-        return text
+        return pre + text + suf
     else:
         # wtf input
         lines = text.split("\n")
         for i, line in enumerate(lines):
             lines[i] = lines[i].replace(" ", "&nbsp;")
         text = "<br/>".join(lines)
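+        # spaces were escaped to &nbsp; and newlines converted to <br/>, so plain text keeps its layout in HTML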
".join(lines) - return text + return pre + text + suf @lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度 def markdown_convertion(txt): @@ -331,8 +348,11 @@ def format_io(self, y): if y is None or y == []: return [] i_ask, gpt_reply = y[-1] - i_ask = text_divide_paragraph(i_ask) # 输入部分太自由,预处理一波 - gpt_reply = close_up_code_segment_during_stream(gpt_reply) # 当代码输出半截的时候,试着补上后个``` + # 输入部分太自由,预处理一波 + if i_ask is not None: i_ask = text_divide_paragraph(i_ask) + # 当代码输出半截的时候,试着补上后个``` + if gpt_reply is not None: gpt_reply = close_up_code_segment_during_stream(gpt_reply) + # process y[-1] = ( None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code', 'tables']), None if gpt_reply is None else markdown_convertion(gpt_reply) @@ -380,7 +400,7 @@ def extract_archive(file_path, dest_dir): print("Successfully extracted rar archive to {}".format(dest_dir)) except: print("Rar format requires additional dependencies to install") - return '\n\n需要安装pip install rarfile来解压rar文件' + return '\n\n解压失败! 需要安装pip install rarfile来解压rar文件' # 第三方库,需要预先pip install py7zr elif file_extension == '.7z': @@ -391,7 +411,7 @@ def extract_archive(file_path, dest_dir): print("Successfully extracted 7z archive to {}".format(dest_dir)) except: print("7z format requires additional dependencies to install") - return '\n\n需要安装pip install py7zr来解压7z文件' + return '\n\n解压失败! 需要安装pip install py7zr来解压7z文件' else: return '' return '' @@ -420,6 +440,17 @@ def find_recent_files(directory): return recent_files +def promote_file_to_downloadzone(file, rename_file=None, chatbot=None): + # 将文件复制一份到下载区 + import shutil + if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}' + new_path = os.path.join(f'./gpt_log/', rename_file) + if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path) + if not os.path.exists(new_path): shutil.copyfile(file, new_path) + if chatbot: + if 'file_to_promote' in chatbot._cookies: current = chatbot._cookies['file_to_promote'] + else: current = [] + chatbot._cookies.update({'file_to_promote': [new_path] + current}) def on_file_uploaded(files, chatbot, txt, txt2, checkboxes): """ @@ -459,14 +490,20 @@ def on_file_uploaded(files, chatbot, txt, txt2, checkboxes): return chatbot, txt, txt2 -def on_report_generated(files, chatbot): +def on_report_generated(cookies, files, chatbot): from toolbox import find_recent_files - report_files = find_recent_files('gpt_log') + if 'file_to_promote' in cookies: + report_files = cookies['file_to_promote'] + cookies.pop('file_to_promote') + else: + report_files = find_recent_files('gpt_log') if len(report_files) == 0: return None, chatbot # files.extend(report_files) - chatbot.append(['报告如何远程获取?', '报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。']) - return report_files, chatbot + file_links = '' + for f in report_files: file_links += f'
+    chatbot.append(['How can I fetch the report remotely?', f'The report has been added to the "file upload area" on the right (it may be in a collapsed state); please check it there.{file_links}'])
+    return cookies, report_files, chatbot
 
 def is_openai_api_key(key):
     API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
@@ -728,6 +765,8 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
 其他小工具:
     - zip_folder:    把某个路径下所有文件压缩,然后转移到指定的另一个路径中(gpt写的)
     - gen_time_str:  生成时间戳
+    - ProxyNetworkActivate: temporarily enable the proxy network (if one is configured)
+    - objdump/objload: quick debugging helpers
 ========================================================================
 """
 
@@ -762,11 +801,16 @@ def zip_folder(source_folder, dest_folder, zip_name):
     print(f"Zip file created at {zip_file}")
 
+def zip_result(folder):
+    import time
+    t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
+    zip_folder(folder, './gpt_log/', f'{t}-result.zip')
+    return pj('./gpt_log/', f'{t}-result.zip')
+
 def gen_time_str():
     import time
     return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
 
-
 class ProxyNetworkActivate():
     """
     这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理
     """
     def __enter__(self):
         from toolbox import get_conf
         proxies, = get_conf('proxies')
         if 'no_proxy' in os.environ: os.environ.pop('no_proxy')
-        os.environ['HTTP_PROXY'] = proxies['http']
-        os.environ['HTTPS_PROXY'] = proxies['https']
+        if proxies is not None:
+            if 'http' in proxies: os.environ['HTTP_PROXY'] = proxies['http']
+            if 'https' in proxies: os.environ['HTTPS_PROXY'] = proxies['https']
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
         os.environ['no_proxy'] = '*'
         if 'HTTP_PROXY' in os.environ: os.environ.pop('HTTP_PROXY')
         if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY')
-        return
\ No newline at end of file
+        return
+
+def objdump(obj, file='objdump.tmp'):
+    import pickle
+    with open(file, 'wb+') as f:
+        pickle.dump(obj, f)
+    return
+
+def objload(file='objdump.tmp'):
+    import pickle, os
+    if not os.path.exists(file):
+        return
+    with open(file, 'rb') as f:
+        return pickle.load(f)
+
diff --git a/version b/version
index ad75b2c4..6353b341 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 3.37,
+  "version": 3.42,
   "show_feature": true,
-  "new_feature": "修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持 <-> 提供复旦MOSS模型适配(启用需额外依赖) <-> 提供docker-compose方案兼容LLAMA盘古RWKV等模型的后端 <-> 新增Live2D装饰 <-> 完善对话历史的保存/载入/删除 <-> 保存对话功能"
+  "new_feature": "Improve local Latex error-correction and translation <-> Add gpt-3.5-16k support <-> Add the strongest Arxiv paper translation plugin <-> Fix the gradio copy-button BUG <-> Fix the PDF translation BUG, add HTML side-by-side bilingual view <-> Add an OpenAI image generation plugin <-> Add an OpenAI audio-to-text summary plugin <-> Add Claude support via Slack"
 }
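
Taken together, `zip_result`, `promote_file_to_downloadzone` and the reworked `on_report_generated` form a cookie-based pipeline for delivering plugin results. Below is a minimal sketch of the intended plugin-side usage; `deliver_results` and the `./my_outputs/` folder are hypothetical, and `chatbot` is assumed to be the `ChatBotWithCookies` instance the framework passes to every plugin.

```python
from toolbox import zip_result, promote_file_to_downloadzone, update_ui

def deliver_results(chatbot, history):
    # pack everything the plugin wrote into ./my_outputs/ (hypothetical folder)
    archive = zip_result('./my_outputs/')
    # register the archive under the 'file_to_promote' cookie; on_report_generated
    # will then surface exactly this file instead of scanning gpt_log for recent files
    promote_file_to_downloadzone(archive, rename_file='result.zip', chatbot=chatbot)
    chatbot.append(['Task finished', 'The results have been packaged into a zip archive.'])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
```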