Merge branch 'master' into frontier

2025-12-06 06:26:47 +00:00 · 2024-10-13 08:25:47 +00:00
--- a/.github/workflows/build-with-all-capacity-beta.yml
+++ b/.github/workflows/build-with-all-capacity-beta.yml
@@ -1,44 +0,0 @@
 # Builds the Docker image described by docs/GithubAction+AllCapacityBeta and
 # pushes it to the GitHub Container Registry on every push to master.
 # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
 name: build-with-all-capacity-beta
 on:
  push:
    branches:
      - 'master'
 env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}_with_all_capacity_beta
 jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    # GITHUB_TOKEN only needs to read the repository and write packages.
    permissions:
      contents: read
      packages: write
    steps:
      # Action majors below are the releases that run on the Node 20 runtime;
      # the previous pins (v3/v2/v4/v4) relied on the deprecated Node 16 runtime.
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Log in to the Container registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Produces the `tags` and `labels` outputs consumed by the build step.
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          file: docs/GithubAction+AllCapacityBeta
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
--- a/.github/workflows/build-with-jittorllms.yml
+++ b/.github/workflows/build-with-jittorllms.yml
@@ -1,44 +0,0 @@
 # Builds the Docker image described by docs/GithubAction+JittorLLMs and
 # pushes it to the GitHub Container Registry on every push to master.
 # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
 name: build-with-jittorllms
 on:
  push:
    branches:
      - 'master'
 env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}_jittorllms
 jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    # GITHUB_TOKEN only needs to read the repository and write packages.
    permissions:
      contents: read
      packages: write
    steps:
      # Action majors below are the releases that run on the Node 20 runtime;
      # the previous pins (v3/v2/v4/v4) relied on the deprecated Node 16 runtime.
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Log in to the Container registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Produces the `tags` and `labels` outputs consumed by the build step.
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          file: docs/GithubAction+JittorLLMs
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 > [!IMPORTANT]
-> 2024.6.1: 版本3.80加入插件二级菜单功能（详见wiki）  
+> 2024.10.10: 突发停电，紧急恢复了提供[whl包](https://drive.google.com/file/d/19U_hsLoMrjOlQSzYS3pzWX9fTzyusArP/view?usp=sharing)的文件服务器  
 > 2024.10.8: 版本3.90加入对llama-index的初步支持，版本3.80加入插件二级菜单功能（详见wiki）  
 > 2024.5.1: 加入Doc2x翻译PDF论文的功能，[查看详情](https://github.com/binary-husky/gpt_academic/wiki/Doc2x)  
 > 2024.3.11: 全力支持Qwen、GLM、DeepseekCoder等中文大语言模型！ SoVits语音克隆模块，[查看详情](https://www.bilibili.com/video/BV1Rp421S7tF/) 
 > 2024.1.17: 安装依赖时，请选择`requirements.txt`中**指定的版本**。 安装命令：`pip install -r requirements.txt`。本项目完全开源免费，您可通过订阅[在线服务](https://github.com/binary-husky/gpt_academic/wiki/online)的方式鼓励本项目的发展。
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -6,7 +6,6 @@ from loguru import logger
 def get_crazy_functions():
    from crazy_functions.读文章写摘要 import 读文章写摘要
    from crazy_functions.生成函数注释 import 批量生成函数注释
    from crazy_functions.Rag_Interface import Rag问答
    from crazy_functions.SourceCode_Analyse import 解析项目本身
    from crazy_functions.SourceCode_Analyse import 解析一个Python项目
    from crazy_functions.SourceCode_Analyse import 解析一个Matlab项目
@@ -52,13 +51,6 @@ def get_crazy_functions():
    from crazy_functions.SourceCode_Comment import 注释Python项目
    function_plugins = {
        "Rag智能召回": {
            "Group": "对话",
            "Color": "stop",
            "AsButton": False,
            "Info": "将问答数据记录到向量库中，作为长期参考。",
            "Function": HotReload(Rag问答),
        },
        "虚空终端": {
            "Group": "对话|编程|学术|智能体",
            "Color": "stop",
@@ -707,6 +699,31 @@ def get_crazy_functions():
        logger.error(trimmed_format_exc())
        logger.error("Load function plugin failed")
    try:
        from crazy_functions.Rag_Interface import Rag问答
        function_plugins.update(
            {
                "Rag智能召回": {
                    "Group": "对话",
                    "Color": "stop",
                    "AsButton": False,
                    "Info": "将问答数据记录到向量库中，作为长期参考。",
                    "Function": HotReload(Rag问答),
                },
            }
        )
    except:
        logger.error(trimmed_format_exc())
        logger.error("Load function plugin failed")
    # try:
    #     from crazy_functions.高级功能函数模板 import 测试图表渲染
    #     function_plugins.update({
--- a/crazy_functions/Rag_Interface.py
+++ b/crazy_functions/Rag_Interface.py
@@ -2,20 +2,7 @@ from toolbox import CatchException, update_ui, get_conf, get_log_folder, update_
 from crazy_functions.crazy_utils import input_clipping
 from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 VECTOR_STORE_TYPE = "Milvus"
 if VECTOR_STORE_TYPE == "Milvus":
    try:
        from crazy_functions.rag_fns.milvus_worker import MilvusRagWorker as LlamaIndexRagWorker
    except:
        VECTOR_STORE_TYPE = "Simple"
 if VECTOR_STORE_TYPE == "Simple":
    from crazy_functions.rag_fns.llama_index_worker import LlamaIndexRagWorker
 RAG_WORKER_REGISTER = {}
 MAX_HISTORY_ROUND = 5
 MAX_CONTEXT_TOKEN_LIMIT = 4096
 REMEMBER_PREVIEW = 1000
@@ -23,6 +10,16 @@ REMEMBER_PREVIEW = 1000
@CatchException
 def Rag问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    # import vector store lib
    VECTOR_STORE_TYPE = "Milvus"
    if VECTOR_STORE_TYPE == "Milvus":
        try:
            from crazy_functions.rag_fns.milvus_worker import MilvusRagWorker as LlamaIndexRagWorker
        except:
            VECTOR_STORE_TYPE = "Simple"
    if VECTOR_STORE_TYPE == "Simple":
        from crazy_functions.rag_fns.llama_index_worker import LlamaIndexRagWorker
    # 1. we retrieve rag worker from global context
    user_name = chatbot.get_user()
    checkpoint_dir = get_log_folder(user_name, plugin_name='experimental_rag')
--- a/crazy_functions/latex_fns/latex_toolbox.py
+++ b/crazy_functions/latex_fns/latex_toolbox.py
@@ -644,6 +644,191 @@ def run_in_subprocess(func):
 def _merge_pdfs(pdf1_path, pdf2_path, output_path):
    """
    Merge two PDFs side by side, preferring the link-aware merger.

    `_merge_pdfs_ng` is tried first; if it raises, the failure is logged and
    `_merge_pdfs_legacy` is used instead.

    Args:
        pdf1_path: path of the first input PDF.
        pdf2_path: path of the second input PDF.
        output_path: path the merged PDF is written to.

    Raises:
        Whatever `_merge_pdfs_legacy` raises when the fallback also fails.
    """
    try:
        logger.info("Merging PDFs using _merge_pdfs_ng")
        _merge_pdfs_ng(pdf1_path, pdf2_path, output_path)
    except Exception as e:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt /
        # SystemExit are not silently turned into a second merge attempt, and
        # the reason for the fallback is visible in the log.
        logger.warning("_merge_pdfs_ng failed, falling back to _merge_pdfs_legacy: " + repr(e))
        logger.info("Merging PDFs using _merge_pdfs_legacy")
        _merge_pdfs_legacy(pdf1_path, pdf2_path, output_path)
 def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
    """
    Merge two PDFs page by page into one PDF whose pages show both inputs side by side.

    For every page index up to the longer document's page count, a blank page is
    created whose width is page1's width plus ``Percent`` times page2's width and
    whose height is the taller of the two pages. page1 is merged at x=0 and page2
    is merged to its right. Link annotations on the merged page that carry a
    ``/GoTo`` action are then rewritten: the ``/D`` destination is replaced by an
    explicit array built from the source reader's named destination, and ``/Rect``
    is rebuilt; for annotations that came from page2 both are shifted right by
    page1's width.

    Args:
        pdf1_path: path of the PDF placed on the left.
        pdf2_path: path of the PDF placed on the right.
        output_path: path the merged PDF is written to.

    Raises:
        Nothing is caught here; errors from file I/O or PyPDF2 propagate to the caller.
    """
    import PyPDF2  # PyPDF2 has a serious memory-leak problem; run it in a subprocess so the memory is easier to release
    # NOTE(review): TextStringObject is imported but not used in this function.
    from PyPDF2.generic import NameObject, TextStringObject, ArrayObject, FloatObject, NumberObject
    # Fraction of page2's width that contributes to the merged page width (1 = no overlap).
    Percent = 1
    # raise RuntimeError('PyPDF2 has a serious memory leak problem, please use other tools to merge PDF files.')
    # Open the first PDF file
    with open(pdf1_path, "rb") as pdf1_file:
        pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
        # Open the second PDF file
        with open(pdf2_path, "rb") as pdf2_file:
            pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
            # Create a new PDF file to store the merged pages
            output_writer = PyPDF2.PdfFileWriter()
            # Determine the number of pages in each PDF file
            num_pages = max(pdf1_reader.numPages, pdf2_reader.numPages)
            # Merge the pages from the two PDF files
            for page_num in range(num_pages):
                # Add the page from the first PDF file
                if page_num < pdf1_reader.numPages:
                    page1 = pdf1_reader.getPage(page_num)
                else:
                    page1 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
                # Add the page from the second PDF file
                if page_num < pdf2_reader.numPages:
                    page2 = pdf2_reader.getPage(page_num)
                else:
                    # NOTE(review): the blank stand-in for page2 is created from pdf1_reader, not pdf2_reader.
                    page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
                # Create a new empty page with double width
                new_page = PyPDF2.PageObject.createBlankPage(
                    width=int(
                        int(page1.mediaBox.getWidth())
                        + int(page2.mediaBox.getWidth()) * Percent
                    ),
                    height=max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight()),
                )
                # page1 goes at the origin; page2 is translated right by
                # page1's width minus the overlapping part of page2's width.
                new_page.mergeTranslatedPage(page1, 0, 0)
                new_page.mergeTranslatedPage(
                    page2,
                    int(
                        int(page1.mediaBox.getWidth())
                        - int(page2.mediaBox.getWidth()) * (1 - Percent)
                    ),
                    0,
                )
                # Remember which annotation object ids came from which source page,
                # so annotations on the merged page can be attributed below.
                if "/Annots" in page1:
                    page1_annot_id = [annot.idnum for annot in page1["/Annots"]]
                else:
                    page1_annot_id = []
                if "/Annots" in page2:
                    page2_annot_id = [annot.idnum for annot in page2["/Annots"]]
                else:
                    page2_annot_id = []
                if "/Annots" in new_page:
                    annotations = new_page["/Annots"]
                    # NOTE(review): the index `i` is not used in the loop body.
                    for i, annot in enumerate(annotations):
                        annot_obj = annot.get_object()
                        # Check whether the annotation is a link (/Link)
                        if annot_obj.get("/Subtype") == "/Link":
                            # Check whether it is an internal jump (/GoTo) or an external URI link (/URI)
                            action = annot_obj.get("/A")
                            if action:
                                if "/S" in action and action["/S"] == "/GoTo":
                                    # Internal link: jumps to a page inside the document
                                    dest = action.get("/D")  # target page or target position
                                    if dest and annot.idnum in page2_annot_id:
                                        # Look up the jump information in the original (second) file, including the target page
                                        destination = pdf2_reader.named_destinations[
                                            dest
                                        ]
                                        page_number = (
                                            pdf2_reader.get_destination_page_number(
                                                destination
                                            )
                                        )
                                        # Rewrite the jump as an explicit destination array: the page number followed by
                                        # entries 1-4 of the original destination array, with entry 2 shifted by page1's width.
                                        # Example shape from the original author: “/D”:[10,'/XYZ',100,100,0]
                                        annot_obj["/A"].update(
                                            {
                                                NameObject("/D"): ArrayObject(
                                                    [
                                                        NumberObject(page_number),
                                                        destination.dest_array[1],
                                                        FloatObject(
                                                            destination.dest_array[2]
                                                            + int(
                                                                page1.mediaBox.getWidth()
                                                            )
                                                        ),
                                                        destination.dest_array[3],
                                                        destination.dest_array[4],
                                                    ]
                                                )  # make sure both key and value are PdfObject instances
                                            }
                                        )
                                        rect = annot_obj.get("/Rect")
                                        # Update the clickable rectangle: shift entries 0 and 2 by page1's width
                                        rect = ArrayObject(
                                            [
                                                FloatObject(
                                                    rect[0]
                                                    + int(page1.mediaBox.getWidth())
                                                ),
                                                rect[1],
                                                FloatObject(
                                                    rect[2]
                                                    + int(page1.mediaBox.getWidth())
                                                ),
                                                rect[3],
                                            ]
                                        )
                                        annot_obj.update(
                                            {
                                                NameObject(
                                                    "/Rect"
                                                ): rect  # make sure both key and value are PdfObject instances
                                            }
                                        )
                                    if dest and annot.idnum in page1_annot_id:
                                        # Look up the jump information in the original (first) file, including the target page
                                        destination = pdf1_reader.named_destinations[
                                            dest
                                        ]
                                        page_number = (
                                            pdf1_reader.get_destination_page_number(
                                                destination
                                            )
                                        )
                                        # Rewrite the jump as an explicit destination array: the page number followed by
                                        # entries 1-4 of the original destination array (no horizontal shift for page1).
                                        # Example shape from the original author: “/D”:[10,'/XYZ',100,100,0]
                                        annot_obj["/A"].update(
                                            {
                                                NameObject("/D"): ArrayObject(
                                                    [
                                                        NumberObject(page_number),
                                                        destination.dest_array[1],
                                                        FloatObject(
                                                            destination.dest_array[2]
                                                        ),
                                                        destination.dest_array[3],
                                                        destination.dest_array[4],
                                                    ]
                                                )  # make sure both key and value are PdfObject instances
                                            }
                                        )
                                        rect = annot_obj.get("/Rect")
                                        # Rebuild the rectangle with entries 0 and 2 wrapped in FloatObject; values are not shifted
                                        rect = ArrayObject(
                                            [
                                                FloatObject(rect[0]),
                                                rect[1],
                                                FloatObject(rect[2]),
                                                rect[3],
                                            ]
                                        )
                                        annot_obj.update(
                                            {
                                                NameObject(
                                                    "/Rect"
                                                ): rect  # make sure both key and value are PdfObject instances
                                            }
                                        )
                                elif "/S" in action and action["/S"] == "/URI":
                                    # External link: jumps to a URI
                                    # NOTE(review): `uri` is read but never used; URI links are left unchanged.
                                    uri = action.get("/URI")
                output_writer.addPage(new_page)
            # Save the merged PDF file
            with open(output_path, "wb") as output_file:
                output_writer.write(output_file)
 def _merge_pdfs_legacy(pdf1_path, pdf2_path, output_path):
    import PyPDF2  # PyPDF2这个库有严重的内存泄露问题，把它放到子进程中运行，从而方便内存的释放
    Percent = 0.95
--- a/docs/Dockerfile+JittorLLM
+++ b/docs/Dockerfile+JittorLLM
@@ -1 +0,0 @@
 # 此Dockerfile不再维护，请前往docs/GithubAction+JittorLLMs
--- a/docs/GithubAction+AllCapacityBeta
+++ b/docs/GithubAction+AllCapacityBeta
@@ -1,57 +0,0 @@
 # Image with the full set of optional capabilities (local LLM requirements, nougat-ocr,
 # knowledge-base plugin dependencies) installed on top of a CUDA + TeX Live base image.
 # How to build / run:
 # docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacity  --network=host --build-arg http_proxy=http://localhost:10881 --build-arg https_proxy=http://localhost:10881 .
 # docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacityBeta  --network=host .
 # docker run -it --net=host gpt-academic-all-capacity  bash
 # Based on an NVIDIA image so that GPUs are supported (the CUDA version shown by nvidia-smi on the host must be >= 11.3)
 FROM fuqingxu/11.3.1-runtime-ubuntu20.04-with-texlive:latest
 # Dependencies needed by edge-tts and by some pip packages
 RUN apt update && apt install ffmpeg build-essential -y
 # use python3 as the system default python
 WORKDIR /gpt
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
 # # Optional step: switch the pip index (the following three lines may be deleted)
 # RUN echo '[global]' > /etc/pip.conf && \
 #     echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
 #     echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf
 # Download PyTorch (CUDA 11.3 wheels)
 RUN python3 -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
 # Prepare pip dependencies
 RUN python3 -m pip install openai numpy arxiv rich
 RUN python3 -m pip install colorama Markdown pygments pymupdf
 RUN python3 -m pip install python-docx moviepy pdfminer
 RUN python3 -m pip install zh_langchain==0.2.1 pypinyin
 RUN python3 -m pip install rarfile py7zr
 RUN python3 -m pip install aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
 # Clone the project (and the MOSS sources it expects under request_llms/moss)
 WORKDIR /gpt
 RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
 WORKDIR /gpt/gpt_academic
 RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss
 RUN python3 -m pip install -r requirements.txt
 RUN python3 -m pip install -r request_llms/requirements_moss.txt
 RUN python3 -m pip install -r request_llms/requirements_qwen.txt
 RUN python3 -m pip install -r request_llms/requirements_chatglm.txt
 RUN python3 -m pip install -r request_llms/requirements_newbing.txt
 RUN python3 -m pip install nougat-ocr
 # Warm up the Tiktoken module
 RUN python3  -c 'from check_proxy import warm_up_modules; warm_up_modules()'
 # Install the extra dependencies of the knowledge-base plugin
 RUN apt-get update && apt-get install libgl1 -y
 RUN pip3 install transformers protobuf langchain sentence-transformers  faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
 RUN pip3 install unstructured[all-docs] --upgrade
 RUN python3  -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
 # Remove the top-level `tests` directory from dist-packages
 RUN rm -rf /usr/local/lib/python3.8/dist-packages/tests
 # COPY .cache /root/.cache
 # COPY config_private.py config_private.py
 # Start the application
 CMD ["python3", "-u", "main.py"]
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,14 +2,15 @@ https://public.agent-matrix.com/publish/gradio-3.32.10-py3-none-any.whl
 fastapi==0.110
 gradio-client==0.8
 pypdf2==2.12.1
 httpx<=0.25.2
 zhipuai==2.0.1
 tiktoken>=0.3.3
 requests[socks]
-pydantic==2.5.2
+pydantic==2.9.2
 llama-index~=0.10
 protobuf==3.20
 transformers>=4.27.1,<4.42
 scipdf_parser>=0.52
 spacy==3.7.4
 anthropic>=0.18.1
 python-markdown-math
 pymdown-extensions
@@ -32,3 +33,14 @@ loguru
 arxiv
 numpy
 rich
 llama-index-core==0.10.68
 llama-index-legacy==0.9.48
 llama-index-readers-file==0.1.33
 llama-index-readers-llama-parse==0.1.6
 llama-index-embeddings-azure-openai==0.1.10
 llama-index-embeddings-openai==0.1.10
 llama-parse==0.4.9
 mdit-py-plugins>=0.3.3
 linkify-it-py==2.0.3
--- a/tests/test_anim_gen.py
+++ b/tests/test_anim_gen.py
@@ -0,0 +1,12 @@
 """
 对项目中的各个插件进行测试。运行方法：直接运行 python tests/test_anim_gen.py
 """
 import init_test
 import os, sys
 # Script entry point: run the manim animation-generation plugin once with a sample prompt.
 if __name__ == "__main__":
    from test_utils import plugin_test
    # `\\pi` produces the same text as the former `\pi` (a literal backslash followed by "pi")
    # without relying on an invalid escape sequence, which newer Python versions warn about.
    plugin_test(plugin='crazy_functions.数学动画生成manim->动画生成', main_input="A point moving along function curve y=sin(x), starting from x=0 and stop at x=4*\\pi.")
--- a/4
+++ b/4
@@ -1,5 +1,5 @@
 {
-  "version": 3.83,
+  "version": 3.90,
  "show_feature": true,
-  "new_feature": "增加欢迎页面 <-> 优化图像生成插件 <-> 添加紫东太初大模型支持 <-> 保留主题选择 <-> 支持更复杂的插件框架 <-> 上传文件时显示进度条"
+  "new_feature": "增加RAG组件 <-> 升级多合一主提交键"
 }
		`@@ -1 +0,0 @@`
			`# 此Dockerfile不再维护，请前往docs/GithubAction+JittorLLMs`