add legacy fallback option

Fix the PDF concatenation error.
2025-12-07 15:06:48 +00:00 · 2024-10-13 08:15:58 +00:00 · 2024-10-13 08:03:31 +00:00
--- a/.github/workflows/build-with-latex-arm.yml
+++ b/.github/workflows/build-with-latex-arm.yml
@@ -1,51 +0,0 @@
-# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
-name: build-with-latex-arm
-
-on:
-  push:
-    branches:
-      - "master"
-
-env:
-  REGISTRY: ghcr.io
-  IMAGE_NAME: ${{ github.repository }}_with_latex_arm
-
-jobs:
-  build-and-push-image:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-
-    steps:
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Extract metadata (tags, labels) for Docker
-        id: meta
-        uses: docker/metadata-action@v4
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
-
-      - name: Build and push Docker image
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          push: true
-          platforms: linux/arm64
-          file: docs/GithubAction+NoLocal+Latex
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
--- a/crazy_functions/latex_fns/latex_toolbox.py
+++ b/crazy_functions/latex_fns/latex_toolbox.py
@@ -697,6 +697,15 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
                    ),
                    0,
                )
+                if "/Annots" in page1:
+                    page1_annot_id = [annot.idnum for annot in page1["/Annots"]]
+                else:
+                    page1_annot_id = []
+
+                if "/Annots" in page2:
+                    page2_annot_id = [annot.idnum for annot in page2["/Annots"]]
+                else:
+                    page2_annot_id = []
                if "/Annots" in new_page:
                    annotations = new_page["/Annots"]
                    for i, annot in enumerate(annotations):
@@ -711,8 +720,7 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
                                if "/S" in action and action["/S"] == "/GoTo":
                                    # 内部链接：跳转到文档中的某个页面
                                    dest = action.get("/D")  # 目标页或目标位置
-                                    # if dest and annot.idnum in page2_annot_id:
-                                    if dest in pdf2_reader.named_destinations:
+                                    if dest and annot.idnum in page2_annot_id:
                                        # 获取原始文件中跳转信息，包括跳转页面
                                        destination = pdf2_reader.named_destinations[
                                            dest
@@ -724,39 +732,24 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
                                        )
                                        # 更新跳转信息，跳转到对应的页面和，指定坐标 (100, 150)，缩放比例为 100%
                                        # “/D”:[10,'/XYZ',100,100,0]
-                                        if destination.dest_array[1] == "/XYZ":
-                                            annot_obj["/A"].update(
-                                                {
-                                                    NameObject("/D"): ArrayObject(
-                                                        [
-                                                            NumberObject(page_number),
-                                                            destination.dest_array[1],
-                                                            FloatObject(
-                                                                destination.dest_array[
-                                                                    2
-                                                                ]
-                                                                + int(
-                                                                    page1.mediaBox.getWidth()
-                                                                )
-                                                            ),
-                                                            destination.dest_array[3],
-                                                            destination.dest_array[4],
-                                                        ]
-                                                    )  # 确保键和值是 PdfObject
-                                                }
-                                            )
-                                        else:
-                                            annot_obj["/A"].update(
-                                                {
-                                                    NameObject("/D"): ArrayObject(
-                                                        [
-                                                            NumberObject(page_number),
-                                                            destination.dest_array[1],
-                                                        ]
-                                                    )  # 确保键和值是 PdfObject
-                                                }
-                                            )
-
+                                        annot_obj["/A"].update(
+                                            {
+                                                NameObject("/D"): ArrayObject(
+                                                    [
+                                                        NumberObject(page_number),
+                                                        destination.dest_array[1],
+                                                        FloatObject(
+                                                            destination.dest_array[2]
+                                                            + int(
+                                                                page1.mediaBox.getWidth()
+                                                            )
+                                                        ),
+                                                        destination.dest_array[3],
+                                                        destination.dest_array[4],
+                                                    ]
+                                                )  # 确保键和值是 PdfObject
+                                            }
+                                        )
                                        rect = annot_obj.get("/Rect")
                                        # 更新点击坐标
                                        rect = ArrayObject(
@@ -780,9 +773,7 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
                                                ): rect  # 确保键和值是 PdfObject
                                            }
                                        )
-                                    # if dest and annot.idnum in page1_annot_id:
-                                    if dest in pdf1_reader.named_destinations:
-
+                                    if dest and annot.idnum in page1_annot_id:
                                        # 获取原始文件中跳转信息，包括跳转页面
                                        destination = pdf1_reader.named_destinations[
                                            dest
@@ -794,36 +785,21 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
                                        )
                                        # 更新跳转信息，跳转到对应的页面和，指定坐标 (100, 150)，缩放比例为 100%
                                        # “/D”:[10,'/XYZ',100,100,0]
-                                        if destination.dest_array[1] == "/XYZ":
-                                            annot_obj["/A"].update(
-                                                {
-                                                    NameObject("/D"): ArrayObject(
-                                                        [
-                                                            NumberObject(page_number),
-                                                            destination.dest_array[1],
-                                                            FloatObject(
-                                                                destination.dest_array[
-                                                                    2
-                                                                ]
-                                                            ),
-                                                            destination.dest_array[3],
-                                                            destination.dest_array[4],
-                                                        ]
-                                                    )  # 确保键和值是 PdfObject
-                                                }
-                                            )
-                                        else:
-                                            annot_obj["/A"].update(
-                                                {
-                                                    NameObject("/D"): ArrayObject(
-                                                        [
-                                                            NumberObject(page_number),
-                                                            destination.dest_array[1],
-                                                        ]
-                                                    )  # 确保键和值是 PdfObject
-                                                }
-                                            )
-
+                                        annot_obj["/A"].update(
+                                            {
+                                                NameObject("/D"): ArrayObject(
+                                                    [
+                                                        NumberObject(page_number),
+                                                        destination.dest_array[1],
+                                                        FloatObject(
+                                                            destination.dest_array[2]
+                                                        ),
+                                                        destination.dest_array[3],
+                                                        destination.dest_array[4],
+                                                    ]
+                                                )  # 确保键和值是 PdfObject
+                                            }
+                                        )
                                        rect = annot_obj.get("/Rect")
                                        rect = ArrayObject(
                                            [
@@ -844,12 +820,14 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
                                elif "/S" in action and action["/S"] == "/URI":
                                    # 外部链接：跳转到某个URI
                                    uri = action.get("/URI")
+
                output_writer.addPage(new_page)
            # Save the merged PDF file
            with open(output_path, "wb") as output_file:
                output_writer.write(output_file)


+
 def _merge_pdfs_legacy(pdf1_path, pdf2_path, output_path):
    import PyPDF2  # PyPDF2这个库有严重的内存泄露问题，把它放到子进程中运行，从而方便内存的释放

--- a/docs/GithubAction+NoLocal+Latex
+++ b/docs/GithubAction+NoLocal+Latex
@@ -3,19 +3,33 @@
 # - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex .
 # - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex

-FROM menghuan1918/ubuntu_uv_ctex:latest
-ENV DEBIAN_FRONTEND=noninteractive
-SHELL ["/bin/bash", "-c"]
+FROM fuqingxu/python311_texlive_ctex:latest
+ENV PATH "$PATH:/usr/local/texlive/2022/bin/x86_64-linux"
+ENV PATH "$PATH:/usr/local/texlive/2023/bin/x86_64-linux"
+ENV PATH "$PATH:/usr/local/texlive/2024/bin/x86_64-linux"
+ENV PATH "$PATH:/usr/local/texlive/2025/bin/x86_64-linux"
+ENV PATH "$PATH:/usr/local/texlive/2026/bin/x86_64-linux"
+
+# 指定路径
 WORKDIR /gpt
+
+RUN pip3 install openai numpy arxiv rich
+RUN pip3 install colorama Markdown pygments pymupdf
+RUN pip3 install python-docx pdfminer
+RUN pip3 install nougat-ocr
+
+# 装载项目文件
 COPY . .
-RUN /root/.cargo/bin/uv venv --seed \
-    && source .venv/bin/activate \
-    && /root/.cargo/bin/uv pip install openai numpy arxiv rich colorama Markdown pygments pymupdf python-docx pdfminer \
-    && /root/.cargo/bin/uv pip install -r requirements.txt \
-    && /root/.cargo/bin/uv clean
+
+
+# 安装依赖
+RUN pip3 install -r requirements.txt
+
+# edge-tts需要的依赖
+RUN apt update && apt install ffmpeg -y

 # 可选步骤，用于预热模块
-RUN .venv/bin/python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
+RUN python3  -c 'from check_proxy import warm_up_modules; warm_up_modules()'

 # 启动
-CMD [".venv/bin/python3", "-u", "main.py"]
+CMD ["python3", "-u", "main.py"]
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -256,8 +256,6 @@ model_info = {
        "max_token": 128000,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
-        "openai_disable_system_prompt": True,
-        "openai_disable_stream": True,
    },
    "o1-mini": {
        "fn_with_ui": chatgpt_ui,
@@ -266,8 +264,6 @@ model_info = {
        "max_token": 128000,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
-        "openai_disable_system_prompt": True,
-        "openai_disable_stream": True,
    },

    "gpt-4-turbo": {
作者	SHA1	备注	提交日期
binary-husky	19a24523e5	add legacy fallback option	2024-10-13 08:15:58 +00:00
wsg1873	76685040af	solve the pdf concatenate error.	2024-10-13 08:03:31 +00:00