比较提交

..

2 次代码提交

作者 SHA1 备注 提交日期
binary-husky
19a24523e5 add legacy fallback option 2024-10-13 08:15:58 +00:00
wsg1873
76685040af solve the pdf concatenate error. 2024-10-13 08:03:31 +00:00
共有 4 个文件被更改,包括 70 次插入133 次删除

查看文件

@@ -1,51 +0,0 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: build-with-latex-arm
on:
push:
branches:
- "master"
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_with_latex_arm
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Checkout repository
uses: actions/checkout@v4
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: .
push: true
platforms: linux/arm64
file: docs/GithubAction+NoLocal+Latex
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

查看文件

@@ -697,6 +697,15 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
),
0,
)
if "/Annots" in page1:
page1_annot_id = [annot.idnum for annot in page1["/Annots"]]
else:
page1_annot_id = []
if "/Annots" in page2:
page2_annot_id = [annot.idnum for annot in page2["/Annots"]]
else:
page2_annot_id = []
if "/Annots" in new_page:
annotations = new_page["/Annots"]
for i, annot in enumerate(annotations):
@@ -711,8 +720,7 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
if "/S" in action and action["/S"] == "/GoTo":
# 内部链接:跳转到文档中的某个页面
dest = action.get("/D") # 目标页或目标位置
# if dest and annot.idnum in page2_annot_id:
if dest in pdf2_reader.named_destinations:
if dest and annot.idnum in page2_annot_id:
# 获取原始文件中跳转信息,包括跳转页面
destination = pdf2_reader.named_destinations[
dest
@@ -724,39 +732,24 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
)
# 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100%
# “/D”:[10,'/XYZ',100,100,0]
if destination.dest_array[1] == "/XYZ":
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
FloatObject(
destination.dest_array[
2
]
+ int(
page1.mediaBox.getWidth()
)
),
destination.dest_array[3],
destination.dest_array[4],
]
) # 确保键和值是 PdfObject
}
)
else:
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
]
) # 确保键和值是 PdfObject
}
)
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
FloatObject(
destination.dest_array[2]
+ int(
page1.mediaBox.getWidth()
)
),
destination.dest_array[3],
destination.dest_array[4],
]
) # 确保键和值是 PdfObject
}
)
rect = annot_obj.get("/Rect")
# 更新点击坐标
rect = ArrayObject(
@@ -780,9 +773,7 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
): rect # 确保键和值是 PdfObject
}
)
# if dest and annot.idnum in page1_annot_id:
if dest in pdf1_reader.named_destinations:
if dest and annot.idnum in page1_annot_id:
# 获取原始文件中跳转信息,包括跳转页面
destination = pdf1_reader.named_destinations[
dest
@@ -794,36 +785,21 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
)
# 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100%
# “/D”:[10,'/XYZ',100,100,0]
if destination.dest_array[1] == "/XYZ":
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
FloatObject(
destination.dest_array[
2
]
),
destination.dest_array[3],
destination.dest_array[4],
]
) # 确保键和值是 PdfObject
}
)
else:
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
]
) # 确保键和值是 PdfObject
}
)
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
FloatObject(
destination.dest_array[2]
),
destination.dest_array[3],
destination.dest_array[4],
]
) # 确保键和值是 PdfObject
}
)
rect = annot_obj.get("/Rect")
rect = ArrayObject(
[
@@ -844,12 +820,14 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
elif "/S" in action and action["/S"] == "/URI":
# 外部链接跳转到某个URI
uri = action.get("/URI")
output_writer.addPage(new_page)
# Save the merged PDF file
with open(output_path, "wb") as output_file:
output_writer.write(output_file)
def _merge_pdfs_legacy(pdf1_path, pdf2_path, output_path):
import PyPDF2 # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放

查看文件

@@ -3,19 +3,33 @@
# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex .
# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
FROM menghuan1918/ubuntu_uv_ctex:latest
ENV DEBIAN_FRONTEND=noninteractive
SHELL ["/bin/bash", "-c"]
FROM fuqingxu/python311_texlive_ctex:latest
ENV PATH "$PATH:/usr/local/texlive/2022/bin/x86_64-linux"
ENV PATH "$PATH:/usr/local/texlive/2023/bin/x86_64-linux"
ENV PATH "$PATH:/usr/local/texlive/2024/bin/x86_64-linux"
ENV PATH "$PATH:/usr/local/texlive/2025/bin/x86_64-linux"
ENV PATH "$PATH:/usr/local/texlive/2026/bin/x86_64-linux"
# 指定路径
WORKDIR /gpt
RUN pip3 install openai numpy arxiv rich
RUN pip3 install colorama Markdown pygments pymupdf
RUN pip3 install python-docx pdfminer
RUN pip3 install nougat-ocr
# 装载项目文件
COPY . .
RUN /root/.cargo/bin/uv venv --seed \
&& source .venv/bin/activate \
&& /root/.cargo/bin/uv pip install openai numpy arxiv rich colorama Markdown pygments pymupdf python-docx pdfminer \
&& /root/.cargo/bin/uv pip install -r requirements.txt \
&& /root/.cargo/bin/uv clean
# 安装依赖
RUN pip3 install -r requirements.txt
# edge-tts需要的依赖
RUN apt update && apt install ffmpeg -y
# 可选步骤,用于预热模块
RUN .venv/bin/python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
# 启动
CMD [".venv/bin/python3", "-u", "main.py"]
CMD ["python3", "-u", "main.py"]

查看文件

@@ -256,8 +256,6 @@ model_info = {
"max_token": 128000,
"tokenizer": tokenizer_gpt4,
"token_cnt": get_token_num_gpt4,
"openai_disable_system_prompt": True,
"openai_disable_stream": True,
},
"o1-mini": {
"fn_with_ui": chatgpt_ui,
@@ -266,8 +264,6 @@ model_info = {
"max_token": 128000,
"tokenizer": tokenizer_gpt4,
"token_cnt": get_token_num_gpt4,
"openai_disable_system_prompt": True,
"openai_disable_stream": True,
},
"gpt-4-turbo": {