镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 14:36:48 +00:00
比较提交
8 次代码提交
version3.9
...
boyin_essa
| 作者 | SHA1 | 提交日期 | |
|---|---|---|---|
|
|
c83bf214d0 | ||
|
|
e34c49dce5 | ||
|
|
3890467c84 | ||
|
|
074b3c9828 | ||
|
|
b8e8457a01 | ||
|
|
2c93a24d7e | ||
|
|
e9af6ef3a0 | ||
|
|
5ae8981dbb |
2
.github/workflows/build-with-latex-arm.yml
vendored
2
.github/workflows/build-with-latex-arm.yml
vendored
@@ -46,6 +46,6 @@ jobs:
|
||||
context: .
|
||||
push: true
|
||||
platforms: linux/arm64
|
||||
file: docs/GithubAction+NoLocal+Latex
|
||||
file: docs/GithubAction+NoLocal+Latex+Arm
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
@@ -138,25 +138,43 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
|
||||
cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
|
||||
if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id
|
||||
|
||||
url_tar = url_.replace('/abs/', '/e-print/')
|
||||
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
|
||||
extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
|
||||
os.makedirs(translation_dir, exist_ok=True)
|
||||
|
||||
# <-------------- download arxiv source file ------------->
|
||||
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
|
||||
dst = pj(translation_dir, arxiv_id + '.tar')
|
||||
if os.path.exists(dst):
|
||||
yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面
|
||||
else:
|
||||
yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面
|
||||
os.makedirs(translation_dir, exist_ok=True)
|
||||
# <-------------- download arxiv source file ------------->
|
||||
|
||||
def fix_url_and_download():
|
||||
# for url_tar in [url_.replace('/abs/', '/e-print/'), url_.replace('/abs/', '/src/')]:
|
||||
for url_tar in [url_.replace('/abs/', '/src/'), url_.replace('/abs/', '/e-print/')]:
|
||||
proxies = get_conf('proxies')
|
||||
r = requests.get(url_tar, proxies=proxies)
|
||||
if r.status_code == 200:
|
||||
with open(dst, 'wb+') as f:
|
||||
f.write(r.content)
|
||||
return True
|
||||
return False
|
||||
|
||||
if os.path.exists(dst) and allow_cache:
|
||||
yield from update_ui_lastest_msg(f"调用缓存 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
|
||||
success = True
|
||||
else:
|
||||
yield from update_ui_lastest_msg(f"开始下载 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
|
||||
success = fix_url_and_download()
|
||||
yield from update_ui_lastest_msg(f"下载完成 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
|
||||
if not success:
|
||||
yield from update_ui_lastest_msg(f"下载失败 {arxiv_id}", chatbot=chatbot, history=history)
|
||||
raise tarfile.ReadError(f"论文下载失败 {arxiv_id}")
|
||||
|
||||
# <-------------- extract file ------------->
|
||||
yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history) # 刷新界面
|
||||
from toolbox import extract_archive
|
||||
try:
|
||||
extract_archive(file_path=dst, dest_dir=extract_dst)
|
||||
except tarfile.ReadError:
|
||||
os.remove(dst)
|
||||
raise tarfile.ReadError(f"论文下载失败")
|
||||
return extract_dst, arxiv_id
|
||||
|
||||
|
||||
|
||||
@@ -697,15 +697,6 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
),
|
||||
0,
|
||||
)
|
||||
if "/Annots" in page1:
|
||||
page1_annot_id = [annot.idnum for annot in page1["/Annots"]]
|
||||
else:
|
||||
page1_annot_id = []
|
||||
|
||||
if "/Annots" in page2:
|
||||
page2_annot_id = [annot.idnum for annot in page2["/Annots"]]
|
||||
else:
|
||||
page2_annot_id = []
|
||||
if "/Annots" in new_page:
|
||||
annotations = new_page["/Annots"]
|
||||
for i, annot in enumerate(annotations):
|
||||
@@ -720,7 +711,8 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
if "/S" in action and action["/S"] == "/GoTo":
|
||||
# 内部链接:跳转到文档中的某个页面
|
||||
dest = action.get("/D") # 目标页或目标位置
|
||||
if dest and annot.idnum in page2_annot_id:
|
||||
# if dest and annot.idnum in page2_annot_id:
|
||||
if dest in pdf2_reader.named_destinations:
|
||||
# 获取原始文件中跳转信息,包括跳转页面
|
||||
destination = pdf2_reader.named_destinations[
|
||||
dest
|
||||
@@ -732,6 +724,7 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
)
|
||||
# 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100%
|
||||
# “/D”:[10,'/XYZ',100,100,0]
|
||||
if destination.dest_array[1] == "/XYZ":
|
||||
annot_obj["/A"].update(
|
||||
{
|
||||
NameObject("/D"): ArrayObject(
|
||||
@@ -739,7 +732,9 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
NumberObject(page_number),
|
||||
destination.dest_array[1],
|
||||
FloatObject(
|
||||
destination.dest_array[2]
|
||||
destination.dest_array[
|
||||
2
|
||||
]
|
||||
+ int(
|
||||
page1.mediaBox.getWidth()
|
||||
)
|
||||
@@ -750,6 +745,18 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
) # 确保键和值是 PdfObject
|
||||
}
|
||||
)
|
||||
else:
|
||||
annot_obj["/A"].update(
|
||||
{
|
||||
NameObject("/D"): ArrayObject(
|
||||
[
|
||||
NumberObject(page_number),
|
||||
destination.dest_array[1],
|
||||
]
|
||||
) # 确保键和值是 PdfObject
|
||||
}
|
||||
)
|
||||
|
||||
rect = annot_obj.get("/Rect")
|
||||
# 更新点击坐标
|
||||
rect = ArrayObject(
|
||||
@@ -773,7 +780,9 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
): rect # 确保键和值是 PdfObject
|
||||
}
|
||||
)
|
||||
if dest and annot.idnum in page1_annot_id:
|
||||
# if dest and annot.idnum in page1_annot_id:
|
||||
if dest in pdf1_reader.named_destinations:
|
||||
|
||||
# 获取原始文件中跳转信息,包括跳转页面
|
||||
destination = pdf1_reader.named_destinations[
|
||||
dest
|
||||
@@ -785,6 +794,7 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
)
|
||||
# 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100%
|
||||
# “/D”:[10,'/XYZ',100,100,0]
|
||||
if destination.dest_array[1] == "/XYZ":
|
||||
annot_obj["/A"].update(
|
||||
{
|
||||
NameObject("/D"): ArrayObject(
|
||||
@@ -792,7 +802,9 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
NumberObject(page_number),
|
||||
destination.dest_array[1],
|
||||
FloatObject(
|
||||
destination.dest_array[2]
|
||||
destination.dest_array[
|
||||
2
|
||||
]
|
||||
),
|
||||
destination.dest_array[3],
|
||||
destination.dest_array[4],
|
||||
@@ -800,6 +812,18 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
) # 确保键和值是 PdfObject
|
||||
}
|
||||
)
|
||||
else:
|
||||
annot_obj["/A"].update(
|
||||
{
|
||||
NameObject("/D"): ArrayObject(
|
||||
[
|
||||
NumberObject(page_number),
|
||||
destination.dest_array[1],
|
||||
]
|
||||
) # 确保键和值是 PdfObject
|
||||
}
|
||||
)
|
||||
|
||||
rect = annot_obj.get("/Rect")
|
||||
rect = ArrayObject(
|
||||
[
|
||||
@@ -820,14 +844,12 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
|
||||
elif "/S" in action and action["/S"] == "/URI":
|
||||
# 外部链接:跳转到某个URI
|
||||
uri = action.get("/URI")
|
||||
|
||||
output_writer.addPage(new_page)
|
||||
# Save the merged PDF file
|
||||
with open(output_path, "wb") as output_file:
|
||||
output_writer.write(output_file)
|
||||
|
||||
|
||||
|
||||
def _merge_pdfs_legacy(pdf1_path, pdf2_path, output_path):
|
||||
import PyPDF2 # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放
|
||||
|
||||
|
||||
@@ -3,19 +3,33 @@
|
||||
# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex .
|
||||
# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
|
||||
|
||||
FROM menghuan1918/ubuntu_uv_ctex:latest
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
FROM fuqingxu/python311_texlive_ctex:latest
|
||||
ENV PATH "$PATH:/usr/local/texlive/2022/bin/x86_64-linux"
|
||||
ENV PATH "$PATH:/usr/local/texlive/2023/bin/x86_64-linux"
|
||||
ENV PATH "$PATH:/usr/local/texlive/2024/bin/x86_64-linux"
|
||||
ENV PATH "$PATH:/usr/local/texlive/2025/bin/x86_64-linux"
|
||||
ENV PATH "$PATH:/usr/local/texlive/2026/bin/x86_64-linux"
|
||||
|
||||
# 指定路径
|
||||
WORKDIR /gpt
|
||||
|
||||
RUN pip3 install openai numpy arxiv rich
|
||||
RUN pip3 install colorama Markdown pygments pymupdf
|
||||
RUN pip3 install python-docx pdfminer
|
||||
RUN pip3 install nougat-ocr
|
||||
|
||||
# 装载项目文件
|
||||
COPY . .
|
||||
RUN /root/.cargo/bin/uv venv --seed \
|
||||
&& source .venv/bin/activate \
|
||||
&& /root/.cargo/bin/uv pip install openai numpy arxiv rich colorama Markdown pygments pymupdf python-docx pdfminer \
|
||||
&& /root/.cargo/bin/uv pip install -r requirements.txt \
|
||||
&& /root/.cargo/bin/uv clean
|
||||
|
||||
|
||||
# 安装依赖
|
||||
RUN pip3 install -r requirements.txt
|
||||
|
||||
# edge-tts需要的依赖
|
||||
RUN apt update && apt install ffmpeg -y
|
||||
|
||||
# 可选步骤,用于预热模块
|
||||
RUN .venv/bin/python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
|
||||
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
|
||||
|
||||
# 启动
|
||||
CMD [".venv/bin/python3", "-u", "main.py"]
|
||||
CMD ["python3", "-u", "main.py"]
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM
|
||||
# - 1 修改 `config.py`
|
||||
# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex .
|
||||
# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
|
||||
|
||||
FROM menghuan1918/ubuntu_uv_ctex:latest
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
WORKDIR /gpt
|
||||
COPY . .
|
||||
RUN /root/.cargo/bin/uv venv --seed \
|
||||
&& source .venv/bin/activate \
|
||||
&& /root/.cargo/bin/uv pip install openai numpy arxiv rich colorama Markdown pygments pymupdf python-docx pdfminer \
|
||||
&& /root/.cargo/bin/uv pip install -r requirements.txt \
|
||||
&& /root/.cargo/bin/uv clean
|
||||
|
||||
# 对齐python3
|
||||
RUN rm -f /usr/bin/python3 && ln -s /gpt/.venv/bin/python /usr/bin/python3
|
||||
RUN rm -f /usr/bin/python && ln -s /gpt/.venv/bin/python /usr/bin/python
|
||||
|
||||
# 可选步骤,用于预热模块
|
||||
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
|
||||
|
||||
# 启动
|
||||
CMD ["python3", "-u", "main.py"]
|
||||
@@ -1285,4 +1285,3 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot,
|
||||
|
||||
# 更新一下llm_kwargs的参数,否则会出现参数不匹配的问题
|
||||
yield from method(inputs, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, stream, additional_fn)
|
||||
|
||||
|
||||
@@ -202,10 +202,13 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
|
||||
if (time.time()-observe_window[1]) > watch_dog_patience:
|
||||
raise RuntimeError("用户取消了程序。")
|
||||
else: raise RuntimeError("意外Json结构:"+delta)
|
||||
if json_data and json_data['finish_reason'] == 'content_filter':
|
||||
raise RuntimeError("由于提问含不合规内容被Azure过滤。")
|
||||
if json_data and json_data['finish_reason'] == 'length':
|
||||
|
||||
finish_reason = json_data.get('finish_reason', None) if json_data else None
|
||||
if finish_reason == 'content_filter':
|
||||
raise RuntimeError("由于提问含不合规内容被过滤。")
|
||||
if finish_reason == 'length':
|
||||
raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -536,4 +539,3 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
|
||||
|
||||
return headers,payload
|
||||
|
||||
|
||||
|
||||
在新工单中引用
屏蔽一个用户