比较提交

..

12 次代码提交

作者 SHA1 备注 提交日期
wsg1873
303d55c8a1 solve the concatenate error. 2024-10-15 15:42:35 +00:00
binary-husky
c83bf214d0 change arxiv download attempt url order 2024-10-15 09:09:24 +00:00
binary-husky
e34c49dce5 compat: deal with arxiv url change 2024-10-15 09:07:39 +00:00
binary-husky
3890467c84 replace rm with rm -f 2024-10-15 07:32:29 +00:00
binary-husky
074b3c9828 explicitly declare default value 2024-10-15 06:41:12 +00:00
Nextstrain
b8e8457a01 关于o1系列模型无法正常请求的修复,多模型轮询KeyError: 'finish_reason'的修复 (#1992)
* Update bridge_all.py

* Update bridge_chatgpt.py

* Update bridge_chatgpt.py

* Update bridge_all.py

* Update bridge_all.py
2024-10-15 14:36:51 +08:00
binary-husky
2c93a24d7e fix dockerfile: try align python 2024-10-15 06:35:35 +00:00
binary-husky
e9af6ef3a0 fix: github action glitch 2024-10-15 06:32:47 +00:00
wsg1873
5ae8981dbb add the '/Fit' destination (#2009) 2024-10-14 22:50:56 +08:00
binary-husky
adbed044e4 fix o1 compat problem 2024-10-13 17:02:07 +00:00
Menghuan1918
2fe5febaf0 为build-with-latex版本Docker构建新增arm64支持 (#1994)
* Add arm64 support

* Bug fix

* Some build bug fix

* Add arm support

* 分离arm和x86构建

* 改进构建文档

* update tags

* Update build-with-latex-arm.yml

* Revert "Update build-with-latex-arm.yml"

This reverts commit 9af92549b5.

* Update

* Add

* httpx

* Addison

* Update GithubAction+NoLocal+Latex

* Update docker-compose.yml and GithubAction+NoLocal+Latex

* Update README.md

* test math anim generation

* solve the pdf concatenate error. (#2006)

* solve the pdf concatenate error.

* add legacy fallback option

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>

---------

Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com>
Co-authored-by: binary-husky <qingxu.fu@outlook.com>
Co-authored-by: wsg1873 <wsg0326@163.com>
2024-10-14 00:25:28 +08:00
wsg1873
f54d8e559a solve the pdf concatenate error. (#2006)
* solve the pdf concatenate error.

* add legacy fallback option

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
2024-10-13 16:16:51 +08:00
共有 6 个文件被更改,包括 247 次插入123 次删除

查看文件

@@ -0,0 +1,51 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: build-with-latex-arm
on:
push:
branches:
- "master"
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_with_latex_arm
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Checkout repository
uses: actions/checkout@v4
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: .
push: true
platforms: linux/arm64
file: docs/GithubAction+NoLocal+Latex+Arm
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

查看文件

@@ -138,25 +138,43 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
if cached_translation_pdf and allow_cache: return cached_translation_pdf, arxiv_id
url_tar = url_.replace('/abs/', '/e-print/')
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
os.makedirs(translation_dir, exist_ok=True)
# <-------------- download arxiv source file ------------->
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
dst = pj(translation_dir, arxiv_id + '.tar')
if os.path.exists(dst):
yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面
os.makedirs(translation_dir, exist_ok=True)
# <-------------- download arxiv source file ------------->
def fix_url_and_download():
# for url_tar in [url_.replace('/abs/', '/e-print/'), url_.replace('/abs/', '/src/')]:
for url_tar in [url_.replace('/abs/', '/src/'), url_.replace('/abs/', '/e-print/')]:
proxies = get_conf('proxies')
r = requests.get(url_tar, proxies=proxies)
if r.status_code == 200:
with open(dst, 'wb+') as f:
f.write(r.content)
return True
return False
if os.path.exists(dst) and allow_cache:
yield from update_ui_lastest_msg(f"调用缓存 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
success = True
else:
yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面
proxies = get_conf('proxies')
r = requests.get(url_tar, proxies=proxies)
with open(dst, 'wb+') as f:
f.write(r.content)
yield from update_ui_lastest_msg(f"开始下载 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
success = fix_url_and_download()
yield from update_ui_lastest_msg(f"下载完成 {arxiv_id}", chatbot=chatbot, history=history) # 刷新界面
if not success:
yield from update_ui_lastest_msg(f"下载失败 {arxiv_id}", chatbot=chatbot, history=history)
raise tarfile.ReadError(f"论文下载失败 {arxiv_id}")
# <-------------- extract file ------------->
yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history) # 刷新界面
from toolbox import extract_archive
extract_archive(file_path=dst, dest_dir=extract_dst)
try:
extract_archive(file_path=dst, dest_dir=extract_dst)
except tarfile.ReadError:
os.remove(dst)
raise tarfile.ReadError(f"论文下载失败")
return extract_dst, arxiv_id

查看文件

@@ -697,15 +697,6 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
),
0,
)
if "/Annots" in page1:
page1_annot_id = [annot.idnum for annot in page1["/Annots"]]
else:
page1_annot_id = []
if "/Annots" in page2:
page2_annot_id = [annot.idnum for annot in page2["/Annots"]]
else:
page2_annot_id = []
if "/Annots" in new_page:
annotations = new_page["/Annots"]
for i, annot in enumerate(annotations):
@@ -720,114 +711,148 @@ def _merge_pdfs_ng(pdf1_path, pdf2_path, output_path):
if "/S" in action and action["/S"] == "/GoTo":
# 内部链接:跳转到文档中的某个页面
dest = action.get("/D") # 目标页或目标位置
if dest and annot.idnum in page2_annot_id:
# 获取原始文件中跳转信息,包括跳转页面
destination = pdf2_reader.named_destinations[
dest
]
page_number = (
pdf2_reader.get_destination_page_number(
destination
)
)
# 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100%
# “/D”:[10,'/XYZ',100,100,0]
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
FloatObject(
destination.dest_array[2]
+ int(
page1.mediaBox.getWidth()
)
),
destination.dest_array[3],
destination.dest_array[4],
]
) # 确保键和值是 PdfObject
}
)
rect = annot_obj.get("/Rect")
# 更新点击坐标
rect = ArrayObject(
[
FloatObject(
rect[0]
+ int(page1.mediaBox.getWidth())
),
rect[1],
FloatObject(
rect[2]
+ int(page1.mediaBox.getWidth())
),
rect[3],
# if dest and annot.idnum in page2_annot_id:
# if dest in pdf2_reader.named_destinations:
if dest and page2.annotations:
if annot in page2.annotations:
# 获取原始文件中跳转信息,包括跳转页面
destination = pdf2_reader.named_destinations[
dest
]
)
annot_obj.update(
{
NameObject(
"/Rect"
): rect # 确保键和值是 PdfObject
}
)
if dest and annot.idnum in page1_annot_id:
# 获取原始文件中跳转信息,包括跳转页面
destination = pdf1_reader.named_destinations[
dest
]
page_number = (
pdf1_reader.get_destination_page_number(
destination
page_number = (
pdf2_reader.get_destination_page_number(
destination
)
)
)
# 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100%
# “/D”:[10,'/XYZ',100,100,0]
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
FloatObject(
destination.dest_array[2]
),
destination.dest_array[3],
destination.dest_array[4],
]
) # 确保键和值是 PdfObject
}
)
rect = annot_obj.get("/Rect")
rect = ArrayObject(
[
FloatObject(rect[0]),
rect[1],
FloatObject(rect[2]),
rect[3],
# 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100%
# “/D”:[10,'/XYZ',100,100,0]
if destination.dest_array[1] == "/XYZ":
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
FloatObject(
destination.dest_array[
2
]
+ int(
page1.mediaBox.getWidth()
)
),
destination.dest_array[3],
destination.dest_array[4],
]
) # 确保键和值是 PdfObject
}
)
else:
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
]
) # 确保键和值是 PdfObject
}
)
rect = annot_obj.get("/Rect")
# 更新点击坐标
rect = ArrayObject(
[
FloatObject(
rect[0]
+ int(page1.mediaBox.getWidth())
),
rect[1],
FloatObject(
rect[2]
+ int(page1.mediaBox.getWidth())
),
rect[3],
]
)
annot_obj.update(
{
NameObject(
"/Rect"
): rect # 确保键和值是 PdfObject
}
)
# if dest and annot.idnum in page1_annot_id:
# if dest in pdf1_reader.named_destinations:
if dest and page1.annotations:
if annot in page1.annotations:
# 获取原始文件中跳转信息,包括跳转页面
destination = pdf1_reader.named_destinations[
dest
]
)
annot_obj.update(
{
NameObject(
"/Rect"
): rect # 确保键和值是 PdfObject
}
)
page_number = (
pdf1_reader.get_destination_page_number(
destination
)
)
# 更新跳转信息,跳转到对应的页面和,指定坐标 (100, 150),缩放比例为 100%
# “/D”:[10,'/XYZ',100,100,0]
if destination.dest_array[1] == "/XYZ":
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
FloatObject(
destination.dest_array[
2
]
),
destination.dest_array[3],
destination.dest_array[4],
]
) # 确保键和值是 PdfObject
}
)
else:
annot_obj["/A"].update(
{
NameObject("/D"): ArrayObject(
[
NumberObject(page_number),
destination.dest_array[1],
]
) # 确保键和值是 PdfObject
}
)
rect = annot_obj.get("/Rect")
rect = ArrayObject(
[
FloatObject(rect[0]),
rect[1],
FloatObject(rect[2]),
rect[3],
]
)
annot_obj.update(
{
NameObject(
"/Rect"
): rect # 确保键和值是 PdfObject
}
)
elif "/S" in action and action["/S"] == "/URI":
# 外部链接跳转到某个URI
uri = action.get("/URI")
output_writer.addPage(new_page)
# Save the merged PDF file
with open(output_path, "wb") as output_file:
output_writer.write(output_file)
def _merge_pdfs_legacy(pdf1_path, pdf2_path, output_path):
import PyPDF2 # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放

查看文件

@@ -0,0 +1,25 @@
# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM
# - 1 修改 `config.py`
# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex .
# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
FROM menghuan1918/ubuntu_uv_ctex:latest
ENV DEBIAN_FRONTEND=noninteractive
SHELL ["/bin/bash", "-c"]
WORKDIR /gpt
COPY . .
RUN /root/.cargo/bin/uv venv --seed \
&& source .venv/bin/activate \
&& /root/.cargo/bin/uv pip install openai numpy arxiv rich colorama Markdown pygments pymupdf python-docx pdfminer \
&& /root/.cargo/bin/uv pip install -r requirements.txt \
&& /root/.cargo/bin/uv clean
# 对齐python3
RUN rm -f /usr/bin/python3 && ln -s /gpt/.venv/bin/python /usr/bin/python3
RUN rm -f /usr/bin/python && ln -s /gpt/.venv/bin/python /usr/bin/python
# 可选步骤,用于预热模块
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
# 启动
CMD ["python3", "-u", "main.py"]

查看文件

@@ -256,6 +256,8 @@ model_info = {
"max_token": 128000,
"tokenizer": tokenizer_gpt4,
"token_cnt": get_token_num_gpt4,
"openai_disable_system_prompt": True,
"openai_disable_stream": True,
},
"o1-mini": {
"fn_with_ui": chatgpt_ui,
@@ -264,6 +266,8 @@ model_info = {
"max_token": 128000,
"tokenizer": tokenizer_gpt4,
"token_cnt": get_token_num_gpt4,
"openai_disable_system_prompt": True,
"openai_disable_stream": True,
},
"gpt-4-turbo": {
@@ -1281,4 +1285,3 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot,
# 更新一下llm_kwargs的参数,否则会出现参数不匹配的问题
yield from method(inputs, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, stream, additional_fn)

查看文件

@@ -202,10 +202,13 @@ def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[],
if (time.time()-observe_window[1]) > watch_dog_patience:
raise RuntimeError("用户取消了程序。")
else: raise RuntimeError("意外Json结构"+delta)
if json_data and json_data['finish_reason'] == 'content_filter':
raise RuntimeError("由于提问含不合规内容被Azure过滤。")
if json_data and json_data['finish_reason'] == 'length':
finish_reason = json_data.get('finish_reason', None) if json_data else None
if finish_reason == 'content_filter':
raise RuntimeError("由于提问含不合规内容被过滤。")
if finish_reason == 'length':
raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
return result
@@ -536,4 +539,3 @@ def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:st
return headers,payload