From a9c86a7fb8853df13335cac337681f4e10ceddb3 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Fri, 18 Oct 2024 14:16:24 +0000
Subject: [PATCH 1/3] pre: prototype uploading translated arxiv PDFs to the GPTAC cloud

---
 crazy_functions/Latex_Function.py          | 42 ++++++++++++++++++++--
 crazy_functions/latex_fns/latex_actions.py |  3 ++
 tests/test_latex_auto_correct.py           |  3 +-
 3 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/crazy_functions/Latex_Function.py b/crazy_functions/Latex_Function.py
index 53bbdd21..236460eb 100644
--- a/crazy_functions/Latex_Function.py
+++ b/crazy_functions/Latex_Function.py
@@ -364,6 +364,24 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
             chatbot=chatbot, history=history)
         return
 
+    # allow_cloud_io = True
+    # arxiv_id = "2203.01927"
+    # if allow_cloud_io and arxiv_id:
+    #     # 如果用户允许，我们将arxiv论文PDF上传到云端
+    #     for file_path in chatbot._cookies.get("files_to_promote", []):
+    #         if file_path.endswith('comparison.pdf'):
+    #             def compute_hash(file_path):
+    #                 return map_file_to_sha256(file_path)
+    #             with open(file_path, 'rb') as f:
+    #                 import requests
+    #                 url = 'https://cloud-2.agent-matrix.com/upload'
+    #                 files = {'file': (file_path, f, 'application/octet-stream')}
+    #                 data = {
+    #                     'arxiv_id': arxiv_id,
+    #                     'file_hash': compute_hash(file_path),
+    #                 }
+    #                 resp = requests.get(url=url, files=files, data=data, timeout=10)
+
     if txt.endswith('.pdf'):
         report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"发现已经存在翻译好的PDF文档")
         yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
@@ -406,14 +424,34 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
     # <-------------- zip PDF ------------->
     zip_res = zip_result(project_folder)
     if success:
+        allow_cloud_io = True
+        arxiv_id = "2203.01927"
+        if allow_cloud_io and arxiv_id:
+            # 如果用户允许，我们将arxiv论文PDF上传到云端
+            for file_path in chatbot._cookies.get("files_to_promote", []):
+                if file_path.endswith('translate_zh.pdf') or file_path.endswith('comparison.pdf'):
+                    def compute_hash(file_path):
+                        return map_file_to_sha256(file_path)
+                    with open(file_path, 'rb') as f:
+                        import requests
+                        url = 'https://cloud-2.agent-matrix.com/upload'
+                        files = {'file': (file_path, f, 'application/octet-stream')}
+                        data = {
+                            'arxiv_id': arxiv_id,
+                            'file_hash': compute_hash(file_path),
+                        }
+                        resp = requests.post(url=url, files=files, data=data, timeout=10)
+
+
         chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
-        yield from update_ui(chatbot=chatbot, history=history);
+        yield from update_ui(chatbot=chatbot, history=history)
         time.sleep(1)  # 刷新界面
         promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
+
     else:
         chatbot.append((f"失败了",
                         '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 您可以到Github Issue区, 用该压缩包进行反馈。如系统是Linux，请检查系统字体（见Github wiki） ...'))
-        yield from update_ui(chatbot=chatbot, history=history);
+        yield from update_ui(chatbot=chatbot, history=history)
         time.sleep(1)  # 刷新界面
         promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
 
diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py
index 4293f0d0..67c9b8c6 100644
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -423,6 +423,9 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                 except Exception as e:
                     logger.error(e)
                     pass
+
+
+
             return True # 成功啦
         else:
             if n_fix>=max_try: break
diff --git a/tests/test_latex_auto_correct.py b/tests/test_latex_auto_correct.py
index c51e7414..93c8f707 100644
--- a/tests/test_latex_auto_correct.py
+++ b/tests/test_latex_auto_correct.py
@@ -19,4 +19,5 @@ if __name__ == "__main__":
     plugin_test = importlib.import_module('test_utils').plugin_test
 
 
-    plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="2203.01927")
+    # plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="2203.01927")
+    plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="gpt_log/arxiv_cache/2203.01927/workfolder")

From 50a1ea83ef406051975e347c084df35bb548c487 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Fri, 18 Oct 2024 18:05:50 +0000
Subject: [PATCH 2/3] control whether to allow sharing translation results with
 GPTAC academic cloud.

---
 crazy_functions/Latex_Function.py          | 67 +++++++++-------------
 crazy_functions/Latex_Function_Wrap.py     |  8 +++
 crazy_functions/latex_fns/latex_actions.py | 65 ++++++++++++++++++++-
 tests/test_latex_auto_correct.py           |  5 +-
 4 files changed, 104 insertions(+), 41 deletions(-)

diff --git a/crazy_functions/Latex_Function.py b/crazy_functions/Latex_Function.py
index 236460eb..51b03283 100644
--- a/crazy_functions/Latex_Function.py
+++ b/crazy_functions/Latex_Function.py
@@ -3,7 +3,7 @@ from toolbox import CatchException, report_exception, update_ui_lastest_msg, zip
 from functools import partial
 from loguru import logger
 
-import glob, os, requests, time, json, tarfile
+import glob, os, requests, time, json, tarfile, threading
 
 pj = os.path.join
 ARXIV_CACHE_DIR = get_conf("ARXIV_CACHE_DIR")
@@ -338,11 +338,17 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
     # <-------------- more requirements ------------->
     if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
     more_req = plugin_kwargs.get("advanced_arg", "")
-    no_cache = more_req.startswith("--no-cache")
-    if no_cache: more_req.lstrip("--no-cache")
+
+    no_cache = ("--no-cache" in more_req)
+    if no_cache: more_req = more_req.replace("--no-cache", "").strip()
+
+    allow_gptac_cloud_io = ("--allow-cloudio" in more_req)  # 从云端下载翻译结果，以及上传翻译结果到云端
+    if allow_gptac_cloud_io: more_req = more_req.replace("--allow-cloudio", "").strip()
+
     allow_cache = not no_cache
     _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
 
+
     # <-------------- check deps ------------->
     try:
         import glob, os, time, subprocess
@@ -364,29 +370,25 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
             chatbot=chatbot, history=history)
         return
 
-    # allow_cloud_io = True
-    # arxiv_id = "2203.01927"
-    # if allow_cloud_io and arxiv_id:
-    #     # 如果用户允许，我们将arxiv论文PDF上传到云端
-    #     for file_path in chatbot._cookies.get("files_to_promote", []):
-    #         if file_path.endswith('comparison.pdf'):
-    #             def compute_hash(file_path):
-    #                 return map_file_to_sha256(file_path)
-    #             with open(file_path, 'rb') as f:
-    #                 import requests
-    #                 url = 'https://cloud-2.agent-matrix.com/upload'
-    #                 files = {'file': (file_path, f, 'application/octet-stream')}
-    #                 data = {
-    #                     'arxiv_id': arxiv_id,
-    #                     'file_hash': compute_hash(file_path),
-    #                 }
-    #                 resp = requests.get(url=url, files=files, data=data, timeout=10)
-
     if txt.endswith('.pdf'):
         report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"发现已经存在翻译好的PDF文档")
         yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
         return
 
+    # #################################################################
+    if allow_gptac_cloud_io and arxiv_id:
+        # 访问 GPTAC学术云，查询云端是否存在该论文的翻译版本
+        from crazy_functions.latex_fns.latex_actions import check_gptac_cloud
+        success, downloaded = check_gptac_cloud(arxiv_id, chatbot)
+        if success:
+            chatbot.append([
+                f"检测到GPTAC云端存在翻译版本, 如果不满意翻译结果, 请禁用云端分享, 然后重新执行。", 
+                None
+            ])
+            yield from update_ui(chatbot=chatbot, history=history)
+            return
+    #################################################################
+
     if os.path.exists(txt):
         project_folder = txt
     else:
@@ -424,24 +426,11 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
     # <-------------- zip PDF ------------->
     zip_res = zip_result(project_folder)
     if success:
-        allow_cloud_io = True
-        arxiv_id = "2203.01927"
-        if allow_cloud_io and arxiv_id:
-            # 如果用户允许，我们将arxiv论文PDF上传到云端
-            for file_path in chatbot._cookies.get("files_to_promote", []):
-                if file_path.endswith('translate_zh.pdf') or file_path.endswith('comparison.pdf'):
-                    def compute_hash(file_path):
-                        return map_file_to_sha256(file_path)
-                    with open(file_path, 'rb') as f:
-                        import requests
-                        url = 'https://cloud-2.agent-matrix.com/upload'
-                        files = {'file': (file_path, f, 'application/octet-stream')}
-                        data = {
-                            'arxiv_id': arxiv_id,
-                            'file_hash': compute_hash(file_path),
-                        }
-                        resp = requests.post(url=url, files=files, data=data, timeout=10)
-
+        if allow_gptac_cloud_io and arxiv_id:
+            # 如果用户允许，我们将翻译好的arxiv论文PDF上传到GPTAC学术云
+            from crazy_functions.latex_fns.latex_actions import upload_to_gptac_cloud_if_user_allow
+            threading.Thread(target=upload_to_gptac_cloud_if_user_allow, 
+                args=(chatbot, arxiv_id), daemon=True).start()
 
         chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
         yield from update_ui(chatbot=chatbot, history=history)
diff --git a/crazy_functions/Latex_Function_Wrap.py b/crazy_functions/Latex_Function_Wrap.py
index 5d7b1f31..e591e380 100644
--- a/crazy_functions/Latex_Function_Wrap.py
+++ b/crazy_functions/Latex_Function_Wrap.py
@@ -30,6 +30,9 @@ class Arxiv_Localize(GptAcademicPluginTemplate):
                             default_value="", type="string").model_dump_json(), # 高级参数输入区，自动同步
             "allow_cache":
                 ArgProperty(title="是否允许从缓存中调取结果", options=["允许缓存", "从头执行"], default_value="允许缓存", description="无", type="dropdown").model_dump_json(),
+            "allow_cloudio":
+                ArgProperty(title="是否允许向GPTAC学术云共享翻译结果", options=["允许", "禁止"], default_value="禁止", description="人人为我，我为人人", type="dropdown").model_dump_json(),
+
         }
         return gui_definition
 
@@ -38,9 +41,14 @@ class Arxiv_Localize(GptAcademicPluginTemplate):
         执行插件
         """
         allow_cache = plugin_kwargs["allow_cache"]
+        allow_cloudio = plugin_kwargs["allow_cloudio"]
         advanced_arg = plugin_kwargs["advanced_arg"]
 
         if allow_cache == "从头执行": plugin_kwargs["advanced_arg"] = "--no-cache " + plugin_kwargs["advanced_arg"]
+
+        # 从云端下载翻译结果，以及上传翻译结果到云端；人人为我，我为人人。
+        if allow_cloudio == "允许": plugin_kwargs["advanced_arg"] = "--allow-cloudio " + plugin_kwargs["advanced_arg"]
+
         yield from Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
 
 
diff --git a/crazy_functions/latex_fns/latex_actions.py b/crazy_functions/latex_fns/latex_actions.py
index 67c9b8c6..cfa0f155 100644
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -3,7 +3,7 @@ import re
 import shutil
 import numpy as np
 from loguru import logger
-from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
+from toolbox import update_ui, update_ui_lastest_msg, get_log_folder, gen_time_str
 from toolbox import get_conf, promote_file_to_downloadzone
 from crazy_functions.latex_fns.latex_toolbox import PRESERVE, TRANSFORM
 from crazy_functions.latex_fns.latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
@@ -471,3 +471,66 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
     except:
         from toolbox import trimmed_format_exc
         logger.error('writing html result failed:', trimmed_format_exc())
+
+
+def upload_to_gptac_cloud_if_user_allow(chatbot, arxiv_id):
+    """
+    Best-effort upload of the translated PDFs of an arxiv paper to the GPTAC cloud.
+
+    Reads chatbot._cookies["files_to_promote"] and posts every file whose path ends with
+    'translate_zh.pdf' or 'comparison.pdf', with `arxiv_id` and the map_file_to_sha256 hash.
+    Nothing is uploaded unless a 'translate_zh.pdf' is present. Errors are logged, never raised.
+    """
+    try:
+        import requests
+        from toolbox import map_file_to_sha256
+        url = 'https://cloud-2.agent-matrix.com/upload'
+        promoted = chatbot._cookies.get("files_to_promote", [])
+        # skip everything when the expected translation result was not generated
+        if not any(path.endswith('translate_zh.pdf') for path in promoted):
+            return
+        for file_path in promoted:
+            # normalized upload name; when several suffixes match, the last one wins
+            align_name = None
+            for name in ['translate_zh.pdf', 'comparison.pdf']:
+                if file_path.endswith(name): align_name = name
+            if align_name is None:
+                continue
+            logger.info(f'Uploading to GPTAC cloud as the user has set `allow_cloud_io`: {file_path}')
+            data = {'arxiv_id': arxiv_id, 'file_hash': map_file_to_sha256(file_path)}
+            with open(file_path, 'rb') as f:
+                files = {'file': (align_name, f, 'application/octet-stream')}
+                resp = requests.post(url=url, files=files, data=data, timeout=30)
+            logger.info(f'Uploading terminate ({resp.status_code})`: {file_path}')
+    except Exception as e:
+        # this is a secondary feature: a failed upload must not interrupt the program,
+        # but it is logged so that the failure stays diagnosable
+        logger.warning(f'Uploading to GPTAC cloud failed: {e}')
+
+def check_gptac_cloud(arxiv_id, chatbot):
+    """
+    Ask the GPTAC cloud for an existing translation of `arxiv_id` and download what it offers.
+
+    Returns (success, downloaded): `success` is True when at least one of 'translate_zh.pdf' /
+    'comparison.pdf' was fetched; `downloaded` lists the promote_file_to_downloadzone results.
+    Errors are logged, never raised; files fetched before an error are still reported.
+    """
+    import requests
+    downloaded = []
+    try:
+        for pdf_target in ['translate_zh.pdf', 'comparison.pdf']:
+            data = {'arxiv_id': arxiv_id, 'name': pdf_target}
+            # bounded wait, so an unreachable cloud cannot hang the plugin
+            resp = requests.post(url='https://cloud-2.agent-matrix.com/paper_exist', data=data, timeout=30)
+            url = resp.text.strip('"')
+            if not url.startswith("http"): continue  # no download link returned for this file
+            logger.info(f'Downloading from GPTAC cloud: {url}')
+            resp = requests.get(url=url, timeout=30)
+            resp.raise_for_status()  # never save an HTTP error page as if it were the PDF
+            target = os.path.join(get_log_folder(plugin_name='gptac_cloud'), gen_time_str(), pdf_target)
+            os.makedirs(os.path.dirname(target), exist_ok=True)
+            with open(target, 'wb') as f: f.write(resp.content)
+            downloaded.append(promote_file_to_downloadzone(target, chatbot=chatbot))
+    except Exception as e:
+        logger.warning(f'Checking GPTAC cloud failed: {e}')
+    return bool(downloaded), downloaded
diff --git a/tests/test_latex_auto_correct.py b/tests/test_latex_auto_correct.py
index 93c8f707..ea421370 100644
--- a/tests/test_latex_auto_correct.py
+++ b/tests/test_latex_auto_correct.py
@@ -20,4 +20,7 @@ if __name__ == "__main__":
 
 
     # plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="2203.01927")
-    plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="gpt_log/arxiv_cache/2203.01927/workfolder")
+    # plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="gpt_log/arxiv_cache/2203.01927/workfolder")
+    # plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="2410.05779")
+    plugin_test(plugin='crazy_functions.Latex_Function->Latex翻译中文并重新编译PDF', main_input="gpt_log/default_user/workfolder")
+

From 42d10a9481d14e2603b16351261d5c07250ac548 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Mon, 21 Oct 2024 14:05:05 +0000
Subject: [PATCH 3/3] update doc2x functions

---
 .../pdf_fns/parse_pdf_via_doc2x.py            | 191 +++++++++++-------
 tests/test_doc2x.py                           |   7 +
 2 files changed, 121 insertions(+), 77 deletions(-)
 create mode 100644 tests/test_doc2x.py

diff --git a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
index d64aa91c..97c62fbf 100644
--- a/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
+++ b/crazy_functions/pdf_fns/parse_pdf_via_doc2x.py
@@ -4,7 +4,9 @@ from toolbox import promote_file_to_downloadzone, extract_archive
 from toolbox import generate_file_link, zip_folder
 from crazy_functions.crazy_utils import get_files_from_everything
 from shared_utils.colorful import *
+from loguru import logger
 import os
+import time
 
 def refresh_key(doc2x_api_key):
     import requests, json
@@ -22,105 +24,140 @@ def refresh_key(doc2x_api_key):
         raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
     return doc2x_api_key
 
+
+
 def 解析PDF_DOC2X_转Latex(pdf_file_path):
+    """Convert a PDF to LaTeX via Doc2x and return the folder containing the unpacked result."""
+    return 解析PDF_DOC2X(pdf_file_path, format='tex')[1]
+
+
+def 解析PDF_DOC2X(pdf_file_path, format='tex'):
+    """
+    Convert a PDF with the Doc2x v2 web API and unpack the exported archive.
+
+    Args:
+        pdf_file_path: path of the PDF file to upload.
+        format: export format passed to Doc2x as `to`: 'tex', 'md' or 'docx'.
+
+    Returns:
+        (zip_path, unzip_path): the downloaded zip file and the folder it was extracted into.
+
+    Raises:
+        RuntimeError: Doc2x reported a failure or answered with a non-200 status.
+    """
     import requests, json, os
     DOC2X_API_KEY = get_conf('DOC2X_API_KEY')
     latex_dir = get_log_folder(plugin_name="pdf_ocr_latex")
+    markdown_dir = get_log_folder(plugin_name="pdf_ocr")
     doc2x_api_key = DOC2X_API_KEY
-    if doc2x_api_key.startswith('sk-'):
-        url = "https://api.doc2x.noedgeai.com/api/v1/pdf"
-    else:
-        doc2x_api_key = refresh_key(doc2x_api_key)
-        url = "https://api.doc2x.noedgeai.com/api/platform/pdf"
 
+    headers = {"Authorization": "Bearer " + doc2x_api_key}
+
+    # < ------ Step 1: upload ------ >
+    logger.info("Doc2x 第1步：上传")
+    with open(pdf_file_path, 'rb') as file:
+        res = requests.post(
+            "https://v2.doc2x.noedgeai.com/api/v2/parse/pdf",
+            headers=headers,
+            data=file
+        )
+    if res.status_code != 200:
+        raise RuntimeError(f"Doc2x return an error: {res.json()}")
+    uuid = res.json()['data']['uid']
+
+    # < ------ Step 2: poll until parsing finishes ------ >
+    logger.info("Doc2x 第2步：轮询等待")
+    params = {'uid': uuid}
+    while True:
+        res = requests.get(
+            'https://v2.doc2x.noedgeai.com/api/v2/parse/status',
+            headers=headers,
+            params=params
+        )
+        res_json = res.json()
+        status = res_json['data']['status']
+        if status == "success":
+            break
+        if status == "failed":
+            raise RuntimeError(f"Doc2x return an error: {res_json}")
+        # "processing" or any unexpected status: wait before polling again (never busy-loop)
+        time.sleep(3)
+        if status == "processing":
+            logger.info(f"Doc2x is processing at {res_json['data']['progress']}%")
+
+    # < ------ Step 3: request the conversion ------ >
+    logger.info("Doc2x 第3步：提交转化")
+    data = {"uid": uuid, "to": format, "formula_mode": "dollar", "filename": "output"}
     res = requests.post(
-        url,
-        files={"file": open(pdf_file_path, "rb")},
-        data={"ocr": "1"},
-        headers={"Authorization": "Bearer " + doc2x_api_key}
+        'https://v2.doc2x.noedgeai.com/api/v2/convert/parse',
+        headers=headers,
+        json=data
     )
-    res_json = []
     if res.status_code == 200:
-        decoded = res.content.decode("utf-8")
-        for z_decoded in decoded.split('\n'):
-            if len(z_decoded) == 0: continue
-            assert z_decoded.startswith("data: ")
-            z_decoded = z_decoded[len("data: "):]
-            decoded_json = json.loads(z_decoded)
-            res_json.append(decoded_json)
+        res_json = res.json()
     else:
-        raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
+        raise RuntimeError(f"Doc2x return an error: {res.json()}")
 
-    uuid = res_json[0]['uuid']
-    to = "latex" # latex, md, docx
-    url = "https://api.doc2x.noedgeai.com/api/export"+"?request_id="+uuid+"&to="+to
 
-    res = requests.get(url, headers={"Authorization": "Bearer " + doc2x_api_key})
-    latex_zip_path = os.path.join(latex_dir, gen_time_str() + '.zip')
-    latex_unzip_path = os.path.join(latex_dir, gen_time_str())
-    if res.status_code == 200:
-        with open(latex_zip_path, "wb") as f: f.write(res.content)
-    else:
-        raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
+    # < ------ Step 4: poll until the converted file is ready ------ >
+    logger.info("Doc2x 第4步：等待结果")
+    while True:
+        res = requests.get(
+            'https://v2.doc2x.noedgeai.com/api/v2/convert/parse/result',
+            headers=headers,
+            params=params
+        )
+        res_json = res.json()
+        status = res_json['data']['status']
+        if status == "success":
+            break
+        if status == "failed":
+            raise RuntimeError(f"Doc2x return an error: {res_json}")
+        time.sleep(3)  # also throttles statuses other than "processing"
+        if status == "processing":
+            logger.info(f"Doc2x still processing")
 
+
+    # < ------ Step 5: download and unpack the result ------ >
+    logger.info("Doc2x 第5步：最后的处理")
+
+    # only 'tex' uses the latex folder; any other format (md, docx) uses the pdf_ocr folder, so target_path is always bound
+    target_path = latex_dir if format == 'tex' else markdown_dir
+    os.makedirs(target_path, exist_ok=True)
+    result_url = res_json['data']['url']
+    zip_path = os.path.join(target_path, gen_time_str() + '.zip')
+    unzip_path = os.path.join(target_path, gen_time_str())
+
+    max_attempt = 3
+    # < ------ download ------ >
+    for attempt in range(max_attempt):
+        try:
+            res = requests.get(result_url)
+            if res.status_code != 200:
+                raise RuntimeError(f"Doc2x return an error: {res.status_code} {res.text[:200]}")
+            with open(zip_path, "wb") as f: f.write(res.content)
+            break  # downloaded successfully, stop retrying
+        except Exception as e:
+            if attempt == max_attempt - 1:
+                raise
+            logger.error(f"Failed to download latex file, retrying... {e}")
+            time.sleep(3)
+
+    # < ------ unzip ------ >
     import zipfile
-    with zipfile.ZipFile(latex_zip_path, 'r') as zip_ref:
-        zip_ref.extractall(latex_unzip_path)
-
-
-    return latex_unzip_path
-
-
+    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+        zip_ref.extractall(unzip_path)
+    return zip_path, unzip_path
 
 
 def 解析PDF_DOC2X_单文件(fp, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, DOC2X_API_KEY, user_request):
 
-
     def pdf2markdown(filepath):
-        import requests, json, os
-        markdown_dir = get_log_folder(plugin_name="pdf_ocr")
-        doc2x_api_key = DOC2X_API_KEY
-        if doc2x_api_key.startswith('sk-'):
-            url = "https://api.doc2x.noedgeai.com/api/v1/pdf"
-        else:
-            doc2x_api_key = refresh_key(doc2x_api_key)
-            url = "https://api.doc2x.noedgeai.com/api/platform/pdf"
-
-        chatbot.append((None, "加载PDF文件，发送至DOC2X解析..."))
+        chatbot.append((None, f"Doc2x 解析中"))
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
-        res = requests.post(
-            url,
-            files={"file": open(filepath, "rb")},
-            data={"ocr": "1"},
-            headers={"Authorization": "Bearer " + doc2x_api_key}
-        )
-        res_json = []
-        if res.status_code == 200:
-            decoded = res.content.decode("utf-8")
-            for z_decoded in decoded.split('\n'):
-                if len(z_decoded) == 0: continue
-                assert z_decoded.startswith("data: ")
-                z_decoded = z_decoded[len("data: "):]
-                decoded_json = json.loads(z_decoded)
-                res_json.append(decoded_json)
-            if 'limit exceeded' in decoded_json.get('status', ''):
-                raise RuntimeError("Doc2x API 页数受限，请联系 Doc2x 方面，并更换新的 API 秘钥。")
-        else:
-            raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
-        uuid = res_json[0]['uuid']
-        to = "md" # latex, md, docx
-        url = "https://api.doc2x.noedgeai.com/api/export"+"?request_id="+uuid+"&to="+to
+        md_zip_path, unzipped_folder = 解析PDF_DOC2X(filepath, format='md')
 
-        chatbot.append((None, f"读取解析: {url} ..."))
-        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-        res = requests.get(url, headers={"Authorization": "Bearer " + doc2x_api_key})
-        md_zip_path = os.path.join(markdown_dir, gen_time_str() + '.zip')
-        if res.status_code == 200:
-            with open(md_zip_path, "wb") as f: f.write(res.content)
-        else:
-            raise RuntimeError(format("[ERROR] status code: %d, body: %s" % (res.status_code, res.text)))
         promote_file_to_downloadzone(md_zip_path, chatbot=chatbot)
         chatbot.append((None, f"完成解析 {md_zip_path} ..."))
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
diff --git a/tests/test_doc2x.py b/tests/test_doc2x.py
new file mode 100644
index 00000000..9d02c4b7
--- /dev/null
+++ b/tests/test_doc2x.py
@@ -0,0 +1,7 @@
+import init_test
+
+from crazy_functions.pdf_fns.parse_pdf_via_doc2x import 解析PDF_DOC2X_转Latex
+
+# Manual smoke test: runs at import time and calls the Doc2x web API. Alternative inputs:
+#   "gpt_log/arxiv_cache_old/2410.10819/workfolder/merge.pdf", "gpt_log/arxiv_cache_ooo/2410.07095/workfolder/merge.pdf"
+解析PDF_DOC2X_转Latex("2410.11190v2.pdf")