[arxiv trans] add html comparison to zip file

修复提示
3.42
2025-12-06 14:36:48 +00:00 · 2023-06-29 12:29:49 +08:00 · 2023-06-29 12:15:52 +08:00 · 2023-06-29 11:32:19 +08:00 · 2023-06-29 11:30:42 +08:00 · 2023-06-27 23:37:25 +08:00
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -348,17 +348,28 @@ def get_crazy_functions():
    try:
        from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
        function_plugins.update({
-            "[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": {
+            "Latex英文纠错+高亮修正位置 [需Latex]": {
                "Color": "stop",
                "AsButton": False,
-                # "AdvancedArgs": True,
-                # "ArgsReminder": "",
+                "AdvancedArgs": True,
+                "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令（使用英文）。",
                "Function": HotReload(Latex英文纠错加PDF对比)
            }
        })
        from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
        function_plugins.update({
-            "Arixv翻译（输入arxivID） [需Latex]": {
+            "Arixv翻译（输入arxivID）[需Latex]": {
+                "Color": "stop",
+                "AsButton": False,
+                "AdvancedArgs": True,
+                "ArgsReminder": 
+                    "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ 
+                    "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+                "Function": HotReload(Latex翻译中文并重新编译PDF)
+            }
+        })
+        function_plugins.update({
+            "本地论文翻译（上传Latex压缩包）[需Latex]": {
                "Color": "stop",
                "AsButton": False,
                "AdvancedArgs": True,
@@ -368,17 +379,6 @@ def get_crazy_functions():
                "Function": HotReload(Latex翻译中文并重新编译PDF)
            }
        })
-        # function_plugins.update({
-        #     "本地论文翻译（上传Latex压缩包） [需Latex]": {
-        #         "Color": "stop",
-        #         "AsButton": False,
-        #         "AdvancedArgs": True,
-        #         "ArgsReminder": 
-        #             "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ 
-        #             "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
-        #         "Function": HotReload(Latex翻译中文并重新编译PDF)
-        #     }
-        # })
    except:
        print('Load function plugin failed')

--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -19,9 +19,9 @@ def switch_prompt(pfg, mode, more_requirement):
    - sys_prompt_array: A list of strings containing prompts for system prompts.
    """
    n_split = len(pfg.sp_file_contents)
-    if mode == 'proofread':
+    if mode == 'proofread_en':
        inputs_array = [r"Below is a section from an academic paper, proofread this section." + 
-                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + 
+                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
                        r"Answer me only with the revised text:" + 
                        f"\n\n{frag}" for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
@@ -70,6 +70,12 @@ def move_project(project_folder, arxiv_id=None):
        shutil.rmtree(new_workfolder)
    except:
        pass
+
+    # align subfolder if there is a folder wrapper
+    items = glob.glob(pj(project_folder,'*'))
+    if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1:
+        if os.path.isdir(items[0]): project_folder = items[0]
+
    shutil.copytree(src=project_folder, dst=new_workfolder)
    return new_workfolder

@@ -141,7 +147,11 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
    chatbot.append([ "函数插件功能？",
        "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。仅在Windows系统进行了测试，其他操作系统表现未知。"])
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
+    
+    # <-------------- more requirements ------------->
+    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+    more_req = plugin_kwargs.get("advanced_arg", "")
+    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
@@ -180,13 +190,13 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo


    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
-    if not os.path.exists(project_folder + '/merge_proofread.tex'):
+    if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, 
-                                chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
+                                chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)


    # <-------------- compile PDF ------------->
-    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread', 
+    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en', 
                             work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
    

@@ -195,6 +205,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
    if success:
        chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
    else:
        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
@@ -278,6 +289,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
    if success:
        chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
    else:
        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -190,10 +190,10 @@ def test_Latex():
    # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
    # txt = r"https://arxiv.org/abs/2002.09253"
    # txt = r"https://arxiv.org/abs/2306.07831"
-    txt = r"https://arxiv.org/abs/2212.10156"
+    # txt = r"https://arxiv.org/abs/2212.10156"
    # txt = r"https://arxiv.org/abs/2211.11559"
    # txt = r"https://arxiv.org/abs/2303.08774"
-    # txt = r"https://arxiv.org/abs/2303.12712"
+    txt = r"https://arxiv.org/abs/2303.12712"
    # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
    

--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -430,7 +430,7 @@ class LatexPaperSplit():
    """
    def __init__(self) -> None:
        self.nodes = None
-        self.msg = "{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成，" + \
+        self.msg = "*{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成，" + \
            "版权归原文作者所有。翻译内容可靠性无保障，请仔细鉴别并以原文为准。" + \
            "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
        # 请您不要删除或修改这行警告，除非您是论文的原作者（如果您是论文原作者，欢迎加REAME中的QQ联系开发者）
@@ -532,11 +532,11 @@ class LatexPaperFileGroup():
                f.write(res)
        return manifest

-def write_html(sp_file_contents, sp_file_result, chatbot):
+def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):

    # write html
    try:
-        import copy
+        import shutil
        from .crazy_utils import construct_html
        from toolbox import gen_time_str
        ch = construct_html() 
@@ -554,6 +554,7 @@ def write_html(sp_file_contents, sp_file_result, chatbot):
                ch.add_row(a=orig, b=trans)
        create_report_file_name = f"{gen_time_str()}.trans.html"
        ch.save_file(create_report_file_name)
+        shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
        promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
    except:
        from toolbox import trimmed_format_exc
@@ -634,7 +635,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
        pfg.get_token_num = None
        objdump(pfg, file=pj(project_folder,'temp.pkl'))

-    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot)
+    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)

    #  <-------- 写出文件 ----------> 
    msg = f"当前大语言模型: {llm_kwargs['llm_model']}，当前语言模型温度设定: {llm_kwargs['temperature']}。"
@@ -741,13 +742,15 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
        results_ += f"对比PDF编译是否成功: {diff_pdf_success};" 
        yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面

+        if diff_pdf_success:
+            result_pdf = pj(work_folder_modified, f'merge_diff.pdf')    # get pdf path
+            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
        if modified_pdf_success:
            yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history)    # 刷新Gradio前端界面
-            os.chdir(current_dir)
-            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
+            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
            if os.path.exists(pj(work_folder, '..', 'translation')):
                shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
-            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)
+            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
            return True # 成功啦
        else:
            if n_fix>=max_try: break
--- a/crazy_functions/理解PDF文档内容.py
+++ b/crazy_functions/理解PDF文档内容.py
@@ -13,7 +13,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
    # 递归地切割PDF文件，每一块（尽量是完整的一个section，比如introduction，experiment等，必要时再进行切割）
    # 的长度必须小于 2500 个 Token
    file_content, page_one = read_and_clean_pdf_text(file_name) # （尝试）按照章节切割PDF
-
+    file_content = file_content.encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars
+    page_one = str(page_one).encode('utf-8', 'ignore').decode()  # avoid reading non-utf8 chars
+    
    TOKEN_LIMIT_PER_FRAGMENT = 2500

    from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
--- a/4
+++ b/4
@@ -1,5 +1,5 @@
 {
-  "version": 3.41,
+  "version": 3.42,
  "show_feature": true,
-  "new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
+  "new_feature": "完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
 }
作者	SHA1	备注	提交日期
qingxu fu	49253c4dc6	[arxiv trans] add html comparison to zip file	2023-06-29 12:29:49 +08:00
qingxu fu	1a00093015	修复提示	2023-06-29 12:15:52 +08:00
qingxu fu	64f76e7401	3.42	2023-06-29 11:32:19 +08:00
qingxu fu	eb4c07997e	修复Latex矫错和本地Latex论文翻译的问题	2023-06-29 11:30:42 +08:00
binary-husky	d684b4cdb3	Merge pull request #905 from Xminry/master Update 理解PDF文档内容.py	2023-06-27 23:37:25 +08:00
Xminry	4290821a50	Update 理解PDF文档内容.py	2023-06-27 01:57:31 +08:00