new

2025-12-09 07:56:48 +00:00 · 2023-12-26 23:59:36 +08:00
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -175,7 +175,6 @@ class LatexPaperFileGroup():
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []
-
        # count_token
        from request_llms.bridge_all import model_info
        enc = model_info["gpt-3.5-turbo"]['tokenizer']
@@ -192,13 +191,12 @@ class LatexPaperFileGroup():
                self.sp_file_index.append(index)
                self.sp_file_tag.append(self.file_paths[index])
            else:
-                from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
+                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+                segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
                for j, segment in enumerate(segments):
                    self.sp_file_contents.append(segment)
                    self.sp_file_index.append(index)
                    self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
-        print('Segmentation: done')

    def merge_result(self):
        self.file_result = ["" for _ in range(len(self.file_paths))]
@@ -404,7 +402,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
            result_pdf = pj(work_folder_modified, f'merge_diff.pdf')    # get pdf path
            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
        if modified_pdf_success:
-            yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history)    # 刷新Gradio前端界面
+            yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍候 ...', chatbot, history)    # 刷新Gradio前端界面
            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
            origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
            if os.path.exists(pj(work_folder, '..', 'translation')):
@@ -416,8 +414,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                    from .latex_toolbox import merge_pdfs
                    concat_pdf = pj(work_folder_modified, f'comparison.pdf')
                    merge_pdfs(origin_pdf, result_pdf, concat_pdf)
+                    if os.path.exists(pj(work_folder, '..', 'translation')):
+                        shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
                    promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
                except Exception as e:
+                    print(e)
                    pass
            return True # 成功啦
        else:
--- a/crazy_functions/latex_fns/latex_toolbox.py
+++ b/crazy_functions/latex_fns/latex_toolbox.py
@@ -493,11 +493,38 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
        return False
    return True

+def run_in_subprocess_wrapper_func(func, args, kwargs, return_dict, exception_dict):
+    import sys
+    try:
+        result = func(*args, **kwargs)
+        return_dict['result'] = result
+    except Exception as e:
+        exc_info = sys.exc_info()
+        exception_dict['exception'] = exc_info

+def run_in_subprocess(func):
+    import multiprocessing
+    def wrapper(*args, **kwargs):
+        return_dict = multiprocessing.Manager().dict()
+        exception_dict = multiprocessing.Manager().dict()
+        process = multiprocessing.Process(target=run_in_subprocess_wrapper_func, 
+                                            args=(func, args, kwargs, return_dict, exception_dict))
+        process.start()
+        process.join()
+        process.close()
+        if 'exception' in exception_dict:
+            # ooops, the subprocess ran into an exception
+            exc_info = exception_dict['exception']
+            raise exc_info[1].with_traceback(exc_info[2])
+        if 'result' in return_dict.keys():
+            # If the subprocess ran successfully, return the result
+            return return_dict['result']
+    return wrapper

-def merge_pdfs(pdf1_path, pdf2_path, output_path):
-    import PyPDF2
+def _merge_pdfs(pdf1_path, pdf2_path, output_path):
+    import PyPDF2   # PyPDF2这个库有严重的内存泄露问题，把它放到子进程中运行，从而方便内存的释放
    Percent = 0.95
+    # raise RuntimeError('PyPDF2 has a serious memory leak problem, please use other tools to merge PDF files.')
    # Open the first PDF file
    with open(pdf1_path, 'rb') as pdf1_file:
        pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
@@ -531,3 +558,5 @@ def merge_pdfs(pdf1_path, pdf2_path, output_path):
            # Save the merged PDF file
            with open(output_path, 'wb') as output_file:
                output_writer.write(output_file)
+
+merge_pdfs = run_in_subprocess(_merge_pdfs) # PyPDF2这个库有严重的内存泄露问题，把它放到子进程中运行，从而方便内存的释放