version 3.6

2025-12-09 16:06:48 +00:00 · 2023-11-20 01:17:59 +08:00
--- a/crazy_functions/pdf_fns/parse_pdf.py
+++ b/crazy_functions/pdf_fns/parse_pdf.py
@@ -14,7 +14,7 @@ import math
 class GROBID_OFFLINE_EXCEPTION(Exception): pass

 def get_avail_grobid_url():
-    GROBID_URLS, = get_conf('GROBID_URLS')
+    GROBID_URLS = get_conf('GROBID_URLS')
    if len(GROBID_URLS) == 0: return None
    try:
        _grobid_url = random.choice(GROBID_URLS) # 随机负载均衡
@@ -73,7 +73,7 @@ def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chat
    return res_path

 def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
-    from crazy_functions.crazy_utils import construct_html
+    from crazy_functions.pdf_fns.report_gen_html import construct_html
    from crazy_functions.crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
    from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
@@ -82,7 +82,7 @@ def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_fi
    # title
    title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
    # authors
-    authors = article_dict.get('authors', '无法获取 authors'); prompt += f'authors:{authors}\n\n'
+    authors = article_dict.get('authors', '无法获取 authors')[:100]; prompt += f'authors:{authors}\n\n'
    # abstract
    abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
    # command
@@ -103,7 +103,7 @@ def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_fi
    inputs_show_user_array = []

    # get_token_num
-    from request_llm.bridge_all import model_info
+    from request_llms.bridge_all import model_info
    enc = model_info[llm_kwargs['llm_model']]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

--- a/crazy_functions/pdf_fns/report_gen_html.py
+++ b/crazy_functions/pdf_fns/report_gen_html.py
@@ -0,0 +1,97 @@
+from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
+import os
+import re
+
+
+# Directory containing this module; report_template.html is shipped next to it.
+_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Matches any of the four row-template placeholders so they can be filled in one pass.
+_PLACEHOLDER_PATTERN = re.compile(r"__(?:PRIMARY|SECONDARY)_(?:HEADER|MSG)__")
+
+
+def _escape_for_js_template(text):
+    """Return ``text`` made safe for embedding in a JavaScript ``String.raw`` template literal."""
+    # A raw backtick would terminate the literal, so emit its HTML entity instead.
+    text = text.replace('`', '&#96;')
+    # "${" starts an interpolation even under String.raw; break the pair up the same way.
+    text = text.replace('${', '$&#123;')
+    # Pad a trailing backslash, "}" or "$" so it is not adjacent to the closing backtick.
+    if text.endswith(("\\", "}", "$")):
+        text += ' '
+    return text
+
+
+def _split_header_and_body(text):
+    """Split one cell into ``(header, body)``.
+
+    A single-line cell, or one whose first line exceeds 50 characters, has no
+    usable title line: its first 20 characters become the header and the whole
+    text stays as the body. Otherwise the first line is the header and the
+    remaining lines form the body.
+    """
+    lines = text.split('\n')
+    if len(lines) == 1 or len(lines[0]) > 50:
+        return text[:20], text
+    return lines[0], '\n'.join(lines[1:])
+
+
+class construct_html():
+    """Collect two-column rows and write them into the HTML report template.
+
+    Rows are accumulated in ``html_string`` as JavaScript object literals;
+    ``save_file`` substitutes them for ``__TF_ARR__`` in ``report_template.html``.
+    """
+
+    def __init__(self) -> None:
+        self.html_string = ""
+
+    def add_row(self, a, b):
+        """Append one row with ``a`` in the primary column and ``b`` in the secondary column.
+
+        Both arguments are strings; each body is rendered with
+        ``toolbox.markdown_convertion`` before being embedded.
+        """
+        from toolbox import markdown_convertion
+        # The key names (including ``secondary_rol``) are kept verbatim; presumably
+        # report_template.html reads them -- verify there before renaming.
+        template = """
+            {
+                primary_col: {
+                    header: String.raw`__PRIMARY_HEADER__`,
+                    msg: String.raw`__PRIMARY_MSG__`,
+                },
+                secondary_rol: {
+                    header: String.raw`__SECONDARY_HEADER__`,
+                    msg: String.raw`__SECONDARY_MSG__`,
+                }
+            },
+        """
+        primary_header, primary_body = _split_header_and_body(a)
+        secondary_header, secondary_body = _split_header_and_body(b)
+        values = {
+            "__PRIMARY_HEADER__": _escape_for_js_template(primary_header),
+            "__PRIMARY_MSG__": _escape_for_js_template(markdown_convertion(primary_body)),
+            "__SECONDARY_HEADER__": _escape_for_js_template(secondary_header),
+            "__SECONDARY_MSG__": _escape_for_js_template(markdown_convertion(secondary_body)),
+        }
+        # Fill every placeholder in a single pass so that placeholder-like text inside
+        # one cell is never substituted again by a later replacement.
+        self.html_string += _PLACEHOLDER_PATTERN.sub(lambda m: values[m.group(0)], template)
+
+    def save_file(self, file_name):
+        """Render the accumulated rows into the template and write the report.
+
+        The report is written as ``file_name`` inside ``get_log_folder()``;
+        the path of the written file is returned.
+        """
+        # Resolve the template relative to this module rather than the working directory.
+        template_path = os.path.join(_MODULE_DIR, 'report_template.html')
+        with open(template_path, 'r', encoding='utf8') as f:
+            html_template = f.read()
+        html_template = html_template.replace("__TF_ARR__", self.html_string)
+        output_path = os.path.join(get_log_folder(), file_name)
+        with open(output_path, 'w', encoding='utf8') as f:
+            # The encode/decode round trip drops characters UTF-8 cannot encode
+            # (e.g. lone surrogates) instead of failing the write.
+            f.write(html_template.encode('utf-8', 'ignore').decode())
+        return output_path
--- a/crazy_functions/pdf_fns/report_template.html
+++ b/crazy_functions/pdf_fns/report_template.html