镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-09 16:06:48 +00:00
version 3.6
这个提交包含在:
@@ -14,7 +14,7 @@ import math
|
||||
class GROBID_OFFLINE_EXCEPTION(Exception):
    """Marker exception for an unavailable GROBID service.

    NOTE(review): the raise sites are not visible here; presumably this is
    raised when no configured GROBID endpoint can be used -- confirm against
    the callers.
    """
|
||||
|
||||
def get_avail_grobid_url():
|
||||
GROBID_URLS, = get_conf('GROBID_URLS')
|
||||
GROBID_URLS = get_conf('GROBID_URLS')
|
||||
if len(GROBID_URLS) == 0: return None
|
||||
try:
|
||||
_grobid_url = random.choice(GROBID_URLS) # 随机负载均衡
|
||||
@@ -73,7 +73,7 @@ def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chat
|
||||
return res_path
|
||||
|
||||
def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
|
||||
from crazy_functions.crazy_utils import construct_html
|
||||
from crazy_functions.pdf_fns.report_gen_html import construct_html
|
||||
from crazy_functions.crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
||||
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||
from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
||||
@@ -82,7 +82,7 @@ def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_fi
|
||||
# title
|
||||
title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
|
||||
# authors
|
||||
authors = article_dict.get('authors', '无法获取 authors'); prompt += f'authors:{authors}\n\n'
|
||||
authors = article_dict.get('authors', '无法获取 authors')[:100]; prompt += f'authors:{authors}\n\n'
|
||||
# abstract
|
||||
abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
|
||||
# command
|
||||
@@ -103,7 +103,7 @@ def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_fi
|
||||
inputs_show_user_array = []
|
||||
|
||||
# get_token_num
|
||||
from request_llm.bridge_all import model_info
|
||||
from request_llms.bridge_all import model_info
|
||||
enc = model_info[llm_kwargs['llm_model']]['tokenizer']
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
|
||||
import os
|
||||
|
||||
|
||||
|
||||
|
||||
class construct_html():
    """Collect two-column report rows and render them into the HTML report.

    Every row is serialised as a JavaScript object literal whose fields are
    ``String.raw`` template literals. The accumulated rows are substituted
    for the ``__TF_ARR__`` placeholder of the report template in
    :meth:`save_file`.
    """

    def __init__(self) -> None:
        # Concatenation of all row literals added so far.
        self.html_string = ""

    @staticmethod
    def _escape_for_template_literal(text):
        """Return *text* made safe for embedding between JS backticks.

        A raw backtick would terminate the surrounding ``String.raw`` literal
        early, so it is replaced by its HTML entity (the payload is rendered
        as HTML, where the entity displays as a backtick).
        """
        text = text.replace('`', '&#96;')
        # Pad a trailing "\", "}" or "$" with one space so that character is
        # not directly followed by the closing backtick of the literal.
        if text.endswith(("\\", "}", "$")):
            text += ' '
        return text

    @staticmethod
    def _split_header_and_body(text):
        """Split *text* into a ``(header, body)`` pair for one column.

        If the text has a single line, or its first line is longer than 50
        characters, the header is the first 20 characters and the body is the
        whole text. Otherwise the first line is the header and the remaining
        lines form the body.
        """
        lines = text.split('\n')
        if len(lines) == 1 or len(lines[0]) > 50:
            return text[:20], text
        return lines[0], '\n'.join(lines[1:])

    def add_row(self, a, b):
        """Append one row with primary column *a* and secondary column *b*.

        Both arguments are strings. Each body is passed through
        ``toolbox.markdown_convertion`` before being embedded.
        """
        from toolbox import markdown_convertion
        # NOTE: the key "secondary_rol" is kept exactly as in the original;
        # presumably the report template reads it under that spelling.
        template = """
            {
                primary_col: {
                    header: String.raw`__PRIMARY_HEADER__`,
                    msg: String.raw`__PRIMARY_MSG__`,
                },
                secondary_rol: {
                    header: String.raw`__SECONDARY_HEADER__`,
                    msg: String.raw`__SECONDARY_MSG__`,
                }
            },
        """
        escape = self._escape_for_template_literal

        row = template
        # Placeholders are filled in the same order as before:
        # primary header, primary msg, secondary header, secondary msg.
        for prefix, text in (("PRIMARY", a), ("SECONDARY", b)):
            header, body = self._split_header_and_body(text)
            row = row.replace(f"__{prefix}_HEADER__", escape(header))
            row = row.replace(f"__{prefix}_MSG__", escape(markdown_convertion(body)))

        self.html_string += row

    def save_file(self, file_name):
        """Render the report into the log folder and return the written path.

        *file_name* is joined onto the directory returned by
        ``toolbox.get_log_folder()``.
        """
        from toolbox import get_log_folder
        with open('crazy_functions/pdf_fns/report_template.html', 'r', encoding='utf8') as f:
            html_template = f.read()
        html_template = html_template.replace("__TF_ARR__", self.html_string)

        output_path = os.path.join(get_log_folder(), file_name)
        with open(output_path, 'w', encoding='utf8') as f:
            # Round-trip through UTF-8 with errors ignored to drop characters
            # that cannot be encoded (e.g. lone surrogates) instead of failing.
            f.write(html_template.encode('utf-8', 'ignore').decode())
        return output_path
|
||||
文件差异因一行或多行过长而隐藏
在新工单中引用
屏蔽一个用户