镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 06:26:47 +00:00
improve long text breakdown perfomance
这个提交包含在:
@@ -31,15 +31,11 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot
|
||||
|
||||
print(file_content)
|
||||
# private_upload里面的文件名在解压zip后容易出现乱码(rar和7z格式正常),故可以只分析文章内容,不输入文件名
|
||||
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
||||
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
|
||||
from request_llms.bridge_all import model_info
|
||||
max_token = model_info[llm_kwargs['llm_model']]['max_token']
|
||||
TOKEN_LIMIT_PER_FRAGMENT = max_token * 3 // 4
|
||||
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
|
||||
txt=file_content,
|
||||
get_token_fn=model_info[llm_kwargs['llm_model']]['token_cnt'],
|
||||
limit=TOKEN_LIMIT_PER_FRAGMENT
|
||||
)
|
||||
paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
|
||||
this_paper_history = []
|
||||
for i, paper_frag in enumerate(paper_fragments):
|
||||
i_say = f'请对下面的文章片段用中文做概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{paper_frag}```'
|
||||
|
||||
在新工单中引用
屏蔽一个用户