镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 14:36:48 +00:00
improve long text breakdown perfomance
这个提交包含在:
@@ -175,7 +175,6 @@ class LatexPaperFileGroup():
|
||||
self.sp_file_contents = []
|
||||
self.sp_file_index = []
|
||||
self.sp_file_tag = []
|
||||
|
||||
# count_token
|
||||
from request_llms.bridge_all import model_info
|
||||
enc = model_info["gpt-3.5-turbo"]['tokenizer']
|
||||
@@ -192,13 +191,12 @@ class LatexPaperFileGroup():
|
||||
self.sp_file_index.append(index)
|
||||
self.sp_file_tag.append(self.file_paths[index])
|
||||
else:
|
||||
from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
||||
segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
|
||||
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
|
||||
segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
|
||||
for j, segment in enumerate(segments):
|
||||
self.sp_file_contents.append(segment)
|
||||
self.sp_file_index.append(index)
|
||||
self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
|
||||
print('Segmentation: done')
|
||||
|
||||
def merge_result(self):
|
||||
self.file_result = ["" for _ in range(len(self.file_paths))]
|
||||
|
||||
在新工单中引用
屏蔽一个用户