improve long text breakdown performance

这个提交包含在:
binary-husky
2023-12-19 19:30:44 +08:00
父节点 6e9936531d
当前提交 a0bfa7ba1c
共有 13 个文件被更改,包括 186 次插入和 143 次删除

查看文件

@@ -28,8 +28,8 @@ class PaperFileGroup():
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
self.sp_file_index.append(index)