镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 14:36:48 +00:00
tiktoken做lazyload处理
这个提交包含在:
@@ -11,9 +11,8 @@ class PaperFileGroup():
|
||||
self.sp_file_tag = []
|
||||
|
||||
# count_token
|
||||
- import tiktoken
- from toolbox import get_conf
- enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+ from request_llm.bridge_all import model_info
+ enc = model_info["gpt-3.5-turbo"]['tokenizer']
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
self.get_token_num = get_token_num
|
||||
|
||||
|
||||
@@ -11,9 +11,8 @@ class PaperFileGroup():
|
||||
self.sp_file_tag = []
|
||||
|
||||
# count_token
|
||||
- import tiktoken
- from toolbox import get_conf
- enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+ from request_llm.bridge_all import model_info
+ enc = model_info["gpt-3.5-turbo"]['tokenizer']
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
self.get_token_num = get_token_num
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@ import traceback
|
||||
from toolbox import update_ui, get_conf
|
||||
|
||||
def input_clipping(inputs, history, max_token_limit):
|
||||
- import tiktoken
  import numpy as np
- enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+ from request_llm.bridge_all import model_info
+ enc = model_info["gpt-3.5-turbo"]['tokenizer']
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
|
||||
mode = 'input-and-history'
|
||||
|
||||
@@ -59,9 +59,8 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_
|
||||
|
||||
# 第5步:Token限制下的截断与处理
|
||||
MAX_TOKEN = 3000
|
||||
- import tiktoken
- from toolbox import get_conf
- enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+ from request_llm.bridge_all import model_info
+ enc = model_info["gpt-3.5-turbo"]['tokenizer']
|
||||
def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
|
||||
|
||||
|
||||
@@ -11,9 +11,8 @@ class PaperFileGroup():
|
||||
self.sp_file_tag = []
|
||||
|
||||
# count_token
|
||||
- import tiktoken
- from toolbox import get_conf
- enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+ from request_llm.bridge_all import model_info
+ enc = model_info["gpt-3.5-turbo"]['tokenizer']
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
self.get_token_num = get_token_num
|
||||
|
||||
|
||||
@@ -68,8 +68,8 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
|
||||
|
||||
# 递归地切割PDF文件
|
||||
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
||||
- from toolbox import get_conf
- enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+ from request_llm.bridge_all import model_info
+ enc = model_info["gpt-3.5-turbo"]['tokenizer']
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
|
||||
txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
|
||||
|
||||
@@ -17,8 +17,8 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
|
||||
TOKEN_LIMIT_PER_FRAGMENT = 2500
|
||||
|
||||
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
|
||||
- from toolbox import get_conf
- enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+ from request_llm.bridge_all import model_info
+ enc = model_info["gpt-3.5-turbo"]['tokenizer']
|
||||
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
|
||||
paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
|
||||
txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
|
||||
|
||||
在新工单中引用
屏蔽一个用户