镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 14:36:48 +00:00
accelerate nltk
这个提交包含在:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -163,3 +163,5 @@ objdump*
|
||||
TODO
|
||||
experimental_mods
|
||||
search_results
|
||||
gg.docx
|
||||
unstructured_reader.py
|
||||
|
||||
@@ -34,5 +34,7 @@ RUN uv venv --python=3.12 && uv pip install -r requirements.txt -i https://mirro
|
||||
# # 非必要步骤,用于预热模块(可以删除)
|
||||
RUN python -c 'from check_proxy import warm_up_modules; warm_up_modules()'
|
||||
|
||||
ENV CGO_ENABLED=0
|
||||
|
||||
# 启动(必要)
|
||||
CMD ["bash", "-c", "python main.py"]
|
||||
|
||||
@@ -254,13 +254,20 @@ def try_warm_up_vectordb():
|
||||
nltk.data.path.append(target)
|
||||
try:
|
||||
# 尝试加载 punkt
|
||||
logger.info(f'nltk模块预热')
|
||||
nltk.data.find('tokenizers/punkt')
|
||||
nltk.data.find('tokenizers/punkt_tab')
|
||||
nltk.data.find('taggers/averaged_perceptron_tagger_eng')
|
||||
logger.info('nltk模块预热完成(读取本地缓存)')
|
||||
except:
|
||||
# 如果找不到,则尝试下载
|
||||
try:
|
||||
logger.info(f'模块预热: nltk punkt (从 Github 下载部分文件到 {target})')
|
||||
nltk.download('punkt', download_dir=target)
|
||||
from shared_utils.nltk_downloader import Downloader
|
||||
_downloader = Downloader()
|
||||
_downloader.download('punkt', download_dir=target)
|
||||
_downloader.download('punkt_tab', download_dir=target)
|
||||
_downloader.download('averaged_perceptron_tagger_eng', download_dir=target)
|
||||
logger.info('nltk模块预热完成')
|
||||
except Exception:
|
||||
logger.exception('模块预热: nltk punkt 失败,可能需要手动安装 nltk punkt')
|
||||
|
||||
2561
shared_utils/nltk_downloader.py
普通文件
2561
shared_utils/nltk_downloader.py
普通文件
文件差异内容过多而无法显示
加载差异
在新工单中引用
屏蔽一个用户