file dynamic load

这个提交包含在:
binary-husky
2025-07-13 02:51:14 +08:00
父节点 a49085088c
当前提交 55607cbe8b
共有 6 个文件被更改,包括 583 次插入和 15 次删除

查看文件

@@ -1,7 +1,6 @@
import os
supports_format = ['.csv', '.docx', '.epub', '.ipynb', '.mbox', '.md', '.pdf', '.txt', '.ppt',
'.pptm', '.pptx']
supports_format = ['.csv', '.docx', '.epub', '.ipynb', '.mbox', '.md', '.pdf', '.txt', '.ppt', '.pptm', '.pptx', '.bat']
# 修改后的 extract_text 函数,结合 SimpleDirectoryReader 和自定义解析逻辑
@@ -13,10 +12,12 @@ def extract_text(file_path):
if ext in supports_format:
try:
reader = SimpleDirectoryReader(input_files=[file_path])
print(f"Extracting text from {file_path} using SimpleDirectoryReader")
documents = reader.load_data()
if len(documents) > 0:
return documents[0].text
print(f"Complete: Extracting text from {file_path} using SimpleDirectoryReader")
buffer = [ doc.text for doc in documents ]
return '\n'.join(buffer)
except Exception as e:
pass
return None
else:
return '格式不支持'