Mirrored from
https://github.com/binary-husky/gpt_academic.git
file dynamic load
@@ -0,0 +1,74 @@
import re
import os


def start_with_url(inputs: str):
    """
    Check whether the input starts with http:// or https:// and is a valid URL.
    """
    if not ("http://" in inputs or "https://" in inputs):
        return False
    try:
        # Strip surrounding whitespace and trailing punctuation (ASCII and full-width),
        # then require the remaining text to be exactly one token that parses as a URL.
        text = inputs.strip(',.!?,。!? \t\n\r')
        words = text.split()
        if len(words) != 1:
            return False
        from urllib.parse import urlparse
        result = urlparse(text)
        return all([result.scheme, result.netloc])
    except Exception:
        return False
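

# Illustrative behaviour (hypothetical inputs):
#   start_with_url("https://github.com/binary-husky/gpt_academic")   -> True
#   start_with_url("please summarise https://example.com for me")    -> False  (extra words besides the URL)
#   start_with_url("just some text")                                 -> False  (no URL scheme at all)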


def load_web_content(inputs: str, chatbot_with_cookie, history: list):
    from crazy_functions.doc_fns.read_fns.web_reader import WebTextExtractor, WebExtractorConfig
    from toolbox import update_ui

    extractor = WebTextExtractor(WebExtractorConfig())
    try:
        # Show a "fetching the page, please wait" notice while the content is extracted
        chatbot_with_cookie.append([None, "正在提取网页内容,请稍作等待..."])
        yield from update_ui(chatbot=chatbot_with_cookie, history=history)
        web_content = extractor.extract_text(inputs)
        # Remove the notice
        chatbot_with_cookie.pop()
        # Display the extracted content and record it in the history
        chatbot_with_cookie.append([None, f"网页{inputs}的文本内容如下:" + web_content])
        history.extend([f"网页{inputs}的文本内容如下:" + web_content])
        yield from update_ui(chatbot=chatbot_with_cookie, history=history)
    except Exception as e:
        # On failure, remove the notice if it is still the last message
        if len(chatbot_with_cookie) > 0 and chatbot_with_cookie[-1][-1] == "正在提取网页内容,请稍作等待...":
            chatbot_with_cookie.pop()
        chatbot_with_cookie.append([inputs, f"网页内容提取失败: {str(e)}"])
        yield from update_ui(chatbot=chatbot_with_cookie, history=history)
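

# Illustrative call site (hypothetical): inside a gpt_academic plugin generator,
# the extracted page text would be streamed into the chat window with something like
#   yield from load_web_content(user_input, chatbot, history)
# where `user_input`, `chatbot` and `history` are the plugin's usual arguments.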


def extract_file_path(text):
    # Match a path that starts with private_upload and ends with a timestamp-named directory
    pattern = r'(private_upload/[^\s]+?/\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2})'
    match = re.search(pattern, text)
    if match and os.path.exists(match.group(1)):
        return match.group(1)
    return None
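

# Illustrative match (hypothetical path): an input such as
#   "please analyse private_upload/default_user/2024-01-31-12-00-00/report.pdf"
# would yield "private_upload/default_user/2024-01-31-12-00-00", provided that
# directory actually exists on disk, since os.path.exists() is also checked.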


def contain_uploaded_files(inputs: str):
    return extract_file_path(inputs) is not None


def load_uploaded_files(inputs, method, llm_kwargs, plugin_kwargs, chatbot_with_cookie, history, system_prompt, stream, additional_fn):
    # Load the content of the uploaded file referenced in the input
    from crazy_functions.doc_fns.text_content_loader import TextContentLoader
    file_path = extract_file_path(inputs)
    loader = TextContentLoader(chatbot_with_cookie, history)
    yield from loader.execute(file_path)

    # Build the question: drop the file path from the input and anchor the question to the loaded content
    original_question = inputs.replace(file_path, '').strip()
    if not original_question:
        original_question = "请简单分析上述文件内容"
    else:
        original_question = f"基于上述文件内容,{original_question}"

    return original_question
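

# Illustrative flow (hypothetical caller): a dispatcher that detects an uploaded
# file in the user's input could load it and rewrite the question before querying
# the model, capturing the generator's return value with `yield from`:
#   if contain_uploaded_files(txt):
#       txt = yield from load_uploaded_files(txt, method, llm_kwargs, plugin_kwargs,
#                                            chatbot, history, system_prompt, stream, additional_fn)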