添加历史存档读取的功能

2025-12-06 22:46:48 +00:00 · 2023-04-29 00:00:26 +08:00
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -563,3 +563,46 @@ def read_and_clean_pdf_text(fp):
        #    print亮绿('***************************')

    return meta_txt, page_one_meta
+
+
+def get_files_from_everything(txt, type): # type='.md'
+    """
+    这个函数是用来获取指定目录下所有指定类型（如.md）的文件，并且对于网络上的文件，也可以获取它。
+    下面是对每个参数和返回值的说明：
+    参数 
+    - txt: 路径或网址，表示要搜索的文件或者文件夹路径或网络上的文件。 
+    - type: 字符串，表示要搜索的文件类型。默认是.md。
+    返回值 
+    - success: 布尔值，表示函数是否成功执行。 
+    - file_manifest: 文件路径列表，里面包含以指定类型为后缀名的所有文件的绝对路径。 
+    - project_folder: 字符串，表示文件所在的文件夹路径。如果是网络上的文件，就是临时文件夹的路径。
+    该函数详细注释已添加，请确认是否满足您的需要。
+    """
+    import glob, os
+
+    success = True
+    if txt.startswith('http'):
+        # 网络的远程文件
+        import requests
+        from toolbox import get_conf
+        proxies, = get_conf('proxies')
+        r = requests.get(txt, proxies=proxies)
+        with open('./gpt_log/temp'+type, 'wb+') as f: f.write(r.content)
+        project_folder = './gpt_log/'
+        file_manifest = ['./gpt_log/temp'+type]
+    elif txt.endswith(type):
+        # 直接给定文件
+        file_manifest = [txt]
+        project_folder = os.path.dirname(txt)
+    elif os.path.exists(txt):
+        # 本地路径，递归搜索
+        project_folder = txt
+        file_manifest = [f for f in glob.glob(f'{project_folder}/**/*'+type, recursive=True)]
+        if len(file_manifest) == 0:
+            success = False
+    else:
+        project_folder = None
+        file_manifest = []
+        success = False
+
+    return success, file_manifest, project_folder