From bfa72fb4cf74069de10b25c843d7692367ec6b0e Mon Sep 17 00:00:00 2001
From: lbykkkk
Date: Sat, 9 Nov 2024 14:59:47 +0800
Subject: [PATCH] up

---
 crazy_functions/Arxiv_论文对话.py             |  65 ++++
 .../rag_essay_fns/paper_processing.py         | 312 ++++++++++++++++++
 crazy_functions/rag_essay_fns/rag_handler.py  |  85 +++++
 instruction.txt                               | 192 +++++++++++
 4 files changed, 654 insertions(+)
 create mode 100644 crazy_functions/Arxiv_论文对话.py
 create mode 100644 crazy_functions/rag_essay_fns/paper_processing.py
 create mode 100644 crazy_functions/rag_essay_fns/rag_handler.py
 create mode 100644 instruction.txt

diff --git a/crazy_functions/Arxiv_论文对话.py b/crazy_functions/Arxiv_论文对话.py
new file mode 100644
index 00000000..be448319
--- /dev/null
+++ b/crazy_functions/Arxiv_论文对话.py
@@ -0,0 +1,65 @@
+from toolbox import CatchException, update_ui
+from crazy_functions.rag_essay_fns.paper_processing import ArxivPaperProcessor
+from crazy_functions.rag_essay_fns.rag_handler import RagHandler
+
+@CatchException
+def Rag论文对话(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    txt: 用户输入,通常是arxiv论文链接或ID
+    功能: RAG论文总结和对话
+    """
+    # 初始化处理器
+    processor = ArxivPaperProcessor()
+    rag_handler = RagHandler()
+
+    # Step 1: 下载和提取论文
+    # download_and_extract 是生成器: 中途产出UI更新, 成功时最后产出 (project_folder, arxiv_id) 二元组
+    download_result = processor.download_and_extract(txt, chatbot, history)
+    project_folder, arxiv_id = None, None
+
+    for result in download_result:
+        if isinstance(result, tuple) and len(result) == 2:
+            project_folder, arxiv_id = result
+            break
+        yield result  # 转发内部产出的UI更新, 否则界面不会刷新
+
+    if not project_folder or not arxiv_id:
+        return
+
+    # Step 2: 合并TEX文件
+    # merge_tex_files 是生成器, 其 return 值(字典)需用 yield from 捕获
+    paper_content = yield from processor.merge_tex_files(project_folder, chatbot, history)
+    if not paper_content:
+        return
+
+    # Step 3: RAG处理
+    chatbot.append(["正在构建知识图谱...", "处理中..."])
+    yield from update_ui(chatbot=chatbot, history=history)
+
+    # 处理论文内容
+    rag_handler.process_paper_content(paper_content)
+
+    # 生成初始摘要
+    summary = rag_handler.query("请总结这篇论文的主要内容,包括研究目的、方法、结果和结论。")
+    chatbot.append(["论文摘要", summary])
+    yield from update_ui(chatbot=chatbot, history=history)
+
+    # 交互式问答
+    chatbot.append(["知识图谱构建完成", "您可以开始提问了。支持以下类型的问题:\n1. 论文的具体内容\n2. 研究方法的细节\n3. 实验结果分析\n4. 与其他工作的比较"])
+    yield from update_ui(chatbot=chatbot, history=history)
+
+    # 等待用户提问并回答
+    # 注意: wait_user_input 并非 gpt_academic 现有接口, 需由框架层另行提供, 此处仅为占位
+    while True:
+        question = yield from wait_user_input()
+        if not question:
+            break
+
+        # 根据问题类型选择不同的查询模式
+        if "比较" in question or "关系" in question:
+            mode = "global"  # 使用全局模式处理比较类问题
+        elif "具体" in question or "细节" in question:
+            mode = "local"  # 使用局部模式处理细节问题
+        else:
+            mode = "hybrid"  # 默认使用混合模式
+
+        response = rag_handler.query(question, mode=mode)
+        chatbot.append([question, response])
+        yield from update_ui(chatbot=chatbot, history=history)
\ No newline at end of file
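
说明: 上面的主循环依赖 wait_user_input, 它并不是 gpt_academic 已有的接口, 需要框架层另行提供。下面是一个纯属假设的占位草图, 仅用于说明它应当遵循的生成器协议(中途 yield 让出控制权, 最终通过 return 把用户输入交给 yield from 的调用方):

    import time

    def wait_user_input(poll_interval: float = 0.5, timeout: float = 300.0):
        """假设性占位: 轮询一个由前端写入的共享缓冲区, 拿到新提问后返回。"""
        user_input_buffer = {"text": None}  # 假设前端会把新提问写入这里
        waited = 0.0
        while waited < timeout:
            if user_input_buffer["text"]:
                return user_input_buffer["text"]
            yield None  # 让出控制权, 避免阻塞UI
            time.sleep(poll_interval)
            waited += poll_interval
        return None  # 超时视为用户结束提问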
diff --git a/crazy_functions/rag_essay_fns/paper_processing.py b/crazy_functions/rag_essay_fns/paper_processing.py
new file mode 100644
index 00000000..5e60dc67
--- /dev/null
+++ b/crazy_functions/rag_essay_fns/paper_processing.py
@@ -0,0 +1,312 @@
+from typing import Tuple, Optional, Generator, List
+from toolbox import update_ui, update_ui_lastest_msg, get_conf
+import os, tarfile, requests, re
+
+class ArxivPaperProcessor:
+    """Arxiv论文处理器类"""
+
+    def __init__(self):
+        self.supported_encodings = ['utf-8', 'latin1', 'gbk', 'gb2312', 'ascii']
+        self.arxiv_cache_dir = get_conf("ARXIV_CACHE_DIR")
+
+    def download_and_extract(self, txt: str, chatbot, history) -> Generator[Optional[Tuple[str, str]], None, None]:
+        """
+        Step 1: 下载和提取arxiv论文
+        生成器: 中途产出UI更新, 成功时最后产出 (project_folder, arxiv_id)
+        """
+        try:
+            if txt == "":
+                chatbot.append(("", "请输入arxiv论文链接或ID"))
+                yield from update_ui(chatbot=chatbot, history=history)
+                return
+
+            project_folder, arxiv_id = self.arxiv_download(txt, chatbot, history)
+            if project_folder is None or arxiv_id is None:
+                return
+
+            if not os.path.exists(project_folder):
+                chatbot.append((txt, f"找不到项目文件夹: {project_folder}"))
+                yield from update_ui(chatbot=chatbot, history=history)
+                return
+
+            # 期望的返回值
+            yield project_folder, arxiv_id
+
+        except Exception as e:
+            print(e)
+            yield from update_ui_lastest_msg(
+                "下载失败,请手动下载latex源码:请前往arxiv打开此论文下载页面,点击 Other Formats,然后 Download source。",
+                chatbot=chatbot, history=history)
+            return
+
+    def arxiv_download(self, txt: str, chatbot, history) -> Tuple[Optional[str], Optional[str]]:
+        """
+        下载arxiv论文并解压
+        返回: (project_folder, arxiv_id), 失败时返回 (None, None)
+        注意: 本函数不是生成器, UI刷新由上层插件函数负责
+        """
+        def is_float(s: str) -> bool:
+            try:
+                float(s)
+                return True
+            except ValueError:
+                return False
+
+        if txt.startswith('https://arxiv.org/pdf/'):
+            arxiv_id = txt.split('/')[-1].replace('.pdf', '')  # 2402.14207v2
+            txt = arxiv_id.split('v')[0]  # 2402.14207
+
+        if ('.' in txt) and ('/' not in txt) and is_float(txt):  # is arxiv ID, 如 2402.14207
+            txt = 'https://arxiv.org/abs/' + txt.strip()
+        if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]):  # is arxiv ID, 带版本号, 如 2402.14207v1
+            txt = 'https://arxiv.org/abs/' + txt[:10]
+
+        if not txt.startswith('https://arxiv.org'):
+            chatbot.append((txt, "不是有效的arxiv链接或ID"))
+            return None, None
+
+        chatbot.append(["检测到arxiv文档链接", '尝试下载 ...'])
+
+        url_ = txt  # https://arxiv.org/abs/1707.06690
+
+        if not txt.startswith('https://arxiv.org/abs/'):
+            msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}。"
+            chatbot.append((txt, msg))
+            return None, None
+
+        arxiv_id = url_.split('/')[-1].split('v')[0]
+
+        project_folder = os.path.join(self.arxiv_cache_dir, arxiv_id)
+        dst = os.path.join(project_folder, f'{arxiv_id}.tar.gz')
+        os.makedirs(project_folder, exist_ok=True)  # 下载前确保目标目录存在
+
+        # TODO: 可在此处加入缓存判断(若 dst 已存在且配置允许缓存, 则跳过下载)
+        success = self.download_arxiv_paper(url_, dst, chatbot, history)
+        if not success:
+            raise tarfile.ReadError(f"论文下载失败 {arxiv_id}")
+
+        extract_dst = self.extract_tar_file(dst, project_folder, chatbot, history)
+        return extract_dst, arxiv_id
+
+    def download_arxiv_paper(self, url_: str, dst: str, chatbot, history) -> bool:
+        """下载arxiv论文源码包, 依次尝试 /src/ 与 /e-print/ 两个入口"""
+        try:
+            proxies = get_conf('proxies')
+            for url_tar in [url_.replace('/abs/', '/src/'), url_.replace('/abs/', '/e-print/')]:
+                r = requests.get(url_tar, proxies=proxies)
+                if r.status_code == 200:
+                    with open(dst, 'wb+') as f:
+                        f.write(r.content)
+                    return True
+            return False
+        except requests.RequestException as e:
+            chatbot.append((f"下载失败 {url_}", str(e)))
+            return False
+
+    def extract_tar_file(self, file_path: str, dest_dir: str, chatbot, history) -> str:
+        """解压arxiv论文源码包
+        注意: 此函数不能包含 yield, 否则会变成生成器, 调用处的解压动作永远不会执行
+        """
+        try:
+            with tarfile.open(file_path, 'r:gz') as tar:
+                tar.extractall(path=dest_dir)
+            return dest_dir
+        except tarfile.ReadError as e:
+            chatbot.append((f"解压失败 {file_path}", str(e)))
+            raise e
+
+    def find_main_tex_file(self, tex_files: list) -> str:
+        """查找主TEX文件: 优先取包含 \\documentclass 的文件, 否则回退为最大的文件"""
+        for tex_file in tex_files:
+            with open(tex_file, 'r', encoding='utf-8', errors='ignore') as f:
+                content = f.read()
+                if r'\documentclass' in content:
+                    return tex_file
+        return max(tex_files, key=lambda x: os.path.getsize(x))
+
+    def read_file_with_encoding(self, file_path: str) -> Optional[str]:
+        """使用多种编码尝试读取文件"""
+        for encoding in self.supported_encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    return f.read()
+            except UnicodeDecodeError:
+                continue
+        return None
+
+    def process_tex_content(self, content: str, base_path: str, processed_files=None) -> str:
+        """处理TEX内容, 包括递归处理包含的文件"""
+        if processed_files is None:
+            processed_files = set()
+
+        include_patterns = [
+            r'\\input{([^}]+)}',
+            r'\\include{([^}]+)}',
+            r'\\subfile{([^}]+)}',
+            r'\\input\s+([^\s{]+)',
+        ]
+
+        for pattern in include_patterns:
+            matches = re.finditer(pattern, content)
+            for match in matches:
+                include_file = match.group(1)
+                if not include_file.endswith('.tex'):
+                    include_file += '.tex'
+
+                include_path = os.path.join(base_path, include_file)
+                include_path = os.path.normpath(include_path)
+
+                # 记录已处理文件, 防止循环包含导致无限递归
+                if include_path in processed_files:
+                    continue
+                processed_files.add(include_path)
+
+                if os.path.exists(include_path):
+                    included_content = self.read_file_with_encoding(include_path)
+                    if included_content:
+                        included_content = self.process_tex_content(
+                            included_content,
+                            os.path.dirname(include_path),
+                            processed_files
+                        )
+                        content = content.replace(match.group(0), included_content)
+
+        return content
+
+    def merge_tex_files(self, folder_path: str, chatbot, history):
+        """
+        Step 2: 合并TEX文件
+        生成器: 中途产出UI更新, 结束时 return 字典
+        {'title': 标题, 'abstract': 摘要, 'segments': 分段文本列表}, 失败时 return None;
+        调用方需用 yield from 获取该返回值
+        """
+        try:
+            tex_files = []
+            for root, _, files in os.walk(folder_path):
+                tex_files.extend([os.path.join(root, f) for f in files if f.endswith('.tex')])
+
+            if not tex_files:
+                chatbot.append(("", "未找到任何TEX文件"))
+                yield from update_ui(chatbot=chatbot, history=history)
+                return None
+
+            main_tex_file = self.find_main_tex_file(tex_files)
+            chatbot.append(("", f"找到主TEX文件:{os.path.basename(main_tex_file)}"))
+            yield from update_ui(chatbot=chatbot, history=history)
+
+            tex_content = self.read_file_with_encoding(main_tex_file)
+            if tex_content is None:
+                chatbot.append(("", "无法读取TEX文件,可能是编码问题"))
+                yield from update_ui(chatbot=chatbot, history=history)
+                return None
+
+            full_content = self.process_tex_content(
+                tex_content,
+                os.path.dirname(main_tex_file)
+            )
+
+            cleaned_content = self.clean_tex_content(full_content)
+
+            chatbot.append(("",
+                f"成功处理所有TEX文件:\n"
+                f"- 原始内容大小:{len(full_content)}字符\n"
+                f"- 清理后内容大小:{len(cleaned_content)}字符"
+            ))
+            yield from update_ui(chatbot=chatbot, history=history)
+
+            # 提取标题和摘要
+            title = ""
+            abstract = ""
+            if tex_content:
+                # 提取标题(简单正则, 不支持嵌套花括号)
+                title_match = re.search(r'\\title{([^}]*)}', tex_content)
+                if title_match:
+                    title = title_match.group(1)
+
+                # 提取摘要
+                abstract_match = re.search(r'\\begin{abstract}(.*?)\\end{abstract}',
+                                           tex_content, re.DOTALL)
+                if abstract_match:
+                    abstract = abstract_match.group(1)
+
+            # 按token限制分段(以空格分词数近似token数, 为粗略估计)
+            def split_by_token_limit(text: str, token_limit: int = 1024) -> List[str]:
+                segments = []
+                current_segment = []
+                current_tokens = 0
+
+                for line in text.split('\n'):
+                    line_tokens = len(line.split())
+                    if current_tokens + line_tokens > token_limit:
+                        segments.append('\n'.join(current_segment))
+                        current_segment = [line]
+                        current_tokens = line_tokens
+                    else:
+                        current_segment.append(line)
+                        current_tokens += line_tokens
+
+                if current_segment:
+                    segments.append('\n'.join(current_segment))
+
+                return segments
+
+            text_segments = split_by_token_limit(cleaned_content)
+
+            return {
+                'title': title,
+                'abstract': abstract,
+                'segments': text_segments
+            }
+
+        except Exception as e:
+            chatbot.append(("", f"处理TEX文件时发生错误:{str(e)}"))
+            yield from update_ui(chatbot=chatbot, history=history)
+            return None
+
+    @staticmethod
+    def clean_tex_content(content: str) -> str:
+        """清理TEX内容"""
+        content = re.sub(r'(?m)%.*$', '', content)        # 移除注释(注意: 也会误伤转义的\%, 对粗粒度RAG影响有限)
+        content = re.sub(r'\\cite{[^}]*}', '', content)   # 移除引用
+        content = re.sub(r'\\label{[^}]*}', '', content)  # 移除标签
+        content = re.sub(r'\s+', ' ', content)            # 规范化空白
+        return content.strip()
+
+if __name__ == "__main__":
+    # 测试 arxiv_download 函数
+    processor = ArxivPaperProcessor()
+    chatbot = []
+    history = []
+
+    # 测试不同格式的输入
+    test_inputs = [
+        "https://arxiv.org/abs/2402.14207",      # 标准格式
+        "https://arxiv.org/pdf/2402.14207.pdf",  # PDF链接格式
+        "2402.14207",                            # 纯ID格式
+        "2402.14207v1",                          # 带版本号的ID格式
+        "https://invalid.url",                   # 无效URL测试
+    ]
+
+    for input_url in test_inputs:
+        print(f"\n测试输入: {input_url}")
+        try:
+            project_folder, arxiv_id = processor.arxiv_download(input_url, chatbot, history)
+            if project_folder and arxiv_id:
+                print("下载成功:")
+                print(f"- 项目文件夹: {project_folder}")
+                print(f"- Arxiv ID: {arxiv_id}")
+                print(f"- 文件夹是否存在: {os.path.exists(project_folder)}")
+            else:
+                print("下载失败: 返回值为 None")
+        except Exception as e:
+            print(f"发生错误: {str(e)}")
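
说明: merge_tex_files 采用"生成器中途产出UI更新、结尾 return 结果"的模式, 调用方必须使用 yield from 才能既转发UI更新、又拿到 return 的字典。下面是一个最小消费示例草图(独立于插件框架, processor/chatbot/history 由调用方提供):

    def consume_merged_tex(processor, project_folder, chatbot, history):
        """演示如何从 merge_tex_files 生成器中取回 return 的字典。"""
        # yield from 会把内部的UI更新逐个转发出去, 并在生成器结束时
        # 把 return 值({'title', 'abstract', 'segments'})作为表达式结果返回
        paper_content = yield from processor.merge_tex_files(project_folder, chatbot, history)
        if paper_content:
            print("标题:", paper_content['title'])
            print("分段数:", len(paper_content['segments']))
        return paper_content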
diff --git a/crazy_functions/rag_essay_fns/rag_handler.py b/crazy_functions/rag_essay_fns/rag_handler.py
new file mode 100644
index 00000000..89631dae
--- /dev/null
+++ b/crazy_functions/rag_essay_fns/rag_handler.py
@@ -0,0 +1,85 @@
+from typing import Dict, List
+from lightrag import LightRAG, QueryParam
+from lightrag.utils import EmbeddingFunc
+import numpy as np
+import os
+from toolbox import get_conf
+import openai  # 注意: 以下使用 openai<1.0 的旧版异步接口
+
+class RagHandler:
+    def __init__(self):
+        # 初始化工作目录
+        self.working_dir = os.path.join(get_conf('ARXIV_CACHE_DIR'), 'rag_cache')
+        os.makedirs(self.working_dir, exist_ok=True)
+
+        # 初始化 LightRAG
+        self.rag = LightRAG(
+            working_dir=self.working_dir,
+            llm_model_func=self._llm_model_func,
+            embedding_func=EmbeddingFunc(
+                embedding_dim=1536,  # OpenAI text-embedding-ada-002 的维度
+                max_token_size=8192,
+                func=self._embedding_func,
+            ),
+        )
+
+    async def _llm_model_func(self, prompt: str, system_prompt: str = None,
+                              history_messages: List = None, **kwargs) -> str:
+        """LLM 模型函数 (openai<1.0 异步接口)"""
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        if history_messages:
+            messages.extend(history_messages)
+        messages.append({"role": "user", "content": prompt})
+
+        response = await openai.ChatCompletion.acreate(
+            model="gpt-3.5-turbo",
+            messages=messages,
+            temperature=kwargs.get("temperature", 0),
+            max_tokens=kwargs.get("max_tokens", 1000)
+        )
+        return response.choices[0].message.content
+
+    async def _embedding_func(self, texts: List[str]) -> np.ndarray:
+        """Embedding 函数 (openai<1.0 异步接口)"""
+        response = await openai.Embedding.acreate(
+            model="text-embedding-ada-002",
+            input=texts
+        )
+        embeddings = [item["embedding"] for item in response["data"]]
+        return np.array(embeddings)
+
+    def process_paper_content(self, paper_content: Dict) -> None:
+        """处理论文内容, 构建知识图谱"""
+        # 处理标题和摘要
+        content_list = []
+        if paper_content['title']:
+            content_list.append(f"Title: {paper_content['title']}")
+        if paper_content['abstract']:
+            content_list.append(f"Abstract: {paper_content['abstract']}")
+
+        # 添加分段内容
+        content_list.extend(paper_content['segments'])
+
+        # 插入到 RAG 系统
+        self.rag.insert(content_list)
+
+    def query(self, question: str, mode: str = "hybrid") -> str:
+        """查询论文内容
+        mode: 查询模式, 可选 naive/local/global/hybrid
+        """
+        try:
+            response = self.rag.query(
+                question,
+                param=QueryParam(
+                    mode=mode,
+                    top_k=5,  # 返回相关度最高的5个结果
+                    max_token_for_text_unit=2048,  # 每个文本单元的最大token数
+                    response_type="detailed"  # 返回详细回答
+                )
+            )
+            return response
+        except Exception as e:
+            return f"查询出错: {str(e)}"
\ No newline at end of file
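
说明: RagHandler 对插件层暴露同步的 insert/query 接口(异步的LLM与embedding调用由 LightRAG 内部调度)。下面是一个脱离插件框架的最小使用草图, 其中论文内容为演示用假数据, 并假设已配置 OPENAI_API_KEY 且安装了 openai<1.0:

    import os
    from crazy_functions.rag_essay_fns.rag_handler import RagHandler

    os.environ.setdefault("OPENAI_API_KEY", "sk-...")  # 示意, 请替换为真实密钥

    handler = RagHandler()

    # 模拟 merge_tex_files 返回的结构
    fake_paper = {
        "title": "A Toy Paper",
        "abstract": "We study a toy problem.",
        "segments": ["Section 1: method ...", "Section 2: experiments ..."],
    }
    handler.process_paper_content(fake_paper)

    # 不同查询模式对应不同的检索策略
    print(handler.query("论文的研究方法细节是什么?", mode="local"))
    print(handler.query("这项工作与相关工作有何关系?", mode="global"))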
diff --git a/instruction.txt b/instruction.txt
new file mode 100644
index 00000000..64e8221f
--- /dev/null
+++ b/instruction.txt
@@ -0,0 +1,192 @@
+
+1、GPT Academic 项目结构
+.
+├── Dockerfile
+├── LICENSE
+├── README.md
+├── check_proxy.py
+├── config.py
+├── config_private.py
+├── core_functional.py
+├── crazy_functional.py
+├── crazy_functions
+│ ├── Arxiv_论文对话.py
+│ ├── Conversation_To_File.py
+│ ├── Image_Generate.py
+│ ├── Image_Generate_Wrap.py
+│ ├── Internet_GPT.py
+│ ├── Internet_GPT_Wrap.py
+│ ├── Latex_Function.py
+│ ├── Latex_Function_Wrap.py
+│ ├── Latex全文润色.py
+│ ├── Latex全文翻译.py
+│ ├── Markdown_Translate.py
+│ ├── PDF_Translate.py
+│ ├── PDF_Translate_Wrap.py
+│ ├── Rag_Interface.py
+│ ├── Social_Helper.py
+│ ├── SourceCode_Analyse.py
+│ ├── SourceCode_Comment.py
+│ ├── SourceCode_Comment_Wrap.py
+│ ├── __init__.py
+│ ├── agent_fns
+│ │ ├── auto_agent.py
+│ │ ├── echo_agent.py
+│ │ ├── general.py
+│ │ ├── persistent.py
+│ │ ├── pipe.py
+│ │ ├── python_comment_agent.py
+│ │ ├── python_comment_compare.html
+│ │ └── watchdog.py
+│ ├── ast_fns
+│ │ └── comment_remove.py
+│ ├── chatglm微调工具.py
+│ ├── crazy_utils.py
+│ ├── diagram_fns
+│ │ └── file_tree.py
+│ ├── game_fns
+│ │ ├── game_ascii_art.py
+│ │ ├── game_interactive_story.py
+│ │ └── game_utils.py
+│ ├── gen_fns
+│ │ └── gen_fns_shared.py
+│ ├── ipc_fns
+│ │ └── mp.py
+│ ├── json_fns
+│ │ ├── pydantic_io.py
+│ │ └── select_tool.py
+│ ├── latex_fns
+│ │ ├── latex_actions.py
+│ │ ├── latex_pickle_io.py
+│ │ └── latex_toolbox.py
+│ ├── live_audio
+│ │ ├── aliyunASR.py
+│ │ └── audio_io.py
+│ ├── multi_stage
+│ │ └── multi_stage_utils.py
+│ ├── rag_essay_fns
+│ │ └── multi_stage_utils.py
+│ ├── pdf_fns
+│ │ ├── breakdown_txt.py
+│ │ ├── parse_pdf.py
+│ │ ├── parse_pdf_grobid.py
+│ │ ├── parse_pdf_legacy.py
+│ │ ├── parse_pdf_via_doc2x.py
+│ │ ├── parse_word.py
+│ │ ├── report_gen_html.py
+│ │ ├── report_template.html
+│ │ └── report_template_v2.html
+│ ├── plugin_template
+│ │ └── plugin_class_template.py
+│ ├── prompts
+│ │ └── internet.py
+│ ├── rag_fns
+│ │ ├── llama_index_worker.py
+│ │ ├── milvus_worker.py
+│ │ ├── rag_file_support.py
+│ │ └── vector_store_index.py
+│ ├── vector_fns
+│ │ ├── __init__.py
+│ │ ├── general_file_loader.py
+│ │ └── vector_database.py
+│ ├── vt_fns
+│ │ ├── vt_call_plugin.py
+│ │ ├── vt_modify_config.py
+│ │ └── vt_state.py
+│ ├── 下载arxiv论文翻译摘要.py
+│ ├── 互动小游戏.py
+│ ├── 交互功能函数模板.py
+│ ├── 函数动态生成.py
+│ ├── 命令行助手.py
+│ ├── 多智能体.py
+│ ├── 总结word文档.py
+│ ├── 总结音视频.py
+│ ├── 批量总结PDF文档.py
+│ ├── 批量总结PDF文档pdfminer.py
+│ ├── 批量翻译PDF文档_NOUGAT.py
+│ ├── 数学动画生成manim.py
+│ ├── 理解PDF文档内容.py
+│ ├── 生成函数注释.py
+│ ├── 生成多种Mermaid图表.py
+│ ├── 知识库问答.py
+│ ├── 联网的ChatGPT.py
+│ ├── 联网的ChatGPT_bing版.py
+│ ├── 虚空终端.py
+│ ├── 解析JupyterNotebook.py
+│ ├── 询问多个大语言模型.py
+│ ├── 语音助手.py
+│ ├── 读文章写摘要.py
+│ ├── 谷歌检索小助手.py
+│ ├── 辅助功能.py
+│ └── 高级功能函数模板.py
+├── docker-compose.yml
+├── instruction.txt
+├── main.py
+├── multi_language.py
+├── requirements.txt
+├── shared_utils
+│ ├── advanced_markdown_format.py
+│ ├── char_visual_effect.py
+│ ├── colorful.py
+│ ├── config_loader.py
+│ ├── connect_void_terminal.py
+│ ├── cookie_manager.py
+│ ├── fastapi_server.py
+│ ├── handle_upload.py
+│ ├── key_pattern_manager.py
+│ ├── logging.py
+│ ├── map_names.py
+│ └── text_mask.py
+├── toolbox.py
+└── version
+
+2、light_rag的实现方案路径为crazy_functions/rag_fns/LightRAG,主要功能实现文件为operate.py,rag使用到的其他文件为prompt.py、base.py、storage.py、utils.py,请参考该实现方案来实现插件功能。light_rag的使用案例可以参考crazy_functions/rag_fns/LightRAG/examples路径下的lightrag_hf_demo.py、lightrag_lmdeploy_demo.py:
+路径目录结构为
+
+├── README.md
+├── examples
+│   ├── batch_eval.py
+│   ├── generate_query.py
+│   ├── graph_visual_with_html.py
+│   ├── graph_visual_with_neo4j.py
+│   ├── lightrag_azure_openai_demo.py
+│   ├── lightrag_bedrock_demo.py
+│   ├── lightrag_hf_demo.py
+│   ├── lightrag_ollama_demo.py
+│   ├── lightrag_openai_compatible_demo.py
+│   ├── lightrag_openai_demo.py
+│   └── vram_management_demo.py
+├── lightrag
+│   ├── __init__.py
+│   ├── base.py
+│   ├── lightrag.py
+│   ├── llm.py
+│   ├── operate.py
+│   ├── prompt.py
+│   ├── storage.py
+│   └── utils.py
+├── reproduce
+│   ├── Step_0.py
+│   ├── Step_1.py
+│   ├── Step_1_openai_compatible.py
+│   ├── Step_2.py
+│   ├── Step_3.py
+│   └── Step_3_openai_compatible.py
+├── requirements.txt
+└── setup.py
+
+3、我需要开发一个rag插件,请帮我实现这个插件。插件的名称是rag论文总结,插件主入口在crazy_functions/Arxiv_论文对话.py中的Rag论文对话函数,插件的功能分为文件处理和RAG两个步骤。
+文件处理步骤的流程和要求按顺序如下,请参考gpt_academic已实现的功能,复用现有函数即可:
+  a、支持从 arXiv 下载论文源码、检查本地项目路径、扫描 .tex 文件,此步骤可参考crazy_functions/Latex_Function.py。
+  b、在项目中找到主要的 LaTeX 文件,将多个 TEX 文件合并成一个大的 TEX 文件,便于统一处理,此步骤可参考crazy_functions/Latex_Function.py。
+  c、将合并后的文档进行精细切分,包括读取标题和摘要,此步骤可参考crazy_functions/Latex_Function.py。
+  d、将文档按照 token 限制(1024)进行进一步分段,此步骤可参考crazy_functions/Latex_Function.py(分段思路另见下方草图)。
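+
+附: 针对上面d步骤的token分段, 下面给出一个独立的最小草图(以空格分词数近似token数, 与本补丁 paper_processing.py 中 split_by_token_limit 的思路一致; 真实token数应使用分词器计量, 此处仅为演示性近似):
+
+    from typing import List
+
+    def split_by_token_limit(text: str, token_limit: int = 1024) -> List[str]:
+        """按近似token上限切分文本: 逐行累计词数, 超限即另起一段。"""
+        segments, current, count = [], [], 0
+        for line in text.split('\n'):
+            line_tokens = len(line.split())  # 近似: 一个空格分隔的词 ≈ 一个token
+            if count + line_tokens > token_limit and current:
+                segments.append('\n'.join(current))
+                current, count = [line], line_tokens
+            else:
+                current.append(line)
+                count += line_tokens
+        if current:
+            segments.append('\n'.join(current))
+        return segments
+
+    # 用法: 2000行 × 每行3词 ≈ 6000 token, 限制1024时约切成6段
+    demo_text = "\n".join(["word word word"] * 2000)
+    print(len(split_by_token_limit(demo_text)))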
+
+4、对于RAG,我希望采用light_rag的方案。参考已有实现,其主要功能包括:
+  e、参考 `chunking_by_token_size` 对文本分块,并利用 `_handle_entity_relation_summary` 函数对d步骤生成的文本块进行实体或关系的摘要。
+  f、利用 `_handle_single_entity_extraction` 和 `_handle_single_relationship_extraction`:从记录中提取单个实体或关系信息。
+  g、`_merge_nodes_then_upsert` 和 `_merge_edges_then_upsert`:合并并插入节点或边。
+  h、`extract_entities`:处理多个文本块,提取实体和关系,并存储在知识图谱和向量数据库中。
+  i、`local_query`:根据查询提取关键词并生成响应。
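+
+附: 为直观展示e-i各步骤串联后的效果, 下面给出一个模仿examples/lightrag_openai_demo.py风格的最小使用草图(WORKING_DIR与示例文本均为演示假设, 需要配置OPENAI_API_KEY; insert内部会依次完成分块、实体/关系抽取与图谱入库, query的mode对应naive/local/global/hybrid四种检索策略):
+
+    import os
+    from lightrag import LightRAG, QueryParam
+    from lightrag.llm import gpt_4o_mini_complete  # examples中使用的默认LLM函数
+
+    WORKING_DIR = "./rag_cache_demo"  # 演示用目录
+    os.makedirs(WORKING_DIR, exist_ok=True)
+
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=gpt_4o_mini_complete,
+    )
+
+    # 对应e-h: insert内部完成chunking_by_token_size分块、extract_entities实体/关系抽取,
+    # 以及_merge_nodes_then_upsert/_merge_edges_then_upsert合并入库
+    rag.insert("这里放入d步骤切好的论文文本段, 演示时可用任意一段文字代替。")
+
+    # 对应i: 不同mode走不同的检索路径(local_query/global_query等)
+    for mode in ["naive", "local", "global", "hybrid"]:
+        print(mode, rag.query("这篇论文的核心方法是什么?", param=QueryParam(mode=mode)))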