Merge branch 'master' into boyin_rag

这个提交包含在:
Boyin Liu
2024-10-05 23:58:43 +08:00
提交者 GitHub
当前提交 748e31102f
共有 93 个文件被更改,包括 2597 次插入和 799 次删除

查看文件

@@ -1,4 +1,5 @@
import atexit
from loguru import logger
from typing import List
from llama_index.core import Document
@@ -37,14 +38,14 @@ class SaveLoad():
return True
def save_to_checkpoint(self, checkpoint_dir=None):
print(f'saving vector store to: {checkpoint_dir}')
logger.info(f'saving vector store to: {checkpoint_dir}')
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
self.vs_index.storage_context.persist(persist_dir=checkpoint_dir)
def load_from_checkpoint(self, checkpoint_dir=None):
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
if self.does_checkpoint_exist(checkpoint_dir=checkpoint_dir):
print('loading checkpoint from disk')
logger.info('loading checkpoint from disk')
from llama_index.core import StorageContext, load_index_from_storage
storage_context = StorageContext.from_defaults(persist_dir=checkpoint_dir)
self.vs_index = load_index_from_storage(storage_context, embed_model=self.embed_model)
@@ -80,10 +81,10 @@ class LlamaIndexRagWorker(SaveLoad):
# This function is for debugging
self.vs_index.storage_context.index_store.to_dict()
docstore = self.vs_index.storage_context.docstore.docs
vector_store_preview = "\n".join([f"{_id} | {tn.text}" for _id, tn in docstore.items()])
print('\n++ --------inspect_vector_store begin--------')
print(vector_store_preview)
print('oo --------inspect_vector_store end--------')
vector_store_preview = "\n".join([ f"{_id} | {tn.text}" for _id, tn in docstore.items() ])
logger.info('\n++ --------inspect_vector_store begin--------')
logger.info(vector_store_preview)
logger.info('oo --------inspect_vector_store end--------')
return vector_store_preview
def add_documents_to_vector_store(self, document_list: List[Document]):
@@ -126,9 +127,8 @@ class LlamaIndexRagWorker(SaveLoad):
return DEFAULT_QUERY_GENERATION_PROMPT.format(context_str=context_str, query_str=query)
def generate_node_array_preview(self, nodes):
buf = "\n".join([f"(No.{i+1} | score {n.score:.3f}): {n.text}" for i, n in enumerate(nodes)])
if self.debug_mode:
print(buf)
buf = "\n".join(([f"(No.{i+1} | score {n.score:.3f}): {n.text}" for i, n in enumerate(nodes)]))
if self.debug_mode: logger.info(buf)
return buf
def purge_vector_store(self):

查看文件

@@ -2,6 +2,7 @@ import llama_index
import os
import atexit
from typing import List
from loguru import logger
from llama_index.core import Document
from llama_index.core.schema import TextNode
from request_llms.embed_models.openai_embed import OpenAiEmbeddingModel
@@ -44,14 +45,14 @@ class MilvusSaveLoad():
return True
def save_to_checkpoint(self, checkpoint_dir=None):
print(f'saving vector store to: {checkpoint_dir}')
logger.info(f'saving vector store to: {checkpoint_dir}')
# if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
# self.vs_index.storage_context.persist(persist_dir=checkpoint_dir)
def load_from_checkpoint(self, checkpoint_dir=None):
if checkpoint_dir is None: checkpoint_dir = self.checkpoint_dir
if self.does_checkpoint_exist(checkpoint_dir=checkpoint_dir):
print('loading checkpoint from disk')
logger.info('loading checkpoint from disk')
from llama_index.core import StorageContext, load_index_from_storage
storage_context = StorageContext.from_defaults(persist_dir=checkpoint_dir)
try:
@@ -101,7 +102,7 @@ class MilvusRagWorker(MilvusSaveLoad, LlamaIndexRagWorker):
vector_store_preview = "\n".join(
[f"{node.id_} | {node.text}" for node in dummy_retrieve_res]
)
print('\n++ --------inspect_vector_store begin--------')
print(vector_store_preview)
print('oo --------inspect_vector_store end--------')
logger.info('\n++ --------inspect_vector_store begin--------')
logger.info(vector_store_preview)
logger.info('oo --------inspect_vector_store end--------')
return vector_store_preview