rag version one

这个提交包含在:
binary-husky
2024-08-28 15:14:13 +00:00
父节点 294716c832
当前提交 08c3c56f53
共有 9 个文件被更改,包括 313 次插入和 79 次删除

查看文件

@@ -5,11 +5,40 @@ from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log
from shared_utils.key_pattern_manager import select_api_key_for_embed_models
from typing import List, Any
class OpenAiEmbeddingModel():
import numpy as np
def mean_agg(embeddings):
    """Average a collection of embedding vectors element-wise.

    The input is stacked into a NumPy array, averaged along the first
    axis, and the result is converted back to plain Python values.
    """
    stacked = np.array(embeddings)
    averaged = np.mean(stacked, axis=0)
    return averaged.tolist()
class EmbeddingModel():
    """Shared helpers layered on top of a concrete embedding backend.

    The methods here call ``self.get_query_embedding`` and
    ``self.compute_embedding``, neither of which is defined in this class.
    """

    def get_agg_embedding_from_queries(
        self,
        queries: List[str],
        agg_fn = None,
    ):
        """Embed every query and combine the results into one value.

        Each query is passed to ``self.get_query_embedding``; the list of
        embeddings is then handed to ``agg_fn``. A falsy ``agg_fn`` selects
        ``mean_agg``.
        """
        per_query = []
        for text in queries:
            per_query.append(self.get_query_embedding(text))
        combine = agg_fn if agg_fn else mean_agg
        return combine(per_query)

    def get_text_embedding_batch(
        self,
        texts: List[str],
        show_progress: bool = False,
    ):
        """Embed ``texts`` via ``self.compute_embedding`` in batch mode.

        ``show_progress`` is accepted but not used by this method.
        """
        batch_result = self.compute_embedding(texts, batch_mode=True)
        return batch_result
class OpenAiEmbeddingModel(EmbeddingModel):
def __init__(self, llm_kwargs:dict=None):
self.llm_kwargs = llm_kwargs
def get_query_embedding(self, query: str):
return self.compute_embedding(query)
def compute_embedding(self, text="这是要计算嵌入的文本", llm_kwargs:dict=None, batch_mode=False):
from .bridge_all_embed import embed_model_info
@@ -20,9 +49,9 @@ class OpenAiEmbeddingModel():
raise RuntimeError("llm_kwargs is not provided!")
# setup api and req url
api_key = select_api_key_for_embed_models(llm_kwargs['api_key'], llm_kwargs['llm_model'])
embed_model = llm_kwargs['llm_model']
base_url = embed_model_info[llm_kwargs['llm_model']]['embed_endpoint'].replace('embeddings', '')
api_key = select_api_key_for_embed_models(llm_kwargs['api_key'], llm_kwargs['embed_model'])
embed_model = llm_kwargs['embed_model']
base_url = embed_model_info[llm_kwargs['embed_model']]['embed_endpoint'].replace('embeddings', '')
# send and compute
with ProxyNetworkActivate("Connect_OpenAI_Embedding"):
@@ -40,21 +69,11 @@ class OpenAiEmbeddingModel():
embedding = [d.embedding for d in res.data]
else:
embedding = res.data[0].embedding
return embedding
def embedding_dimension(self, llm_kwargs):
from .bridge_all_embed import embed_model_info
return embed_model_info[llm_kwargs['llm_model']]['embed_dimension']
def get_text_embedding_batch(
self,
texts: List[str],
show_progress: bool = False,
):
return self.compute_embedding(texts, batch_mode=True)
return embed_model_info[llm_kwargs['embed_model']]['embed_dimension']
if __name__ == "__main__":
    # Intentionally empty: running this module directly performs no work.
    pass