* update welcome svg

* fix loading chatglm3 (#1937)

* update welcome svg

* update welcome message

* fix loading chatglm3

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com>

* begin rag project with llama index

* rag version one

* rag beta release

* add social worker (proto)

* fix llamaindex version

---------

Co-authored-by: moetayuko <loli@yuko.moe>
这个提交包含在:
binary-husky
2024-09-08 23:20:42 +08:00
提交者 GitHub
父节点 16f4fd636e
当前提交 dd66ca26f7
共有 19 个文件被更改,包括 1103 次插入和 12 次删除

查看文件

@@ -18,7 +18,7 @@ class GetGLM3Handle(LocalLLMHandle):
def load_model_and_tokenizer(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
from transformers import AutoModel, AutoTokenizer
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
import os, glob
import os
import platform
@@ -45,15 +45,13 @@ class GetGLM3Handle(LocalLLMHandle):
chatglm_model = AutoModel.from_pretrained(
pretrained_model_name_or_path=_model_name_,
trust_remote_code=True,
device="cuda",
load_in_4bit=True,
quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
elif LOCAL_MODEL_QUANT == "INT8": # INT8
chatglm_model = AutoModel.from_pretrained(
pretrained_model_name_or_path=_model_name_,
trust_remote_code=True,
device="cuda",
load_in_8bit=True,
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)
else:
chatglm_model = AutoModel.from_pretrained(

查看文件

@@ -0,0 +1,40 @@
import tiktoken, copy, re
from functools import lru_cache
from concurrent.futures import ThreadPoolExecutor
from toolbox import get_conf, trimmed_format_exc, apply_gpt_academic_string_mask, read_one_api_model_name
# Endpoint redirection: derive the chat and embedding URLs from the user configuration.
API_URL_REDIRECT, AZURE_ENDPOINT, AZURE_ENGINE = get_conf(
    "API_URL_REDIRECT",
    "AZURE_ENDPOINT",
    "AZURE_ENGINE",
)

# Make sure the Azure base URL ends with a slash before the deployment path is appended.
if not AZURE_ENDPOINT.endswith('/'):
    AZURE_ENDPOINT = AZURE_ENDPOINT + '/'
azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'

# Default OpenAI chat endpoint; swapped out when the configuration redirects it.
openai_endpoint = "https://api.openai.com/v1/chat/completions"
if openai_endpoint in API_URL_REDIRECT:
    openai_endpoint = API_URL_REDIRECT[openai_endpoint]

# The embeddings endpoint is the (possibly redirected) chat endpoint with its path segment renamed.
openai_embed_endpoint = openai_endpoint.replace("chat/completions", "embeddings")
from .openai_embed import OpenAiEmbeddingModel
def _openai_embed_entry(dimension):
    """Build the registry entry for an OpenAI-served embedding model with the given vector size."""
    return {
        "embed_class": OpenAiEmbeddingModel,
        "embed_endpoint": openai_embed_endpoint,
        "embed_dimension": dimension,
    }


# Registry of the supported embedding models, keyed by model name.
embed_model_info = {
    # text-embedding-3-small Increased performance over 2nd generation ada embedding model | 1,536
    "text-embedding-3-small": _openai_embed_entry(1536),

    # text-embedding-3-large Most capable embedding model for both english and non-english tasks | 3,072
    "text-embedding-3-large": _openai_embed_entry(3072),

    # text-embedding-ada-002 Most capable 2nd generation embedding model, replacing 16 first generation models | 1,536
    "text-embedding-ada-002": _openai_embed_entry(1536),
}

查看文件

@@ -0,0 +1,79 @@
from llama_index.embeddings.openai import OpenAIEmbedding
from openai import OpenAI
from toolbox import get_conf
from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder, ProxyNetworkActivate
from shared_utils.key_pattern_manager import select_api_key_for_embed_models
from typing import List, Any
import numpy as np
def mean_agg(embeddings):
    """Mean aggregation for embeddings.

    Args:
        embeddings: A sequence of equally sized embedding vectors.

    Returns:
        The element-wise mean across the vectors, converted to plain Python
        values via ``ndarray.tolist()``.

    Raises:
        ValueError: If ``embeddings`` contains no vectors. Averaging nothing
            would otherwise yield NaN with only a NumPy RuntimeWarning.
    """
    stacked = np.array(embeddings)
    # Guard only the "zero vectors" case; other shapes behave exactly as before.
    if stacked.ndim > 0 and stacked.shape[0] == 0:
        raise ValueError("mean_agg requires at least one embedding")
    return stacked.mean(axis=0).tolist()
class EmbeddingModel():
    """Base class for embedding backends.

    Subclasses provide ``get_query_embedding`` and ``compute_embedding``; the
    helpers defined here are built on top of those two methods.
    """

    def get_query_embedding(self, query: str):
        """Return the embedding of a single query string (subclass hook)."""
        raise NotImplementedError(
            f"{type(self).__name__} must implement get_query_embedding()"
        )

    def compute_embedding(self, text, llm_kwargs: dict = None, batch_mode=False):
        """Return the embedding of ``text``, or of each item when ``batch_mode`` is set (subclass hook)."""
        raise NotImplementedError(
            f"{type(self).__name__} must implement compute_embedding()"
        )

    def get_agg_embedding_from_queries(
        self,
        queries: List[str],
        agg_fn = None,
    ):
        """Get aggregated embedding from multiple queries.

        Args:
            queries: Query strings; each one is embedded with
                ``get_query_embedding``.
            agg_fn: Callable that receives the list of query embeddings and
                returns the aggregate. Falls back to ``mean_agg`` when omitted.

        Returns:
            Whatever ``agg_fn`` returns for the collected embeddings.
        """
        query_embeddings = [self.get_query_embedding(query) for query in queries]
        agg_fn = agg_fn or mean_agg
        return agg_fn(query_embeddings)

    def get_text_embedding_batch(
        self,
        texts: List[str],
        show_progress: bool = False,
    ):
        """Embed a list of texts in one batch call.

        Args:
            texts: The texts to embed.
            show_progress: Accepted but not used by this implementation.

        Returns:
            The result of ``compute_embedding(texts, batch_mode=True)``.
        """
        return self.compute_embedding(texts, batch_mode=True)
class OpenAiEmbeddingModel(EmbeddingModel):
    """Embedding backend that sends requests through the ``openai`` client.

    The model name and API key are read from an ``llm_kwargs`` dict (keys
    ``'embed_model'`` and ``'api_key'``), supplied either at construction time
    or per call.
    """

    def __init__(self, llm_kwargs: dict = None):
        """Store the default ``llm_kwargs`` used when a call does not pass its own."""
        self.llm_kwargs = llm_kwargs

    def _resolve_llm_kwargs(self, llm_kwargs):
        """Return the per-call ``llm_kwargs``, falling back to the instance default."""
        if llm_kwargs is None:
            llm_kwargs = self.llm_kwargs
        if llm_kwargs is None:
            raise RuntimeError("llm_kwargs is not provided!")
        return llm_kwargs

    @staticmethod
    def _endpoint_to_base_url(embed_endpoint):
        """Strip the last ``embeddings`` segment from an endpoint URL.

        Only the final occurrence is removed so that a redirected host which
        itself contains the word ``embeddings`` is left intact.
        """
        head, sep, tail = embed_endpoint.rpartition('embeddings')
        return head + tail if sep else embed_endpoint

    def get_query_embedding(self, query: str):
        """Return the embedding of a single query string."""
        return self.compute_embedding(query)

    def compute_embedding(self, text="这是要计算嵌入的文本", llm_kwargs: dict = None, batch_mode=False):
        """Request embeddings for ``text`` from the configured endpoint.

        Args:
            text: A single string, or a list of strings when ``batch_mode`` is
                true.
            llm_kwargs: Dict providing ``'api_key'`` and ``'embed_model'``.
                Defaults to the dict given to the constructor.
            batch_mode: When true, ``text`` is a list and a list of embeddings
                (one per item) is returned; otherwise a single embedding is
                returned.

        Returns:
            One embedding, or a list of embeddings in batch mode. An empty
            batch returns ``[]`` without contacting the service.

        Raises:
            RuntimeError: If no ``llm_kwargs`` is available.
            TypeError: If ``text`` does not match ``batch_mode``.
        """
        from .bridge_all_embed import embed_model_info

        # load kwargs
        llm_kwargs = self._resolve_llm_kwargs(llm_kwargs)

        # validate the input before any network setup; explicit raises survive `python -O`
        if batch_mode:
            if not isinstance(text, list):
                raise TypeError("text must be a list of strings when batch_mode=True")
            if not text:
                # Nothing to embed, so skip the request entirely.
                return []
            inputs = text
        else:
            if not isinstance(text, str):
                raise TypeError("text must be a string when batch_mode=False")
            inputs = [text]

        # setup api and req url
        embed_model = llm_kwargs['embed_model']
        api_key = select_api_key_for_embed_models(llm_kwargs['api_key'], embed_model)
        base_url = self._endpoint_to_base_url(embed_model_info[embed_model]['embed_endpoint'])

        # send and compute
        with ProxyNetworkActivate("Connect_OpenAI_Embedding"):
            self.oai_client = OpenAI(api_key=api_key, base_url=base_url)
            res = self.oai_client.embeddings.create(input=inputs, model=embed_model)

        # parse result
        if batch_mode:
            return [d.embedding for d in res.data]
        return res.data[0].embedding

    def embedding_dimension(self, llm_kwargs=None):
        """Return the vector size registered for the configured embedding model.

        Args:
            llm_kwargs: Dict providing ``'embed_model'``. Defaults to the dict
                given to the constructor.

        Raises:
            RuntimeError: If no ``llm_kwargs`` is available.
        """
        from .bridge_all_embed import embed_model_info

        llm_kwargs = self._resolve_llm_kwargs(llm_kwargs)
        return embed_model_info[llm_kwargs['embed_model']]['embed_dimension']
if __name__ == "__main__":
    # Intentionally a no-op: running this module directly does nothing.
    pass