* update welcome svg

* fix loading chatglm3 (#1937)

* update welcome svg

* update welcome message

* fix loading chatglm3

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com>

* begin rag project with llama index

* rag version one

* rag beta release

* add social worker (proto)

* fix llamaindex version

---------

Co-authored-by: moetayuko <loli@yuko.moe>
Commit by binary-husky, 2024-09-08 23:20:42 +08:00 (committed via GitHub)
Parent commit: 16f4fd636e
Current commit: dd66ca26f7
19 files changed, 1103 insertions and 12 deletions

@@ -18,7 +18,7 @@ class GetGLM3Handle(LocalLLMHandle):
     def load_model_and_tokenizer(self):
         # 🏃‍♂️🏃‍♂️🏃‍♂️ executed in the child process
-        from transformers import AutoModel, AutoTokenizer
-        import os, glob
+        from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
+        import os
         import platform
@@ -45,15 +45,13 @@ class GetGLM3Handle(LocalLLMHandle):
             chatglm_model = AutoModel.from_pretrained(
                 pretrained_model_name_or_path=_model_name_,
                 trust_remote_code=True,
-                device="cuda",
-                load_in_4bit=True,
+                quantization_config=BitsAndBytesConfig(load_in_4bit=True),
             )
         elif LOCAL_MODEL_QUANT == "INT8":  # INT8
             chatglm_model = AutoModel.from_pretrained(
                 pretrained_model_name_or_path=_model_name_,
                 trust_remote_code=True,
-                device="cuda",
-                load_in_8bit=True,
+                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
             )
         else:
             chatglm_model = AutoModel.from_pretrained(
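
For context on why loading broke: recent transformers releases dropped the bare load_in_4bit / load_in_8bit keyword arguments to from_pretrained in favor of an explicit BitsAndBytesConfig, and an extra device="cuda" argument is not accepted alongside bitsandbytes quantization, which places the model itself. Below is a minimal standalone sketch of the pattern the fix adopts, assuming a CUDA machine with bitsandbytes installed; the "THUDM/chatglm3-6b" model id is illustrative, whereas the handler above uses its configured _model_name_.

# Minimal sketch of the quantized-loading pattern adopted by this fix.
# Assumes: transformers with bitsandbytes installed and a CUDA GPU.
# "THUDM/chatglm3-6b" is an illustrative model id, not taken from the diff.
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

model_id = "THUDM/chatglm3-6b"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModel.from_pretrained(
    model_id,
    trust_remote_code=True,
    # Replaces the removed load_in_4bit=True kwarg; device placement is
    # handled by the quantization backend, so no device="cuda" is passed.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)

Switching between INT4 and INT8 then only changes the flag inside BitsAndBytesConfig, which is exactly what the two hunks above do for the LOCAL_MODEL_QUANT setting.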