fix loading chatglm3 (#1937)

* update welcome svg

* update welcome message

* fix loading chatglm3

---------

Co-authored-by: binary-husky <qingxu.fu@outlook.com>
Co-authored-by: binary-husky <96192199+binary-husky@users.noreply.github.com>
这个提交包含在:
moetayuko
2024-08-19 23:32:45 +08:00
提交者 GitHub
父节点 5010537f3c
当前提交 a95b3daab9
共有 3 个文件被更改,包括 32 次插入22 次删除

查看文件

@@ -18,7 +18,7 @@ class GetGLM3Handle(LocalLLMHandle):
def load_model_and_tokenizer(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
from transformers import AutoModel, AutoTokenizer
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
import os, glob
import os
import platform
@@ -45,15 +45,13 @@ class GetGLM3Handle(LocalLLMHandle):
chatglm_model = AutoModel.from_pretrained(
pretrained_model_name_or_path=_model_name_,
trust_remote_code=True,
device="cuda",
load_in_4bit=True,
quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
elif LOCAL_MODEL_QUANT == "INT8": # INT8
chatglm_model = AutoModel.from_pretrained(
pretrained_model_name_or_path=_model_name_,
trust_remote_code=True,
device="cuda",
load_in_8bit=True,
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)
else:
chatglm_model = AutoModel.from_pretrained(