Mirrored from
https://github.com/SCIR-HI/Med-ChatGLM.git
Synced 2025-12-06 14:46:49 +00:00
v0.1 commit
This commit is contained in:
92
configuration_chatglm.py
Normal file
@@ -0,0 +1,92 @@
""" ChatGLM model configuration """

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging


logger = logging.get_logger(__name__)


class ChatGLMConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`~ChatGLMModel`].
    It is used to instantiate a ChatGLM model according to the specified arguments, defining the model
    architecture. Instantiating a configuration with the defaults will yield a configuration similar to that of
    the ChatGLM-6B [THUDM/ChatGLM-6B](https://huggingface.co/THUDM/chatglm-6b) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
    Read the documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 150528):
            Vocabulary size of the ChatGLM-6B model. Defines the number of different tokens that can be
            represented by the `inputs_ids` passed when calling [`~ChatGLMModel`] or [`~TFChatGLMModel`].
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the encoder layers and the pooler layer.
        num_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer in the Transformer encoder.
        inner_hidden_size (`int`, *optional*, defaults to 16384):
            Dimension of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        max_sequence_length (`int`, *optional*, defaults to 2048):
            The maximum sequence length that this model might ever be used with.
            Typically set this to something large just in case (e.g., 512, 1024, or 2048).
        layernorm_epsilon (`float`, *optional*, defaults to 1e-5):
            The epsilon used by the layer normalization layers.
        use_cache (`bool`, *optional*, defaults to `False`):
            Whether the model should return the last key/values attentions (not used by all models).
        bos_token_id (`int`, *optional*, defaults to 150004):
            The id of the beginning-of-sequence token.
        eos_token_id (`int`, *optional*, defaults to 150005):
            The id of the end-of-sequence token.
        pad_token_id (`int`, *optional*, defaults to 0):
            The id of the padding token.
        position_encoding_2d (`bool`, *optional*, defaults to `True`):
            Whether to use the GLM-style 2D position encoding, which tracks each token's absolute position
            and its position within the generated block separately.

    Example:

    ```python
    >>> from configuration_chatglm import ChatGLMConfig
    >>> from modeling_chatglm import ChatGLMModel

    >>> # Initializing a ChatGLM-6B THUDM/ChatGLM-6B style configuration
    >>> configuration = ChatGLMConfig()

    >>> # Initializing a model from the THUDM/ChatGLM-6B style configuration
    >>> model = ChatGLMModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    """
    model_type = "chatglm"

    def __init__(
        self,
        vocab_size=150528,
        hidden_size=4096,
        num_layers=28,
        num_attention_heads=32,
        layernorm_epsilon=1e-5,
        use_cache=False,
        bos_token_id=150004,
        eos_token_id=150005,
        pad_token_id=0,
        max_sequence_length=2048,
        inner_hidden_size=16384,
        position_encoding_2d=True,
        **kwargs
    ):
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.max_sequence_length = max_sequence_length
        self.layernorm_epsilon = layernorm_epsilon
        self.inner_hidden_size = inner_hidden_size
        self.use_cache = use_cache
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.position_encoding_2d = position_encoding_2d
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs
        )
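Since `ChatGLMConfig` subclasses `PretrainedConfig`, it inherits the standard `save_pretrained`/`from_pretrained` serialization. A minimal sketch of the round trip, assuming `transformers` is installed and this file is importable as `configuration_chatglm` (the target directory name is arbitrary):

```python
from configuration_chatglm import ChatGLMConfig

# Override a few defaults; unrecognized keyword arguments flow through
# **kwargs into PretrainedConfig.__init__.
config = ChatGLMConfig(max_sequence_length=1024, use_cache=True)

# save_pretrained writes a config.json (including model_type="chatglm")
# into the directory; from_pretrained reads it back into an equivalent object.
config.save_pretrained("./chatglm-config")  # arbitrary local path
reloaded = ChatGLMConfig.from_pretrained("./chatglm-config")

assert reloaded.max_sequence_length == 1024
assert reloaded.use_cache is True
```

Passing `pad_token_id`, `bos_token_id`, and `eos_token_id` explicitly to `super().__init__` (rather than only storing them as attributes) is what makes them visible through the standard `PretrainedConfig` token-id properties that generation code relies on.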
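The `position_encoding_2d` flag only toggles logic that lives in `modeling_chatglm.py`; nothing here constructs position ids. The sketch below is a hypothetical illustration of the GLM-style 2D scheme the flag refers to (the helper name and exact layout are assumptions for illustration, not code from this repo): one channel carries absolute positions, with every generated token reusing the position of the mask slot it fills, and a second channel counts positions inside the generated block.

```python
from typing import List, Tuple

def glm_2d_position_ids(context_length: int, num_generated: int,
                        mask_position: int) -> Tuple[List[int], List[int]]:
    """Hypothetical helper sketching GLM-style 2D position ids.

    Channel 1: absolute positions over the prompt; generated tokens all
    reuse the mask slot's position.
    Channel 2: zeros over the prompt, then 1..num_generated counting
    position inside the generated block.
    """
    position_ids = list(range(context_length)) + [mask_position] * num_generated
    block_position_ids = [0] * context_length + list(range(1, num_generated + 1))
    return position_ids, block_position_ids

# Example: a 5-token prompt with the mask at index 3, generating 3 tokens.
# position_ids       -> [0, 1, 2, 3, 4, 3, 3, 3]
# block_position_ids -> [0, 0, 0, 0, 0, 1, 2, 3]
```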