
Commit 184a4c6

fix: address comments
1 parent: a8109e3

4 files changed: +12 −13 lines changed


convert_hf_to_gguf.py

Lines changed: 3 additions & 4 deletions
@@ -2741,8 +2741,7 @@ def set_gguf_parameters(self):
         # ref: https://stackoverflow.com/a/17511341/22827863
         # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
         dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
-        rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
-        num_hidden_layers = self.find_hparam(["n_layer", "num_hidden_layers"])
+        rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
         use_b_dt_norm = False
         # For falconmamba we do apply RMS norm on B / DT and C layers
         if self.find_hparam(["model_type"]) in ["falcon_mamba"]:
@@ -2754,13 +2753,13 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_embedding_length(d_model)
         self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
         self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
-        self.gguf_writer.add_block_count(num_hidden_layers)
+        self.gguf_writer.add_block_count(self.block_count)
         self.gguf_writer.add_ssm_conv_kernel(d_conv)
         self.gguf_writer.add_ssm_inner_size(d_inner)
         self.gguf_writer.add_ssm_state_size(d_state)
         self.gguf_writer.add_ssm_time_step_rank(dt_rank)
         self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
-        self.gguf_writer.add_mamba_b_dt_rms(use_b_dt_norm) # For classic Mamba we don't apply rms norm on B / DT layers
+        self.gguf_writer.add_mamba_dt_b_c_rms(use_b_dt_norm) # For classic Mamba we don't apply rms norm on B / DT layers
         self.gguf_writer.add_file_type(self.ftype)

     _tok_embd = None
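
Note: the `-(d_model // -16)` expression in the context above (see the StackOverflow ref) is ceiling division spelled with floor division, and `find_hparam([...], optional=True) or default` falls back to the default when the hparam is absent. A quick sketch in plain Python, with a made-up d_model:

    import math

    d_model = 1000                   # hypothetical width, for illustration only
    dt_rank = -(d_model // -16)      # "upside-down" floor division == ceiling division
    assert dt_rank == math.ceil(d_model / 16) == 63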

gguf-py/gguf/constants.py

Lines changed: 2 additions & 2 deletions
@@ -130,7 +130,7 @@ class SSM:
         INNER_SIZE = "{arch}.ssm.inner_size"
         STATE_SIZE = "{arch}.ssm.state_size"
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
-        B_DT_RMS = "{arch}.ssm.b_dt_rms"
+        DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"

     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
@@ -1373,7 +1373,7 @@ def get_type(val: Any) -> GGUFValueType:
 KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
 KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
 KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
-KEY_SSM_B_DT_RMS = Keys.SSM.B_DT_RMS
+KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS

 # tokenization
 KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
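
Note: the renamed constant is still an `{arch}`-templated key, so the concrete GGUF metadata key depends on the model architecture. A minimal sketch, assuming gguf-py is importable and using "mamba" purely as an example arch name:

    from gguf.constants import Keys

    key = Keys.SSM.DT_B_C_RMS.format(arch="mamba")
    print(key)  # -> mamba.ssm.dt_b_c_rms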

gguf-py/gguf/gguf_writer.py

Lines changed: 2 additions & 2 deletions
@@ -715,8 +715,8 @@ def add_rope_scaling_orig_ctx_len(self, value: int) -> None:
     def add_rope_scaling_finetuned(self, value: bool) -> None:
         self.add_bool(Keys.Rope.SCALING_FINETUNED.format(arch=self.arch), value)

-    def add_mamba_b_dt_rms(self, value: bool) -> None:
-        self.add_bool(Keys.SSM.B_DT_RMS.format(arch=self.arch), value)
+    def add_mamba_dt_b_c_rms(self, value: bool) -> None:
+        self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)

     def add_rope_scaling_yarn_log_mul(self, value: float) -> None:
         self.add_float32(Keys.Rope.SCALING_YARN_LOG_MUL.format(arch=self.arch), value)
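
Note: a usage sketch of the renamed writer method, not a full conversion; the output path, arch name and values are hypothetical, and the real converter drives this through `Model.set_gguf_parameters` as in the first file above:

    from gguf import GGUFWriter

    writer = GGUFWriter("example.gguf", "mamba")  # hypothetical path / arch
    writer.add_block_count(64)                    # illustrative value
    writer.add_mamba_dt_b_c_rms(False)            # stored under "<arch>.ssm.dt_b_c_rms"
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()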

src/llama.cpp

Lines changed: 5 additions & 5 deletions
@@ -328,7 +328,7 @@ enum llm_kv {
     LLM_KV_SSM_CONV_KERNEL,
     LLM_KV_SSM_STATE_SIZE,
     LLM_KV_SSM_TIME_STEP_RANK,
-    LLM_KV_SSM_B_DT_RMS,
+    LLM_KV_SSM_DT_B_C_RMS,

     LLM_KV_TOKENIZER_MODEL,
     LLM_KV_TOKENIZER_PRE,
@@ -2239,7 +2239,7 @@ struct llama_hparams {
     uint32_t ssm_d_inner = 0;
     uint32_t ssm_d_state = 0;
     uint32_t ssm_dt_rank = 0;
-    bool ssm_b_dt_rms = false;
+    bool ssm_dt_b_c_rms = false;

     float f_clamp_kqv = 0.0f;
     float f_max_alibi_bias = 0.0f;
@@ -5055,7 +5055,7 @@ static void llm_load_hparams(
                 ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner);
                 ml.get_key(LLM_KV_SSM_STATE_SIZE, hparams.ssm_d_state);
                 ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank);
-                ml.get_key(LLM_KV_SSM_B_DT_RMS, hparams.ssm_b_dt_rms, false);
+                ml.get_key(LLM_KV_SSM_DT_B_C_RMS, hparams.ssm_dt_b_c_rms, false);

                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

@@ -12166,7 +12166,7 @@ struct llm_build_context {
         const int64_t d_state = hparams.ssm_d_state;
         const int64_t dt_rank = hparams.ssm_dt_rank;
         // Some variants of Mamba arch (e.g. FalconMamba do apply layer norm on B and Dt layers)
-        const bool ssm_b_dt_rms = hparams.ssm_b_dt_rms;
+        const bool ssm_dt_b_c_rms = hparams.ssm_dt_b_c_rms;
         // Use the same RMS norm as the final layer norm
         const float norm_rms_eps = hparams.f_norm_rms_eps;

@@ -12250,7 +12250,7 @@
             struct ggml_tensor * C = ggml_view_2d(ctx0, x_db, d_state, n_tokens, x_db->nb[1], ggml_element_size(x_db)*(dt_rank+d_state));

             // Some Mamba variants (e.g. FalconMamba) apply RMS norm in B, C & Dt layers
-            if (ssm_b_dt_rms) {
+            if (ssm_dt_b_c_rms) {
                 dt = ggml_rms_norm(ctx0, dt, norm_rms_eps);
                 B = ggml_rms_norm(ctx0, B, norm_rms_eps);
                 C = ggml_rms_norm(ctx0, C, norm_rms_eps);
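
Note: `ggml_rms_norm` here normalizes each row by its root mean square, with no learned weight applied at this point in the graph. A rough numpy reference sketch of what the `ssm_dt_b_c_rms` branch computes on dt, B and C, with made-up shapes:

    import numpy as np

    def rms_norm(x: np.ndarray, eps: float) -> np.ndarray:
        # divide each row by sqrt(mean(x^2) + eps); no weight/scale applied
        return x / np.sqrt(np.mean(x * x, axis=-1, keepdims=True) + eps)

    n_tokens, dt_rank, d_state = 4, 48, 16   # illustrative sizes only
    dt = rms_norm(np.random.randn(n_tokens, dt_rank), 1e-5)
    B  = rms_norm(np.random.randn(n_tokens, d_state), 1e-5)
    C  = rms_norm(np.random.randn(n_tokens, d_state), 1e-5)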
