File tree Expand file tree Collapse file tree 1 file changed +3
-3
lines changed Expand file tree Collapse file tree 1 file changed +3
-3
lines changed Original file line number Diff line number Diff line change @@ -2742,10 +2742,10 @@ def set_gguf_parameters(self):
2742
2742
# ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
2743
2743
dt_rank = self .find_hparam (["time_step_rank" , "dt_rank" ], optional = True ) or - (d_model // - 16 )
2744
2744
rms_norm_eps = self .find_hparam (["layer_norm_epsilon" , "rms_norm_eps" ], optional = True ) or 1e-5
2745
- use_b_dt_norm = False
2745
+ use_dt_b_c_norm = False
2746
2746
# For FalconMamba we do apply RMS norm on the B, DT and C layers
2747
2747
if self .find_hparam (["model_type" ], optional = True ) in ("falcon_mamba" ,):
2748
- use_b_dt_norm = True
2748
+ use_dt_b_c_norm = True
2749
2749
# Fail early for models which don't have a block expansion factor of 2
2750
2750
assert d_inner == 2 * d_model
2751
2751
@@ -2759,7 +2759,7 @@ def set_gguf_parameters(self):
2759
2759
self .gguf_writer .add_ssm_state_size (d_state )
2760
2760
self .gguf_writer .add_ssm_time_step_rank (dt_rank )
2761
2761
self .gguf_writer .add_layer_norm_rms_eps (rms_norm_eps )
2762
- self .gguf_writer .add_mamba_dt_b_c_rms (use_b_dt_norm ) # For classic Mamba we don't apply rms norm on B / DT layers
2762
+ self .gguf_writer .add_mamba_dt_b_c_rms (use_dt_b_c_norm ) # For classic Mamba we don't apply RMS norm on the B / DT / C layers
2763
2763
self .gguf_writer .add_file_type (self .ftype )
2764
2764
2765
2765
_tok_embd = None
You can’t perform that action at this time.
0 commit comments