@@ -2711,7 +2711,7 @@ class StarCoder2Model(Model):
    model_arch = gguf.MODEL_ARCH.STARCODER2


-@Model.register("MambaForCausalLM", "MambaLMHeadModel")
+@Model.register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM")
class MambaModel(Model):
    model_arch = gguf.MODEL_ARCH.MAMBA

@@ -2731,7 +2731,7 @@ def set_vocab(self):
        else:
            # Use the GPT-NeoX tokenizer when no tokenizer files are present
            self._set_vocab_builtin("gpt-neox", vocab_size)
-
+
    def set_gguf_parameters(self):
        d_model = self.find_hparam(["hidden_size", "d_model"])
        d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
@@ -2742,21 +2742,25 @@ def set_gguf_parameters(self):
        # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
        dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
        rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
-
+        num_hidden_layers = self.find_hparam(["n_layer", "num_hidden_layers"])
+        use_b_dt_norm = False
+        # For FalconMamba we do apply RMS norm on the B / DT and C layers
+        if self.find_hparam(["model_type"]) in ["falcon_mamba"]:
+            use_b_dt_norm = True
        # Fail early for models which don't have a block expansion factor of 2
        assert d_inner == 2 * d_model

        self.gguf_writer.add_context_length(2 ** 20) # arbitrary value; for those who use the default
        self.gguf_writer.add_embedding_length(d_model)
        self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
        self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
-        self.gguf_writer.add_block_count(self.hparams["n_layer"])
+        self.gguf_writer.add_block_count(num_hidden_layers)
        self.gguf_writer.add_ssm_conv_kernel(d_conv)
        self.gguf_writer.add_ssm_inner_size(d_inner)
        self.gguf_writer.add_ssm_state_size(d_state)
        self.gguf_writer.add_ssm_time_step_rank(dt_rank)
        self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
-        self.gguf_writer.add_mamba_b_dt_rms(False) # For classic Mamba we don't apply rms norm on B / DT layers
+        self.gguf_writer.add_mamba_b_dt_rms(use_b_dt_norm) # classic Mamba does not apply RMS norm on the B / DT layers; FalconMamba does
        self.gguf_writer.add_file_type(self.ftype)

    _tok_embd = None
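For readers skimming the hunk above: `dt_rank` falls back to ceil(d_model / 16) via the negative floor-division trick, and the new `use_b_dt_norm` flag is keyed purely off `model_type`. A minimal standalone sketch of those two pieces (the helper names and example dimensions are illustrative, not part of the converter):

    # Standalone sketch of the defaults computed in set_gguf_parameters();
    # the dimensions below are made up for illustration only.

    def default_dt_rank(d_model: int) -> int:
        # Ceiling division without math.ceil: -(a // -b) == ceil(a / b) for positive ints.
        return -(d_model // -16)

    def applies_b_dt_rms(model_type: str) -> bool:
        # Mirrors the new model_type check: only FalconMamba checkpoints get the
        # B / DT RMS-norm flag written into the GGUF metadata.
        return model_type in ["falcon_mamba"]

    assert default_dt_rank(2560) == 160   # exact multiple of 16
    assert default_dt_rank(1000) == 63    # 62.5 rounded up
    assert applies_b_dt_rms("falcon_mamba") is True
    assert applies_b_dt_rms("mamba") is False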
@@ -3855,43 +3859,10 @@ def prepare_tensors(self):
            self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))

        super().prepare_tensors()
-
-
-@Model.register("FalconMambaForCausalLM")
-class FalconMambaModel(MambaModel):
-    model_arch = gguf.MODEL_ARCH.MAMBA
-
-    def set_gguf_parameters(self):
-        d_model = self.find_hparam(["hidden_size", "d_model"])
-        d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
-        d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
-        d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 16
-        # ceiling division
-        # ref: https://stackoverflow.com/a/17511341/22827863
-        # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
-        dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
-        rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
-
-        # Fail early for models which don't have a block expansion factor of 2
-        assert d_inner == 2 * d_model
-
-        self.gguf_writer.add_context_length(2 ** 20) # arbitrary value; for those who use the default
-        self.gguf_writer.add_embedding_length(d_model)
-        self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
-        self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
-        self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
-        self.gguf_writer.add_ssm_conv_kernel(d_conv)
-        self.gguf_writer.add_mamba_b_dt_rms(True) # For FalconMamba we do apply rms norm on B / DT layers
-        self.gguf_writer.add_ssm_inner_size(d_inner)
-        self.gguf_writer.add_ssm_state_size(d_state)
-        self.gguf_writer.add_ssm_time_step_rank(dt_rank)
-        self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
-        self.gguf_writer.add_file_type(self.ftype)
-
+

###### CONVERSION LOGIC ######

-
# tree of lazy tensors
class LazyTorchTensor(gguf.LazyBase):
    _tensor_type = torch.Tensor
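The second hunk deletes the dedicated FalconMambaModel subclass: with "FalconMambaForCausalLM" now registered directly on MambaModel, the shared set_gguf_parameters handles both variants. A toy sketch of that decorator-registry pattern, using a simplified register/lookup pair rather than the converter's actual Model class:

    # Toy registry illustrating why the separate subclass is no longer needed.
    # This is a simplified stand-in, not the converter's real implementation.
    _model_classes: dict[str, type] = {}

    def register(*names: str):
        def wrapper(cls: type) -> type:
            # Map every declared HF architecture name to the same converter class.
            for name in names:
                _model_classes[name] = cls
            return cls
        return wrapper

    @register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM")
    class MambaModelSketch:
        pass

    # Both architectures resolve to the same converter class.
    assert _model_classes["MambaForCausalLM"] is MambaModelSketch
    assert _model_classes["FalconMambaForCausalLM"] is MambaModelSketch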