@@ -81,13 +81,13 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_block_count(self.block_count)

-        if (n_ctx := self.hparams.get("max_position_embeddings")) is not None:
+        if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
             self.gguf_writer.add_context_length(n_ctx)

         n_embd = self.find_hparam(["hidden_size", "n_embd"])
         self.gguf_writer.add_embedding_length(n_embd)

-        if (n_ff := self.find_hparam(["intermediate_size", "n_inner"])) is not None:
+        if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
             self.gguf_writer.add_feed_forward_length(n_ff)

         n_head = self.find_hparam(["num_attention_heads", "n_head"])
@@ -98,7 +98,7 @@ def set_gguf_parameters(self):
         if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
             self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
-        if (f_norm_eps := self.hparams.get("layer_norm_eps")) is not None:
+        if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon"], optional=True)) is not None:
             self.gguf_writer.add_layer_norm_eps(f_norm_eps)
         if (n_experts := self.hparams.get("num_local_experts")) is not None:
             self.gguf_writer.add_expert_count(n_experts)
@@ -1750,9 +1750,7 @@ def __init__(self, *args, **kwargs):

     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        self.gguf_writer.add_causal_attention(self.hparams["causal"])
         self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
-        self.gguf_writer.add_pooling_layer(True)

     def get_tensors(self):
         assert self.vocab_size is not None
0 commit comments