@@ -142,7 +142,7 @@ def guessed(model: 'LazyModel') -> 'Params':
                             "Suggestion: provide 'config.json' of the model in the same directory containing model files.")

         n_head = n_embd // 128  # guessed
-        n_mult = 255  # guessed
+        n_mult = 256  # guessed

         # TODO: verify this
         n_ff = int(2 * (4 * n_embd) / 3)
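The first two hunks fix the guessed n_mult: LLaMA sizes its feed-forward layer by rounding int(2 * (4 * n_embd) / 3) up to a multiple of n_mult ("multiple_of" in the original LLaMA code), so the guess must be 256, not 255; the second hunk below then threads the guessed value into Params instead of a duplicated constant. A minimal worked example, assuming the rounding step that follows the hunk above:

    # Why n_mult must be 256: the guessed n_ff is rounded up to a multiple of it.
    n_embd = 4096                     # LLaMA-7B embedding size
    n_mult = 256
    n_ff = int(2 * (4 * n_embd) / 3)  # 10922, same formula as in guessed()
    n_ff = n_mult * ((n_ff + n_mult - 1) // n_mult)
    print(n_ff)  # 11008 -- the real LLaMA-7B n_ff; n_mult = 255 would yield 10965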
@@ -151,7 +151,7 @@ def guessed(model: 'LazyModel') -> 'Params':
         return Params(
             n_vocab = n_vocab,
             n_embd  = n_embd,
-            n_mult  = 256,
+            n_mult  = n_mult,
             n_layer = n_layer,
             n_ctx   = -1,
             n_ff    = n_ff,
@@ -174,10 +174,10 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':

     n_mult = find_n_mult(n_ff, n_embd);

-    if "max_sequence_length" in hparams:
-        n_ctx = hparams["max_sequence_length"]
-    elif "max_position_embeddings" in hparams:
-        n_ctx = hparams["max_position_embeddings"]
+    if "max_sequence_length" in config:
+        n_ctx = config["max_sequence_length"]
+    elif "max_position_embeddings" in config:
+        n_ctx = config["max_position_embeddings"]
     else:
         raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
                         "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
@@ -541,7 +541,7 @@ def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:

     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
+            out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head_kv)
             out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
             out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
@@ -1011,7 +1011,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
     vocab = load_vocab(vocab_dir, args.vocabtype)

     model = model_plus.model
-    model = do_necessary_conversions(model, params)
+    model = do_necessary_conversions(model, params)  # TODO: utilize gguf.get_tensor_name_map
     output_type = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, output_type)
     outfile = args.outfile or default_outfile(model_plus.paths, output_type)
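The added TODO points at replacing the hand-written HF-to-GGML name translation (as in convert_transformers_to_orig above) with the tensor-name mapping shipped in the gguf Python package. Conceptually it is a lookup table keyed by layer index; a hypothetical dict-based illustration (tensor names taken from the diff, table shape invented for the example):

    from typing import Optional

    NAME_MAP = {
        "model.layers.{bid}.self_attn.q_proj.weight": "layers.{bid}.attention.wq.weight",
        "model.layers.{bid}.self_attn.k_proj.weight": "layers.{bid}.attention.wk.weight",
        "model.layers.{bid}.self_attn.v_proj.weight": "layers.{bid}.attention.wv.weight",
    }

    def translate(hf_name: str, bid: int) -> Optional[str]:
        for src, dst in NAME_MAP.items():
            if hf_name == src.format(bid=bid):
                return dst.format(bid=bid)
        return None  # unmapped tensors get skipped or reported

    print(translate("model.layers.3.self_attn.q_proj.weight", 3))
    # layers.3.attention.wq.weight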