Commit f9db574

convert-new.py : minor fixes

1 parent c40ec5c
File tree

convert-new.py

1 file changed: 8 additions & 8 deletions
@@ -142,7 +142,7 @@ def guessed(model: 'LazyModel') -> 'Params':
                             "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
 
         n_head = n_embd // 128 # guessed
-        n_mult = 255           # guessed
+        n_mult = 256           # guessed
 
         # TODO: verify this
         n_ff = int(2 * (4 * n_embd) / 3)
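The first two hunks fix the n_mult guess used when no config.json is available: 255 looks like a typo for 256, the rounding multiple LLaMA uses when sizing its SwiGLU feed-forward layer, and the second hunk below threads the guessed local into Params instead of re-hardcoding 256. A minimal sketch of why the multiple matters (guessed_n_ff is an illustrative name; the script itself leaves n_ff unrounded, per the TODO above):

def guessed_n_ff(n_embd: int, n_mult: int = 256) -> int:
    # LLaMA sizes its feed-forward layer as 2/3 of 4*n_embd,
    # rounded up to the nearest multiple of n_mult.
    n_ff = int(2 * (4 * n_embd) / 3)
    return n_mult * ((n_ff + n_mult - 1) // n_mult)

# LLaMA-7B: n_embd = 4096 -> int(32768 / 3) = 10922 -> rounds up to 11008
assert guessed_n_ff(4096) == 11008
# the old guess of 255 would round the same input to 10965, not 11008
assert guessed_n_ff(4096, n_mult=255) == 10965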
@@ -151,7 +151,7 @@ def guessed(model: 'LazyModel') -> 'Params':
         return Params(
             n_vocab = n_vocab,
             n_embd  = n_embd,
-            n_mult  = 256,
+            n_mult  = n_mult,
             n_layer = n_layer,
             n_ctx   = -1,
             n_ff    = n_ff,
@@ -174,10 +174,10 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
 
         n_mult = find_n_mult(n_ff, n_embd);
 
-        if "max_sequence_length" in hparams:
-            n_ctx = hparams["max_sequence_length"]
-        elif "max_position_embeddings" in hparams:
-            n_ctx = hparams["max_position_embeddings"]
+        if "max_sequence_length" in config:
+            n_ctx = config["max_sequence_length"]
+        elif "max_position_embeddings" in config:
+            n_ctx = config["max_position_embeddings"]
         else:
             raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
                             "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
@@ -541,7 +541,7 @@ def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
 
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
+            out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head_kv)
             out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
             out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
@@ -1011,7 +1011,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
         vocab = load_vocab(vocab_dir, args.vocabtype)
 
     model = model_plus.model
-    model = do_necessary_conversions(model, params)
+    model = do_necessary_conversions(model, params) # TODO: utilize gguf.get_tensor_name_map
     output_type = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, output_type)
     outfile = args.outfile or default_outfile(model_plus.paths, output_type)
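The added TODO points at gguf-py's tensor-name mapping as an eventual replacement for the hand-written renaming in do_necessary_conversions. Assuming gguf-py's TensorNameMap API (gguf.get_tensor_name_map and TensorNameMap.get_name; treat both signatures as assumptions here), the renaming could look roughly like:

import gguf

def rename_tensors(model: dict, n_layer: int) -> dict:
    # Hypothetical sketch of the TODO: map HF tensor names to GGUF
    # names via gguf-py's name map instead of hand-written rules.
    tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, n_layer)
    out = {}
    for name, tensor in model.items():
        new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
        if new_name is None:
            raise Exception(f"unrecognized tensor name: {name!r}")
        out[new_name] = tensor
    return out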
