Commit c35fc0b

convert : fix layer names
1 parent 01080a5 commit c35fc0b

File tree

convert-llama-h5-to-gguf.py
gguf-llama.cpp
gguf_namemap.py

3 files changed: 57 additions & 57 deletions


convert-llama-h5-to-gguf.py

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ def count_model_parts(dir_model: str) -> int:

gguf_writer.add_architecture(llm_arch)
gguf_writer.add_name(last_dir)
-gguf_writer.add_file_type( "All tensors F32" if ftype == 0 else "Most tensors F16, some F32")
+gguf_writer.add_file_type("All tensors F32" if ftype == 0 else "Most tensors F16, some F32")
gguf_writer.add_source_hf_repo(hf_repo)
gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])
gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])
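
The only code change here removes a stray space inside the add_file_type call. For reference, the string it passes is selected from the conversion flag; a minimal self-contained sketch of that selection (the helper name is illustrative, not part of the script):

# Illustrative helper mirroring the expression passed to gguf_writer.add_file_type()
# above: ftype == 0 means a full F32 conversion, anything else means a mostly-F16
# conversion that keeps some tensors in F32.
def file_type_description(ftype: int) -> str:
    return "All tensors F32" if ftype == 0 else "Most tensors F16, some F32"

print(file_type_description(0))  # All tensors F32
print(file_type_description(1))  # Most tensors F16, some F32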

gguf-llama.cpp

Lines changed: 13 additions & 13 deletions
@@ -626,7 +626,7 @@ struct gguf_file_loader {
        hparams.n_embd = read_u32("llama.embedding_length");
        hparams.n_ff = read_u32("llama.feed_forward_length");
        hparams.n_head = read_u32("llama.attention.head_count");
-        hparams.n_layer = read_u32("llama.layer_count");
+        hparams.n_layer = read_u32("llama.block_count");
        hparams.n_rot = read_u32("llama.rope.dimension_count");
        hparams.f_rms_norm_eps = read_f32("llama.attention.layer_norm_rms_epsilon");

@@ -1373,7 +1373,7 @@ static void llama_model_load_internal(

        ml->ggml_ctx = ctx;

-        model.tok_embeddings = ml->get_tensor("tok_embeddings.weight", {n_embd, n_vocab}, GGML_BACKEND_CPU);
+        model.tok_embeddings = ml->get_tensor("token_embd.weight", {n_embd, n_vocab}, GGML_BACKEND_CPU);

        // "output" tensor
        {
@@ -1394,8 +1394,8 @@ static void llama_model_load_internal(
                backend_output = GGML_BACKEND_CPU;
            }

-            model.norm   = ml->get_tensor("norm.weight", {n_embd}, backend_norm);
-            model.output = ml->get_tensor("output.weight", {n_embd, n_vocab}, backend_output);
+            model.norm   = ml->get_tensor("output_norm.weight", {n_embd}, backend_norm);
+            model.output = ml->get_tensor("output.weight", {n_embd, n_vocab}, backend_output);
            if (backend_norm == GGML_BACKEND_GPU) {
                vram_weights += ggml_nbytes(model.norm);
            }
@@ -1413,20 +1413,20 @@ static void llama_model_load_internal(

            auto & layer = model.layers[i];

-            std::string layers_i = "layers." + std::to_string(i);
+            std::string layers_i = "blk." + std::to_string(i);

-            layer.attention_norm = ml->get_tensor(layers_i + ".attention_norm.weight", {n_embd}, backend);
+            layer.attention_norm = ml->get_tensor(layers_i + ".attn_norm.weight", {n_embd}, backend);

-            layer.wq = ml->get_tensor(layers_i + ".attention.wq.weight", {n_embd, n_embd}, backend_split);
-            layer.wk = ml->get_tensor(layers_i + ".attention.wk.weight", {n_embd, n_embd_gqa}, backend_split);
-            layer.wv = ml->get_tensor(layers_i + ".attention.wv.weight", {n_embd, n_embd_gqa}, backend_split);
-            layer.wo = ml->get_tensor(layers_i + ".attention.wo.weight", {n_embd, n_embd}, backend_split);
+            layer.wq = ml->get_tensor(layers_i + ".attn_q.weight", {n_embd, n_embd}, backend_split);
+            layer.wk = ml->get_tensor(layers_i + ".attn_k.weight", {n_embd, n_embd_gqa}, backend_split);
+            layer.wv = ml->get_tensor(layers_i + ".attn_v.weight", {n_embd, n_embd_gqa}, backend_split);
+            layer.wo = ml->get_tensor(layers_i + ".attn_output.weight", {n_embd, n_embd}, backend_split);

            layer.ffn_norm = ml->get_tensor(layers_i + ".ffn_norm.weight", {n_embd}, backend);

-            layer.w1 = ml->get_tensor(layers_i + ".feed_forward.w1.weight", {n_embd, n_ff}, backend_split);
-            layer.w2 = ml->get_tensor(layers_i + ".feed_forward.w2.weight", { n_ff, n_embd}, backend_split);
-            layer.w3 = ml->get_tensor(layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}, backend_split);
+            layer.w1 = ml->get_tensor(layers_i + ".ffn_gate.weight", {n_embd, n_ff}, backend_split);
+            layer.w2 = ml->get_tensor(layers_i + ".ffn_down.weight", { n_ff, n_embd}, backend_split);
+            layer.w3 = ml->get_tensor(layers_i + ".ffn_up.weight", {n_embd, n_ff}, backend_split);

            if (backend == GGML_BACKEND_GPU) {
                vram_weights +=
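
Taken together, these renames switch the loader to the GGUF naming scheme: the layer count is now read from "llama.block_count" instead of "llama.layer_count", and tensors move from tok_embeddings / norm / layers.N.* to token_embd / output_norm / blk.N.*. A minimal Python sketch, built only from names visible in the diff above, that enumerates the tensor names get_tensor is now called with (the helper itself is hypothetical, not part of the codebase):

# Illustrative: list the GGUF tensor names the llama loader requests after this commit.
# Only the tensor names come from the diff; this function does not exist in the repo.
def expected_llama_tensor_names(n_blocks: int) -> list[str]:
    names = ["token_embd.weight", "output_norm.weight", "output.weight"]
    for i in range(n_blocks):
        blk = f"blk.{i}"
        names += [
            f"{blk}.attn_norm.weight",    # was layers.N.attention_norm.weight
            f"{blk}.attn_q.weight",       # was layers.N.attention.wq.weight
            f"{blk}.attn_k.weight",       # was layers.N.attention.wk.weight
            f"{blk}.attn_v.weight",       # was layers.N.attention.wv.weight
            f"{blk}.attn_output.weight",  # was layers.N.attention.wo.weight
            f"{blk}.ffn_norm.weight",
            f"{blk}.ffn_gate.weight",     # was layers.N.feed_forward.w1.weight
            f"{blk}.ffn_down.weight",     # was layers.N.feed_forward.w2.weight
            f"{blk}.ffn_up.weight",       # was layers.N.feed_forward.w3.weight
        ]
    return names

print(len(expected_llama_tensor_names(32)))  # 291 tensor names for a 32-block model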

gguf_namemap.py

Lines changed: 43 additions & 43 deletions
All 43 changed lines in this diff differ only in whitespace (alignment of the assignments and their trailing architecture comments); the name mapping itself is unchanged. The affected hunk is shown once below, with whitespace normalized:

@@ -4,92 +4,92 @@ def get_tensor_namemap( n_blocks : int):
    tensor_map = {}
    # Token embeddings
    mapped_to = "token_embd"
    tensor_map["gpt_neox.embed_in"] = mapped_to # gptneox
    tensor_map["transformer.wte"] = mapped_to # gpt2 mpt
    tensor_map["transformer.word_embeddings"] = mapped_to # falcon
    tensor_map["model.embed_tokens"] = mapped_to # llama-hf
    tensor_map["tok_embeddings"] = mapped_to # llama-pth
    # Position embeddings
    mapped_to = "pos_embd"
    tensor_map["transformer.wpe"] = mapped_to # gpt2
    # Output norm
    mapped_to = "output_norm"
    tensor_map["gpt_neox.final_layer_norm"] = mapped_to # gptneox
    tensor_map["transformer.ln_f"] = mapped_to # gpt2 falcon
    tensor_map["transformer.norm_f"] = mapped_to # mpt
    tensor_map["model.norm"] = mapped_to # llama-hf
    tensor_map["norm"] = mapped_to # llama-pth
    # Output
    mapped_to = "output"
    tensor_map["embed_out"] = mapped_to # gptneox
    tensor_map["lm_head"] = mapped_to # gpt2 mpt falcon llama-hf
    tensor_map["output"] = mapped_to # llama-pth
    # Attention and fee-forward layer blocks
    for i in range(0,n_blocks):
        # Attention norm
        mapped_to = "blk."+str(i)+".attn_norm"
        tensor_map["gpt_neox.layers."+str(i)+".input_layernorm"] = mapped_to # gptneox
        tensor_map["transformer.h."+str(i)+".ln_1"] = mapped_to # gpt2
        tensor_map["transformer.blocks."+str(i)+".norm_1"] = mapped_to # mpt
        tensor_map["transformer.h."+str(i)+".input_layernorm"] = mapped_to # falcon7b
        tensor_map["transformer.h."+str(i)+".ln_attn"] = mapped_to # falcon40b
        tensor_map["model.layers."+str(i)+".input_layernorm"] = mapped_to # llama-hf
        tensor_map["layers."+str(i)+".attention_norm"] = mapped_to # llama-pth
        # Attention norm 2
        mapped_to = "blk."+str(i)+".attn_norm_2"
        tensor_map["transformer.h."+str(i)+".ln_mlp"] = mapped_to # falcon40b
        # Attention query-key-value
        mapped_to = "blk."+str(i)+".attn_qkv"
        tensor_map["gpt_neox.layers."+str(i)+".attention.query_key_value"] = mapped_to # gptneox
        tensor_map["transformer.h."+str(i)+".attn.c_attn"] = mapped_to # gpt2
        tensor_map["transformer.blocks."+str(i)+".attn.Wqkv"] = mapped_to # mpt
        tensor_map["transformer.h."+str(i)+".self_attention.query_key_value"] = mapped_to # falcon
        # Attention query
        mapped_to = "blk."+str(i)+".attn_q"
        tensor_map["model.layers."+str(i)+".self_attn.q_proj"] = mapped_to # llama-hf
        tensor_map["layers."+str(i)+".attention.wq"] = mapped_to # llama-pth
        # Attention key
        mapped_to = "blk."+str(i)+".attn_k"
        tensor_map["model.layers."+str(i)+".self_attn.k_proj"] = mapped_to # llama-hf
        tensor_map["layers."+str(i)+".attention.wk"] = mapped_to # llama-pth
        # Attention value
        mapped_to = "blk."+str(i)+".attn_v"
        tensor_map["model.layers."+str(i)+".self_attn.v_proj"] = mapped_to # llama-hf
        tensor_map["layers."+str(i)+".attention.wv"] = mapped_to # llama-pth
        # Attention output
        mapped_to = "blk."+str(i)+".attn_output"
        tensor_map["gpt_neox.layers."+str(i)+".attention.dense"] = mapped_to # gptneox
        tensor_map["transformer.h."+str(i)+".attn.c_proj"] = mapped_to # gpt2
        tensor_map["transformer.blocks."+str(i)+".attn.out_proj"] = mapped_to # mpt
        tensor_map["transformer.h."+str(i)+".self_attention.dense"] = mapped_to # falcon
        tensor_map["model.layers."+str(i)+".self_attn.o_proj"] = mapped_to # llama-hf
        tensor_map["layers."+str(i)+".attention.wo"] = mapped_to # llama-pth
        # Feed-forward norm
        mapped_to = "blk."+str(i)+".ffn_norm"
        tensor_map["gpt_neox.layers."+str(i)+".post_attention_layernorm"] = mapped_to # gptneox
        tensor_map["transformer.h."+str(i)+".ln_2"] = mapped_to # gpt2
        tensor_map["transformer.blocks."+str(i)+".norm_2"] = mapped_to # mpt
        tensor_map["model.layers."+str(i)+".post_attention_layernorm"] = mapped_to # llama-hf
        tensor_map["layers."+str(i)+".ffn_norm"] = mapped_to # llama-pth
        # Feed-forward up
        mapped_to = "blk."+str(i)+".ffn_up"
        tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_h_to_4h"] = mapped_to # gptneox
        tensor_map["transformer.h."+str(i)+".mlp.c_fc"] = mapped_to # gpt2
        tensor_map["transformer.blocks."+str(i)+".ffn.up_proj"] = mapped_to # mpt
        tensor_map["transformer.h."+str(i)+".mlp.dense_h_to_4h"] = mapped_to # falcon
        tensor_map["model.layers."+str(i)+".mlp.up_proj"] = mapped_to # llama-hf
        tensor_map["layers."+str(i)+".feed_forward.w3"] = mapped_to # llama-pth
        # Feed-forward gate
        mapped_to = "blk."+str(i)+".ffn_gate"
        tensor_map["model.layers."+str(i)+".mlp.gate_proj"] = mapped_to # llama-hf
        tensor_map["layers."+str(i)+".feed_forward.w1"] = mapped_to # llama-pth
        # Feed-forward down
        mapped_to = "blk."+str(i)+".ffn_down"
        tensor_map["gpt_neox.layers."+str(i)+".mlp.dense_4h_to_h"] = mapped_to # gptneox
        tensor_map["transformer.h."+str(i)+".mlp.c_proj"] = mapped_to # gpt2
        tensor_map["transformer.blocks."+str(i)+".ffn.down_proj"] = mapped_to # mpt
        tensor_map["transformer.h."+str(i)+".mlp.dense_4h_to_h"] = mapped_to # falcon
        tensor_map["model.layers."+str(i)+".mlp.down_proj"] = mapped_to # llama-hf
        tensor_map["layers."+str(i)+".feed_forward.w2"] = mapped_to # llama-pth

    return tensor_map
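
A quick usage sketch of the map returned above, assuming gguf_namemap.py is importable from the working directory (the lookup keys and results are taken from the entries in the function):

# Illustrative usage of get_tensor_namemap(); assumes gguf_namemap.py is on the path.
from gguf_namemap import get_tensor_namemap

tensor_map = get_tensor_namemap(32)  # e.g. a 32-block LLaMA model

# Base tensor names (without the ".weight" suffix) resolve to GGUF base names:
print(tensor_map["model.embed_tokens"])               # token_embd      (llama-hf)
print(tensor_map["model.layers.0.self_attn.q_proj"])  # blk.0.attn_q    (llama-hf)
print(tensor_map["layers.31.feed_forward.w2"])        # blk.31.ffn_down (llama-pth)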

0 commit comments
