@@ -99,7 +99,7 @@ def parse_args() -> argparse.Namespace:
 with open(dir_model / "config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
-if hparams["architectures"][0] != "RWForCausalLM":
+if hparams["architectures"][0] != "FalconForCausalLM":
     print("Model architecture not supported: " + hparams["architectures"][0])
 
     sys.exit(1)
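Reviewer note: after this change the script accepts only configs from the transformers-native Falcon port; checkpoints published before the rename still report `RWForCausalLM` and use the old `n_layer`/`n_head` keys. If both generations of checkpoints need converting, a small fallback lookup could bridge the two layouts. A minimal sketch, assuming the same `hparams` dict as above; `get_hparam` is a hypothetical helper, not part of this PR:

```python
# Hypothetical compatibility helper (not in this PR): prefer the new
# transformers-native Falcon key, fall back to the old RW-style key.
def get_hparam(hparams: dict, new_key: str, old_key: str):
    if new_key in hparams:
        return hparams[new_key]
    if old_key in hparams:
        return hparams[old_key]
    raise KeyError(f"config.json has neither {new_key!r} nor {old_key!r}")

# Usage, under the same assumption:
# block_count = get_hparam(hparams, "num_hidden_layers", "n_layer")
# n_head      = get_hparam(hparams, "num_attention_heads", "n_head")
```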
@@ -112,15 +112,15 @@ def parse_args() -> argparse.Namespace:
 
 print("gguf: get model metadata")
 
-block_count = hparams["n_layer"]
+block_count = hparams["num_hidden_layers"]
 
 gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048) # not in config.json
 gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
 gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
 gguf_writer.add_block_count(block_count)
-gguf_writer.add_head_count(hparams["n_head"])
+gguf_writer.add_head_count(hparams["num_attention_heads"])
 if "n_head_kv" in hparams:
     gguf_writer.add_head_count_kv(hparams["n_head_kv"])
 else:
@@ -179,7 +179,7 @@ def parse_args() -> argparse.Namespace:
 tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 
 # params for qkv transform
-n_head = hparams["n_head"]
+n_head = hparams["num_attention_heads"]
 n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
 
 head_dim = hparams["hidden_size"] // n_head
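As a quick sanity check of the head arithmetic above, here is the same computation with Falcon-7B's published config values plugged in (the numbers are assumed from the model card, not taken from this diff; 7B ships without an `n_head_kv` entry, so the script's default of 1 applies):

```python
# Falcon-7B values (assumed from the published config, not from this PR).
hidden_size = 4544
n_head = 71
n_head_kv = 1  # default used above when "n_head_kv" is missing

head_dim = hidden_size // n_head
assert head_dim == 64  # 71 query heads, 64 dims each
# n_head, n_head_kv, and head_dim are the values the qkv tensor
# transform later in the script uses to unpack the fused qkv weights.
```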