@@ -77,13 +77,11 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
         for part_name in self.part_names:
             print(f"gguf: loading model part '{part_name}'")
             ctx: ContextManager[Any]
-
             if self.is_safetensors:
                 from safetensors import safe_open
                 ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu"))
             else:
-                ctx = contextlib.nullcontext(
-                    torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
+                ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
 
             with ctx as model_part:
                 for name in model_part.keys():
@@ -120,8 +118,7 @@ def set_gguf_parameters(self):
         if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
             self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
             print(f"gguf: rms norm epsilon = {f_rms_eps}")
-        if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"],
-                                           optional=True)) is not None:
+        if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
             self.gguf_writer.add_layer_norm_eps(f_norm_eps)
             print(f"gguf: layer norm epsilon = {f_norm_eps}")
         if (n_experts := self.hparams.get("num_local_experts")) is not None:
@@ -209,7 +206,6 @@ def func(modelcls: type[Model]):
             for name in names:
                 cls._model_classes[name] = modelcls
             return modelcls
-
         return func
 
     @classmethod
@@ -294,7 +290,7 @@ def _set_vocab_qwen(self):
 
         # for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined
        added_vocab = tokenizer.special_tokens
-        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in (vocab | added_vocab).items()}
+        reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in (vocab | added_vocab).items()}
 
         for i in range(vocab_size):
             if i not in reverse_vocab:
@@ -779,8 +775,8 @@ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | Non
 
         return (
             weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape)
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
         )
 
     def _reverse_hf_permute_part(
@@ -931,8 +927,8 @@ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | Non
 
         return (
             weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape)
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
         )
 
 
@@ -1209,8 +1205,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
         rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct"])
-        self.gguf_writer.add_rope_dimension_count(
-            int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
+        self.gguf_writer.add_rope_dimension_count(int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
@@ -1304,7 +1299,7 @@ class LlamaModel(Model):
 
     def set_vocab(self):
         try:
-            self._set_vocab_sentencepiece()
+            self. _set_vocab_sentencepiece()
         except FileNotFoundError:
             try:
                 self._set_vocab_llama_hf()
@@ -1653,8 +1648,8 @@ def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | Non
 
         return (
             weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape)
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
         )
 
     def write_tensors(self):
@@ -1914,8 +1909,7 @@ def write_tensors(self):
 
         for name, data_torch in self.get_tensors():
             # we don't need these
-            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq",
-                              ".attn.bias", ".attn.masked_bias")):
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq", ".attn.bias", ".attn.masked_bias")):
                 continue
 
             if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_proj.weight")):
@@ -2300,8 +2294,7 @@ def write_tensors(self):
                 bid = re.findall(qkv_pattern, name)[0]
                 qkv = data_torch
                 qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
-                q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[...,
-                                                                                            q_per_kv + 1: q_per_kv + 2, :]
+                q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[..., q_per_kv + 1: q_per_kv + 2, :]
                 # The model weights of q and k equire additional reshape.
                 q = self._hf_permute_qk(rearrange(q, " o g n i -> o (g n i)").T, num_heads, num_heads)
                 k = self._hf_permute_qk(rearrange(k, " o g n i -> o (g n i)").T, num_heads, num_kv_heads)
@@ -2384,7 +2377,6 @@ def write_tensors(self):
 
             # map tensor names
             new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
-
             if new_name is None:
                 print(f"Can not map tensor {name!r}")
                 sys.exit()
@@ -2441,31 +2433,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
 
 
-@Model.register("JinaBertModel")
-class JinaBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.JINA_BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.intermediate_size = self.hparams["intermediate_size"]
-
-    def get_tensors(self):
-        for name, data in super().get_tensors():
-            if 'gated_layers' in name:
-                d1 = data[:self.intermediate_size, :]
-                name1 = name.replace('gated_layers', 'gated_layers_w')
-                d2 = data[self.intermediate_size:, :]
-                name2 = name.replace('gated_layers', 'gated_layers_v')
-                yield name1, d1
-                yield name2, d2
-                continue
-
-            yield name, data
-
-
-JinaBertForMaskedML = JinaBertModel
-
-
 @Model.register("GemmaForCausalLM")
 class GemmaModel(Model):
     model_arch = gguf.MODEL_ARCH.GEMMA
@@ -2493,8 +2460,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
-        self.gguf_writer.add_head_count_kv(
-            self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_key_length(hparams["head_dim"])
         self.gguf_writer.add_value_length(hparams["head_dim"])
@@ -2604,10 +2570,10 @@ def set_gguf_parameters(self):
         assert d_inner == 2 * d_model
 
         self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_context_length(2 ** 20)  # arbitrary value; for those who use the default
+        self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
-        self.gguf_writer.add_feed_forward_length(0)  # unused, but seemingly required when loading
-        self.gguf_writer.add_head_count(0)  # unused, but seemingly required when loading
+        self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
+        self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
         self.gguf_writer.add_ssm_conv_kernel(d_conv)
         self.gguf_writer.add_ssm_inner_size(d_inner)
@@ -2622,7 +2588,7 @@ def write_tensors(self):
 
         tok_embd = None
         tok_embd_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.TOKEN_EMBD] + ".weight"
-        output_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT] + ".weight"
+        output_name   = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT]     + ".weight"
 
         for name, data_torch in self.get_tensors():
             old_dtype = data_torch.dtype
@@ -2748,6 +2714,29 @@ def write_tensors(self):
 
             self.gguf_writer.add_tensor(new_name, data)
 
+@Model.register("JinaBertModel")
+class JinaBertModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.JINA_BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.intermediate_size = self.hparams["intermediate_size"]
+
+    def get_tensors(self):
+        for name, data in super().get_tensors():
+            if 'gated_layers' in name:
+                d1 = data[:self.intermediate_size, :]
+                name1 = name.replace('gated_layers', 'gated_layers_w')
+                d2 = data[self.intermediate_size:, :]
+                name2 = name.replace('gated_layers', 'gated_layers_v')
+                yield name1, d1
+                yield name2, d2
+                continue
+
+            yield name, data
+
+
+JinaBertForMaskedML = JinaBertModel
 
 ###### CONVERSION LOGIC ######
 
@@ -2816,6 +2805,7 @@ def main() -> None:
     print(f"Loading model: {dir_model.name}")
 
     hparams = Model.load_hparams(dir_model)
+
     with torch.inference_mode():
         model_class = Model.from_model_architecture(hparams["architectures"][0])
        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file)