@@ -1514,35 +1514,6 @@ def set_gguf_parameters(self):
         if self.hparams.get("vocab_size", 32000) == 49152:
             self.gguf_writer.add_add_bos_token(False)
 
-        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
-            if rope_scaling.get("rope_type", '').lower() == "llama3":
-                base = hparams.get("rope_theta", 10000.0)
-                dim = int((hparams["hidden_size"] // hparams["num_attention_heads"]) * hparams.get("partial_rotary_embeddings", 1.0))
-                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
-
-                factor = rope_scaling.get("factor", 8.0)
-                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
-                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
-                old_context_len = hparams.get("original_max_position_embeddings", 8192)
-
-                low_freq_wavelen = old_context_len / low_freq_factor
-                high_freq_wavelen = old_context_len / high_freq_factor
-
-                rope_factors = []
-                for freq in freqs:
-                    wavelen = 2 * math.pi / freq
-                    if wavelen < high_freq_wavelen:
-                        rope_factors.append(1)
-                    elif wavelen > low_freq_wavelen:
-                        rope_factors.append(factor)
-                    else:
-                        assert low_freq_wavelen != high_freq_wavelen
-                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
-                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
-
-                self.gguf_writer.add_rope_scaling_attn_factors(1.0)
-                self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
-
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
         if n_head_kv is not None and n_head != n_head_kv:
@@ -1599,6 +1570,34 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
     def prepare_tensors(self):
+        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
+            if rope_scaling.get("rope_type", '').lower() == "llama3":
+                base = self.hparams.get("rope_theta", 10000.0)
+                dim = int((self.hparams["hidden_size"] // self.hparams["num_attention_heads"]) * self.hparams.get("partial_rotary_embeddings", 1.0))
+                freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
+
+                factor = rope_scaling.get("factor", 8.0)
+                low_freq_factor = rope_scaling.get("low_freq_factor", 1.0)
+                high_freq_factor = rope_scaling.get("high_freq_factor", 4.0)
+                old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
+
+                low_freq_wavelen = old_context_len / low_freq_factor
+                high_freq_wavelen = old_context_len / high_freq_factor
+
+                rope_factors = []
+                for freq in freqs:
+                    wavelen = 2 * math.pi / freq
+                    if wavelen < high_freq_wavelen:
+                        rope_factors.append(1)
+                    elif wavelen > low_freq_wavelen:
+                        rope_factors.append(factor)
+                    else:
+                        assert low_freq_wavelen != high_freq_wavelen
+                        smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
+                        rope_factors.append(1 / ((1 - smooth) / factor + smooth))
+
+                self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FREQS] + ".weight", np.array(rope_factors, dtype=np.float32))
+
         super().prepare_tensors()
 
         if self._experts is not None:
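
For reference, the llama3 rope-factor computation this diff relocates from set_gguf_parameters() into prepare_tensors() can be exercised standalone. Below is a minimal sketch; the hparams and rope_scaling dicts hold made-up illustrative values (not from any real checkpoint), and full rotary dimensions are assumed (no partial_rotary_embeddings). It mirrors the per-frequency smoothing in the added block and prints the factors that the new code would write out as the rope_freqs tensor.

# Standalone sketch of the llama3 rope-scaling factor computation.
# All hyperparameter values below are hypothetical, for illustration only.
import math

import numpy as np

hparams = {
    "hidden_size": 4096,
    "num_attention_heads": 32,
    "rope_theta": 500000.0,
    "original_max_position_embeddings": 8192,
}
rope_scaling = {"factor": 8.0, "low_freq_factor": 1.0, "high_freq_factor": 4.0}

# Per-head rotary dimension and the base RoPE frequencies (one per dim pair).
dim = hparams["hidden_size"] // hparams["num_attention_heads"]  # 128
freqs = 1.0 / (hparams["rope_theta"] ** (np.arange(0, dim, 2, dtype=np.float32) / dim))

old_context_len = hparams["original_max_position_embeddings"]
low_freq_wavelen = old_context_len / rope_scaling["low_freq_factor"]
high_freq_wavelen = old_context_len / rope_scaling["high_freq_factor"]

rope_factors = []
for freq in freqs:
    wavelen = 2 * math.pi / freq
    if wavelen < high_freq_wavelen:
        # High-frequency dims (short wavelengths) are left unscaled.
        rope_factors.append(1.0)
    elif wavelen > low_freq_wavelen:
        # Low-frequency dims are scaled by the full factor.
        rope_factors.append(rope_scaling["factor"])
    else:
        # In-between dims interpolate smoothly between the two regimes.
        smooth = (old_context_len / wavelen - rope_scaling["low_freq_factor"]) \
            / (rope_scaling["high_freq_factor"] - rope_scaling["low_freq_factor"])
        rope_factors.append(1.0 / ((1.0 - smooth) / rope_scaling["factor"] + smooth))

print(np.array(rope_factors, dtype=np.float32))  # one factor per rotary dim pair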