@@ -3484,6 +3484,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
3484
3484
class Rwkv7Model (Rwkv6Model ):
3485
3485
model_arch = gguf .MODEL_ARCH .RWKV7
3486
3486
3487
def calc_lora_rank(self, hidden_size, exponent, multiplier):
    """Derive a LoRA rank from the model width, rounded up to a multiple of 32.

    The raw rank is hidden_size ** exponent * multiplier; it is scaled into
    units of 32, rounded, clamped to at least one unit, then scaled back so
    the result is always a positive multiple of 32.
    """
    units = round(hidden_size ** exponent * multiplier / 32)
    if units < 1:
        units = 1
    return units * 32
3487
3490
def set_gguf_parameters (self ):
3488
3491
block_count = self .hparams ["num_hidden_layers" ]
3489
3492
head_size = self .hparams ["head_size" ]
@@ -3492,11 +3495,10 @@ def set_gguf_parameters(self):
3492
3495
intermediate_size = self .hparams ["intermediate_size" ] if self .hparams ["intermediate_size" ] is not None else (hidden_size * 4 )
3493
3496
3494
3497
# ICLR: In-Context-Learning-Rate
3495
- calc_lora_rank = lambda exponent , multiplier : max (1 , round (hidden_size ** exponent * multiplier / 32 )) * 32
3496
- lora_rank_decay = self .hparams ["lora_rank_decay" ] if self .hparams ["lora_rank_decay" ] is not None else calc_lora_rank (0.5 , 1.8 )
3497
- lora_rank_iclr = self .hparams ["lora_rank_iclr" ] if self .hparams ["lora_rank_iclr" ] is not None else calc_lora_rank (0.5 , 1.8 )
3498
- lora_rank_value_residual_mix = self .hparams ["lora_rank_value_residual_mix" ] if self .hparams ["lora_rank_value_residual_mix" ] is not None else calc_lora_rank (0.5 , 1.3 )
3499
- lora_rank_gate = self .hparams ["lora_rank_gate" ] if self .hparams ["lora_rank_gate" ] is not None else calc_lora_rank (0.8 , 0.6 )
3498
+ lora_rank_decay = self .hparams ["lora_rank_decay" ] if self .hparams ["lora_rank_decay" ] is not None else self .calc_lora_rank (hidden_size , 0.5 , 1.8 )
3499
+ lora_rank_iclr = self .hparams ["lora_rank_iclr" ] if self .hparams ["lora_rank_iclr" ] is not None else self .calc_lora_rank (hidden_size , 0.5 , 1.8 )
3500
+ lora_rank_value_residual_mix = self .hparams ["lora_rank_value_residual_mix" ] if self .hparams ["lora_rank_value_residual_mix" ] is not None else self .calc_lora_rank (hidden_size , 0.5 , 1.3 )
3501
+ lora_rank_gate = self .hparams ["lora_rank_gate" ] if self .hparams ["lora_rank_gate" ] is not None else self .calc_lora_rank (hidden_size , 0.8 , 0.6 )
3500
3502
3501
3503
# RWKV isn't context limited
3502
3504
self .gguf_writer .add_context_length (1048576 )
0 commit comments