@@ -3555,6 +3555,84 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         yield (new_name, data_torch)
 
 
+@Model.register("RwkvHybridForCausalLM")
+class ARwkv7Model(Model):
+    model_arch = gguf.MODEL_ARCH.ARWKV7
+
+    def set_vocab(self):
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        block_count = self.hparams["num_hidden_layers"]
+        hidden_size = self.hparams["hidden_size"]
+        head_size = self.hparams["head_size"]
+        rms_norm_eps = self.hparams["rms_norm_eps"]
+        intermediate_size = self.hparams["intermediate_size"]
+        wkv_has_gate = self.hparams["wkv_has_gate"]
+        assert self.hparams["wkv_version"] == 7
+
+        # ICLR: In-Context-Learning-Rate
+        lora_rank_decay = 64
+        lora_rank_iclr = 64
+        lora_rank_value_residual_mix = 32
+        lora_rank_gate = 128 if wkv_has_gate else 0
+
+        # RWKV isn't context limited
+        self.gguf_writer.add_context_length(1048576)
+        self.gguf_writer.add_embedding_length(hidden_size)
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
+        self.gguf_writer.add_wkv_head_size(head_size)
+        self.gguf_writer.add_decay_lora_rank(lora_rank_decay)
+        self.gguf_writer.add_iclr_lora_rank(lora_rank_iclr)
+        self.gguf_writer.add_value_residual_mix_lora_rank(lora_rank_value_residual_mix)
+        self.gguf_writer.add_gate_lora_rank(lora_rank_gate)
+        self.gguf_writer.add_feed_forward_length(intermediate_size)
+        self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_token_shift_count(1)
+
+        # required by llama.cpp, unused
+        self.gguf_writer.add_head_count(0)
+
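+    # Per-layer cache of the time-mix lerp tensors (x_r/w/k/v/a/g), fused once all six are collected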
+    lerp_weights: dict[int, dict[str, Tensor]] = {}
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if bid is not None and "self_attn.time_mixer.x_" in name:
+            try:
+                self.lerp_weights[bid][name] = data_torch
+            except KeyError:
+                self.lerp_weights[bid] = {name: data_torch}
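+            # once all six x_{r,w,k,v,a,g} tensors for this layer are present, stack them into one fused tensor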
+ if all (f"model.layers.{ bid } .self_attn.time_mixer.x_{ i } " in self .lerp_weights [bid ].keys () for i in ["r" , "w" , "k" , "v" , "a" , "g" ]):
3609
+ new_name = f"blk.{ bid } .time_mix_lerp_fused.weight"
3610
+ data = torch .stack ([self .lerp_weights [bid ][f"model.layers.{ bid } .self_attn.time_mixer.x_{ i } " ].squeeze (0 ) for i in ["r" , "w" , "k" , "v" , "a" , "g" ]], dim = 0 )
3611
+ yield (new_name , data )
3612
+ return
3613
+ else :
3614
+ data_torch = data_torch .squeeze ()
3615
+ new_name = self .map_tensor_name (name )
3616
+
3617
+ if not (new_name .endswith (".weight" ) or new_name .endswith (".bias" )):
3618
+ new_name += ".weight"
3619
+
3620
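+        # transpose the low-rank time-mix projection pairs (w1/w2, a1/a2, v1/v2, g1/g2)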
+        if any(
+            new_name.endswith(t) for t in [
+                "time_mix_w1.weight", "time_mix_w2.weight",
+                "time_mix_a1.weight", "time_mix_a2.weight",
+                "time_mix_v1.weight", "time_mix_v2.weight",
+                "time_mix_g1.weight", "time_mix_g2.weight",
+            ]
+        ):
+            data_torch = data_torch.transpose(0, 1)
+
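+        # r_k is flattened to a 1D tensor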
+        if 'r_k' in new_name:
+            data_torch = data_torch.flatten()
+
+        yield (new_name, data_torch)
+
+
 @Model.register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM")
 class MambaModel(Model):
     model_arch = gguf.MODEL_ARCH.MAMBA