@@ -1078,17 +1078,76 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_name("MiniCPM")
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
         self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+        self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])

     def set_vocab(self):
         self._set_vocab_hf()

+    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+        if n_kv_head is not None and n_head != n_kv_head:
+            n_head //= n_kv_head
+
+        return (
+            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
+        )
+
+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        n_head = self.hparams.get("num_attention_heads")
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        for name, data_torch in self.get_tensors():
+            # we don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            # HF models permute some of the tensors, so we need to undo that
+            if name.endswith(("q_proj.weight")):
+                data_torch = self._reverse_hf_permute(data_torch, n_head, n_head)
+            if name.endswith(("k_proj.weight")):
+                data_torch = self._reverse_hf_permute(data_torch, n_head, n_kv_head)
+
+            data = data_torch.squeeze().numpy()
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)
+

 class QwenModel(Model):
     @staticmethod
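A side note on the reshape/swapaxes/reshape pattern inside the new `_reverse_hf_permute` helper (used above to undo the Hugging Face permutation of `q_proj.weight` and `k_proj.weight`): per head, it splits the rows into two halves and interleaves them, so that row j of the first half lands next to row j of the second half. The snippet below is a minimal, hypothetical sketch, not part of the commit; it assumes only PyTorch and uses made-up sizes (one head, head dimension 4) so the resulting row order is easy to read.

import torch

# Toy illustration only: one attention head with head_dim = 4 (made-up values).
w = torch.arange(4).reshape(4, 1).float()   # rows 0, 1, 2, 3
n_head = 1

# Same reshape/swapaxes/reshape pattern as _reverse_hf_permute above.
out = (
    w.reshape(n_head, 2, w.shape[0] // n_head // 2, *w.shape[1:])
    .swapaxes(1, 2)
    .reshape(w.shape)
)

print(out.flatten().tolist())   # [0.0, 2.0, 1.0, 3.0] -> halves [0, 1] and [2, 3] interleaved

On real checkpoints the same reshuffle runs over the full projection matrices, with `n_head` taken from `num_attention_heads` (and, for the key projection, additionally adjusted by `num_key_value_heads`, as in the diff).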