@@ -199,17 +199,21 @@ def write_tensors(self):
                 if self.ftype == 0 and data_dtype == np.float16:
                     data = data.astype(np.float32)
 
-                # when both are true, the tensor keeps its original type
+                # when both are True, f32 should win
                 extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
                 extra_f16 = self.extra_f16_tensors(name, new_name, bid, n_dims)
 
-                # 1d tensors need to be converted to float32
-                # Most of the codebase that takes in 1D tensors only handles F32 tensors
-                if self.ftype == 1 and data_dtype == np.float16 and (n_dims == 1 or extra_f32) and not extra_f16:
-                    data = data.astype(np.float32)
+                # Most of the codebase that takes in 1D tensors or norms only handles F32 tensors
+                extra_f32 = extra_f32 or n_dims == 1 or new_name.endswith("_norm.weight")
 
                 # if f16 desired, convert any float32 2-dim weight tensors to float16
-                if self.ftype == 1 and data_dtype == np.float32 and (name.endswith(".weight") and n_dims >= 2 or extra_f16) and not extra_f32:
+                extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)
+
+                # when both extra_f32 and extra_f16 are False, convert to float32 by default
+                if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
+                    data = data.astype(np.float32)
+
+                if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
                     data = data.astype(np.float16)
 
                 # reverse shape to make it similar to the internal ggml dimension order
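
For readers of the diff, here is a minimal standalone sketch of the resulting precedence (the helper name `target_dtype` and the example tensor names are illustrative only, not part of this change): anything flagged extra_f32, 1-dimensional, or ending in `_norm.weight` stays f32 even when f16 output is requested, and only extra_f16 / 2-dim `.weight` tensors are narrowed to f16.

```python
import numpy as np

def target_dtype(ftype: int, data_dtype, name: str, new_name: str, n_dims: int,
                 extra_f32: bool = False, extra_f16: bool = False):
    """Illustrative mirror of the conversion rules above; not part of the patch."""
    # extra_f32 wins over extra_f16 when both end up True
    extra_f32 = extra_f32 or n_dims == 1 or new_name.endswith("_norm.weight")
    extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)

    if ftype == 0 and data_dtype == np.float16:
        return np.float32   # f32 output requested: everything widens to f32
    if ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
        return np.float32   # norms, 1D tensors, and "neither flag" cases stay f32
    if ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
        return np.float16   # ordinary 2-dim weights are narrowed to f16
    return data_dtype       # otherwise keep the original type

# a 1-dim norm weight is kept in f32 even with f16 output (ftype == 1):
assert target_dtype(1, np.float16, "model.layers.0.input_layernorm.weight",
                    "blk.0.attn_norm.weight", n_dims=1) == np.float32
# a regular 2-dim weight is converted to f16:
assert target_dtype(1, np.float32, "model.layers.0.mlp.up_proj.weight",
                    "blk.0.ffn_up.weight", n_dims=2) == np.float16
```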
@@ -1100,11 +1104,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         return [(self.map_tensor_name(name), data_torch)]
 
-    def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
-        del name, bid, n_dims  # unused
-
-        return new_name.endswith("_norm.weight")
-
     def _stack_qk_norm(self, bid: int, n_head: int, norms: dict[str, Tensor], layer_name: str = "q_layernorm"):
         datas: list[Tensor] = []
         # extract the norms in order
@@ -1505,11 +1504,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         return [(self.map_tensor_name(name), data_torch)]
 
-    def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
-        del name, bid, n_dims  # unused
-
-        return new_name.endswith("_norm.weight")
-
     def write_tensors(self):
         super().write_tensors()
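
The two `extra_f32_tensors` overrides deleted above only repeated the `_norm.weight` check that the base `write_tensors` now applies to every model, so they become redundant. A subclass would only need the hook for tensors not already covered by the new 1-dim / `_norm.weight` defaults; a hypothetical sketch (class and tensor names are illustrative, not part of this commit):

```python
class ExampleModel(Model):  # Model is the converter's base class in convert-hf-to-gguf.py
    def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
        del name, bid  # unused
        # hypothetical: also keep a 2-dim expert-routing tensor in f32
        return n_dims == 2 and new_name.endswith("ffn_gate_inp.weight")
```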