Commit 639b374

convert-hf : convert norms to f32 by default
1 parent 21068b6 commit 639b374

1 file changed: +10 -16 lines changed

convert-hf-to-gguf.py

Lines changed: 10 additions & 16 deletions
@@ -199,17 +199,21 @@ def write_tensors(self):
                 if self.ftype == 0 and data_dtype == np.float16:
                     data = data.astype(np.float32)
 
-                # when both are true, the tensor keeps its original type
+                # when both are True, f32 should win
                 extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
                 extra_f16 = self.extra_f16_tensors(name, new_name, bid, n_dims)
 
-                # 1d tensors need to be converted to float32
-                # Most of the codebase that takes in 1D tensors only handles F32 tensors
-                if self.ftype == 1 and data_dtype == np.float16 and (n_dims == 1 or extra_f32) and not extra_f16:
-                    data = data.astype(np.float32)
+                # Most of the codebase that takes in 1D tensors or norms only handles F32 tensors
+                extra_f32 = extra_f32 or n_dims == 1 or new_name.endswith("_norm.weight")
 
                 # if f16 desired, convert any float32 2-dim weight tensors to float16
-                if self.ftype == 1 and data_dtype == np.float32 and (name.endswith(".weight") and n_dims >= 2 or extra_f16) and not extra_f32:
+                extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)
+
+                # when both extra_f32 and extra_f16 are False, convert to float32 by default
+                if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
+                    data = data.astype(np.float32)
+
+                if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
                     data = data.astype(np.float16)
 
                 # reverse shape to make it similar to the internal ggml dimension order
@@ -1100,11 +1104,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         return [(self.map_tensor_name(name), data_torch)]
 
-    def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
-        del name, bid, n_dims  # unused
-
-        return new_name.endswith("_norm.weight")
-
     def _stack_qk_norm(self, bid: int, n_head: int, norms: dict[str, Tensor], layer_name: str = "q_layernorm"):
         datas: list[Tensor] = []
         # extract the norms in order
@@ -1505,11 +1504,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         return [(self.map_tensor_name(name), data_torch)]
 
-    def extra_f32_tensors(self, name: str, new_name: str, bid: int | None, n_dims: int) -> bool:
-        del name, bid, n_dims  # unused
-
-        return new_name.endswith("_norm.weight")
-
     def write_tensors(self):
         super().write_tensors()
 
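For reference, a minimal self-contained sketch of the dtype-selection rule this commit leaves in place. The standalone helper choose_dtype and its signature are hypothetical (in the script the logic runs inline in Model.write_tensors() against self.ftype and the extra_f32_tensors() / extra_f16_tensors() hooks), but the rule mirrors the first hunk above: 1-D tensors and *_norm.weight stay F32 even when F16 output is requested, 2-D .weight tensors go to F16, and tensors matching neither override now default to F32.

import numpy as np

# Hypothetical standalone helper; the real logic sits inline in Model.write_tensors().
# ftype follows the script's convention: 0 = f32 output, 1 = f16 output.
def choose_dtype(data: np.ndarray, name: str, new_name: str, ftype: int,
                 extra_f32: bool = False, extra_f16: bool = False) -> np.ndarray:
    n_dims = data.ndim
    data_dtype = data.dtype

    # if f32 desired, convert any float16 to float32
    if ftype == 0 and data_dtype == np.float16:
        data = data.astype(np.float32)

    # 1D tensors and norms are kept in F32, matching what most of the codebase expects
    extra_f32 = extra_f32 or n_dims == 1 or new_name.endswith("_norm.weight")

    # 2-dim .weight tensors are candidates for F16
    extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)

    # when both flags are False, convert to float32 by default; when both are True, f32 wins
    if ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
        data = data.astype(np.float32)

    if ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
        data = data.astype(np.float16)

    return data

# With f16 output requested, a norm stays f32 while a 2-D weight becomes f16:
norm = np.ones(4096, dtype=np.float32)
w = np.ones((32, 4096), dtype=np.float32)
assert choose_dtype(norm, "model.norm.weight", "output_norm.weight", ftype=1).dtype == np.float32
assert choose_dtype(w, "lm_head.weight", "output.weight", ftype=1).dtype == np.float16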