Skip to content

Commit 21068b6

Browse files
committed
convert-hf : display tensor shape
1 parent dcd8dfa commit 21068b6

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

convert-hf-to-gguf.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,7 @@ def write_tensors(self):
191191
break
192192

193193
for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)):
194+
data: np.ndarray = data # type hint
194195
n_dims = len(data.shape)
195196
data_dtype = data.dtype
196197

@@ -211,7 +212,11 @@ def write_tensors(self):
211212
if self.ftype == 1 and data_dtype == np.float32 and (name.endswith(".weight") and n_dims >= 2 or extra_f16) and not extra_f32:
212213
data = data.astype(np.float16)
213214

214-
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
215+
# reverse shape to make it similar to the internal ggml dimension order
216+
shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
217+
218+
# n_dims is implicit in the shape
219+
print(f"{new_name}, shape = {shape_str}, {old_dtype} --> {data.dtype}")
215220

216221
self.gguf_writer.add_tensor(new_name, data)
217222

@@ -1774,7 +1779,6 @@ def set_vocab(self):
17741779

17751780
tokenizer = SentencePieceProcessor()
17761781
tokenizer.LoadFromFile(str(tokenizer_path))
1777-
tokenizer.serialized_model_proto
17781782

17791783
vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
17801784

0 commit comments

Comments
 (0)