
Commit ec9ca5b

gguf-convert-endian.py: refactor convert_byteorder() to use tqdm progressbar
1 parent 21ec0ff commit ec9ca5b
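For readers unfamiliar with the pattern, the loop below shows the tqdm idioms this commit relies on: binding the bar to a name with the walrus operator so the loop body can update its description, and using leave=False so a nested inner bar disappears when it finishes. A minimal sketch only — the tensor names are hypothetical stand-ins, not code from the repository.

from tqdm import tqdm

# Hypothetical stand-in for reader.tensors; just names to iterate over.
tensors = [f"blk.{i}.attn_q.weight" for i in range(4)]

# The walrus operator (pbar := tqdm(...)) keeps a handle on the bar
# so its description can be refreshed from inside the loop.
for idx, name in enumerate(pbar := tqdm(tensors, desc="Converting tensor")):
    pbar.set_description(f"Converting tensor {name!r}")

    # leave=False makes the inner bar vanish once its loop completes,
    # so only the outer per-tensor bar stays on screen.
    for _ in (inner_pbar := tqdm(range(1000), desc="Byte-swapping Blocks", leave=False)):
        pass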


gguf-py/scripts/gguf-convert-endian.py

Lines changed: 33 additions & 20 deletions
@@ -5,6 +5,7 @@
 import argparse
 import os
 import sys
+from tqdm import tqdm
 from pathlib import Path
 
 import numpy as np
@@ -63,31 +64,43 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
         for part in field.parts:
             part.byteswap(inplace=True)
     logger.info(f"* Converting tensors ({len(reader.tensors)})")
-    for idx, tensor in enumerate(reader.tensors):
+
+    for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")):
         log_message = (
-            f"  - {idx:4}: Converting tensor {repr(tensor.name)}, type={tensor.tensor_type.name}, "
-            f"elements={tensor.n_elements}... "
+            f"Converting tensor {repr(tensor.name)}, "
+            f"type={tensor.tensor_type.name}, "
+            f"elements={tensor.n_elements} "
         )
-        tensor_type = tensor.tensor_type
+
+        # Byte-swap each part of the tensor's field
         for part in tensor.field.parts:
             part.byteswap(inplace=True)
-        if tensor_type != gguf.GGMLQuantizationType.Q8_0:
+
+        # Byte-swap tensor data if necessary
+        if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0:
+            # Q8_0 requires block-wise handling: each block_q8_0 consists of
+            # an f16 delta (scaling factor) followed by 32 int8 quantizations.
+            block_size = 34  # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant>
+
+            n_blocks = len(tensor.data) // block_size
+            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
+                block_offs = block_num * block_size
+
+                # Byte-swap the f16-sized delta field
+                delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
+                delta.byteswap(inplace=True)
+
+                # The int8 quants are single bytes and endian-agnostic;
+                # just refresh the progress text periodically.
+                if block_num % 100000 == 0:
+                    inner_pbar.set_description(f"Byte-swapping Blocks [{100 * (n_blocks - block_num) // n_blocks}% remaining]")
+
+        else:
+            # Handle other tensor types
             tensor.data.byteswap(inplace=True)
-            logger.info(log_message)
-            continue
-
-        # A Q8_0 block consists of a f16 delta followed by 32 int8 quants, so 34 bytes
-        block_size = 34
-        n_blocks = len(tensor.data) // block_size
-        for block_num in range(n_blocks):
-            block_offs = block_num * block_size
-            # I know I said f16, but it doesn't matter here - any simple 16 bit type works.
-            delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
-            delta.byteswap(inplace=True)
-            if block_num % 100000 == 0:
-                log_message += f"[{(n_blocks - block_num) // 1000}K]"
-
-        logger.info(log_message)
+
+        pbar.set_description(log_message)
 
     logger.info("* Completion")
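To make the Q8_0 handling above concrete, here is a standalone sketch of the same block-wise swap on a raw byte buffer. The buffer contents are fabricated for illustration; only the 34-byte block layout and the numpy view/byteswap calls mirror the script.

import numpy as np

BLOCK_SIZE = 34  # block_q8_0: 2-byte f16 delta + 32 int8 quants

# Fabricated stand-in for tensor.data: two Q8_0 blocks of zeroed bytes,
# with a native-endian f16 delta of 1.5 written into the first block.
data = np.zeros(2 * BLOCK_SIZE, dtype=np.uint8)
data[0:2] = np.frombuffer(np.float16(1.5).tobytes(), dtype=np.uint8)

n_blocks = len(data) // BLOCK_SIZE
for block_num in range(n_blocks):
    offs = block_num * BLOCK_SIZE
    # Only the 16-bit delta is multi-byte; viewing its two bytes as a
    # uint16 and swapping in place flips the endianness. Any simple
    # 16-bit dtype works here, since only the byte order changes.
    delta = data[offs:offs + 2].view(dtype=np.uint16)
    delta.byteswap(inplace=True)
    # The 32 int8 quants that follow are single bytes, so they are
    # endian-agnostic and left untouched.

# On a little-endian host the delta now reads back as big-endian f16:
print(data[0:2].view(np.dtype('>f2'))[0])  # -> 1.5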
