|
5 | 5 | import argparse
|
6 | 6 | import os
|
7 | 7 | import sys
|
| 8 | +from tqdm import tqdm |
8 | 9 | from pathlib import Path
|
9 | 10 |
|
10 | 11 | import numpy as np
|
@@ -63,31 +64,43 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
|
63 | 64 | for part in field.parts:
|
64 | 65 | part.byteswap(inplace=True)
|
65 | 66 | logger.info(f"* Converting tensors ({len(reader.tensors)})")
|
66 |
| - for idx, tensor in enumerate(reader.tensors): |
| 67 | + |
| 68 | + for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")): |
67 | 69 | log_message = (
|
68 |
| - f" - {idx:4}: Converting tensor {repr(tensor.name)}, type={tensor.tensor_type.name}, " |
69 |
| - f"elements={tensor.n_elements}... " |
| 70 | + f"Converting tensor {repr(tensor.name)}, " |
| 71 | + f"type={tensor.tensor_type.name}, " |
| 72 | + f"elements={tensor.n_elements} " |
70 | 73 | )
|
71 |
| - tensor_type = tensor.tensor_type |
| 74 | + |
| 75 | + # Byte-swap each part of the tensor's field |
72 | 76 | for part in tensor.field.parts:
|
73 | 77 | part.byteswap(inplace=True)
|
74 |
| - if tensor_type != gguf.GGMLQuantizationType.Q8_0: |
| 78 | + |
| 79 | + # Byte-swap tensor data if necessary |
| 80 | + if tensor.tensor_type == gguf.GGMLQuantizationType.Q8_0: |
| 81 | + # Handle Q8_0 tensor blocks (block_q8_0) |
| 82 | + # Specific handling of block_q8_0 is required. |
| 83 | + # Each block_q8_0 consists of an f16 delta (scaling factor) followed by 32 int8 quantizations. |
| 84 | + |
| 85 | + block_size = 34 # 34 bytes = <f16 delta scaling factor> + 32 * <int8 quant> |
| 86 | + |
| 87 | + n_blocks = len(tensor.data) // block_size |
| 88 | + for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)): |
| 89 | + block_offs = block_num * block_size |
| 90 | + |
| 91 | + # Byte-Swap f16 sized delta field |
| 92 | + delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16) |
| 93 | + delta.byteswap(inplace=True) |
| 94 | + |
| 95 | + # Byte-Swap Q8 weights |
| 96 | + if block_num % 100000 == 0: |
| 97 | + inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) // n_blocks}]") |
| 98 | + |
| 99 | + else: |
| 100 | + # Handle other tensor types |
75 | 101 | tensor.data.byteswap(inplace=True)
|
76 |
| - logger.info(log_message) |
77 |
| - continue |
78 |
| - |
79 |
| - # A Q8_0 block consists of a f16 delta followed by 32 int8 quants, so 34 bytes |
80 |
| - block_size = 34 |
81 |
| - n_blocks = len(tensor.data) // block_size |
82 |
| - for block_num in range(n_blocks): |
83 |
| - block_offs = block_num * block_size |
84 |
| - # I know I said f16, but it doesn't matter here - any simple 16 bit type works. |
85 |
| - delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16) |
86 |
| - delta.byteswap(inplace=True) |
87 |
| - if block_num % 100000 == 0: |
88 |
| - log_message += f"[{(n_blocks - block_num) // 1000}K]" |
89 |
| - |
90 |
| - logger.info(log_message) |
| 102 | + |
| 103 | + pbar.set_description(log_message) |
91 | 104 |
|
92 | 105 | logger.info("* Completion")
|
93 | 106 |
|
|
0 commit comments