
Commit eeffb0a

metascroy authored and malfet committed
Test for gguf_util correctness (#209)

* add correctness test function
* correctness test on gguf
* fix
1 parent 46639bb · commit eeffb0a

File tree

2 files changed: +111 −7 lines changed

* .github/workflows/gguf_util.yml
* build/gguf_util.py

.github/workflows/gguf_util.yml

Lines changed: 56 additions & 0 deletions
```yaml
name: Compile main

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  gguf-util-test:
    strategy:
      matrix:
        runner: [macos-14]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.11
      - name: Print machine info
        run: |
          uname -a
          if [ $(uname -s) == Darwin ]; then
            sysctl machdep.cpu.brand_string
            sysctl machdep.cpu.core_count
          fi
      - name: Install requirements
        run: |
          echo "Installing pip packages"
          pip install gguf
          pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
          pip install -r requirements.txt

          git clone https://github.com/ggerganov/llama.cpp.git
          pushd llama.cpp
          make
          popd

      - name: Download GGUF files
        run: |
          mkdir gguf_files
          wget -O gguf_files/llama-2-7b.Q4_0.gguf "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_0.gguf?download=true"
          ./llama.cpp/quantize --allow-requantize gguf_files/llama-2-7b.Q4_0.gguf gguf_files/llama-2-7b.Q4_0.requant_F32.gguf F32

      - name: Load files
        run: |
          touch test.py
          echo "from build.gguf_util import test_by_to_float" >> test.py
          echo "test_by_to_float(\"gguf_files/llama-2-7b.Q4_0.gguf\", \"gguf_files/llama-2-7b.Q4_0.requant_F32.gguf\")" >> test.py
          cat test.py
          python test.py

          echo "Tests complete."
```

build/gguf_util.py

Lines changed: 55 additions & 7 deletions
```diff
@@ -6,6 +6,56 @@

 import torch
 import gguf
+from quantize import group_dequantize_tensor_from_qparams
+
+def to_float(t: gguf.gguf_reader.ReaderTensor):
+    """
+    Unpack and dequantize a GGUF tensor to a torch tensor of type torch.float32.
+    """
+
+    # Quantized weights are dequantized to float; float weights are converted directly.
+    if t.tensor_type == gguf.GGMLQuantizationType.Q4_0:
+        return group_dequantize_tensor_from_qparams(*Q4_0.unpack(t), Q4_0.n_bit, Q4_0.groupsize).to(torch.float32)
+    elif t.tensor_type == gguf.GGMLQuantizationType.Q6_K:
+        return group_dequantize_tensor_from_qparams(*Q6_K.unpack(t), Q6_K.n_bit, Q6_K.groupsize).to(torch.float32)
+    elif t.tensor_type == gguf.GGMLQuantizationType.F16:
+        return F16.unpack(t).to(torch.float32)
+    elif t.tensor_type == gguf.GGMLQuantizationType.F32:
+        return F32.unpack(t).to(torch.float32)
+    else:
+        raise ValueError(f"Unsupported tensor type {t.tensor_type}")
+
+
+def test_by_to_float(source_file: str, target_file: str) -> None:
+    """
+    Tests the methods in this file by using the to_float method and comparing
+    with a correct reference. Raises an error if there is a mismatch.
+
+    In more detail, a GGUF source_file with various GGUF tensor types is parsed, and these
+    tensors are converted with to_float. These are then compared against a GGUF target_file.
+    The target GGUF file must contain only F32 tensors and should be generated by a method
+    that is known to be correct.
+    """
+
+    gguf_sources = {t.name: t for t in gguf.GGUFReader(source_file, "r").tensors}
+    gguf_targets = {t.name: t for t in gguf.GGUFReader(target_file, "r").tensors}
+
+    for t in gguf_targets.values():
+        assert t.tensor_type == gguf.GGMLQuantizationType.F32, f"target_file must only contain F32 tensors, but found tensor {t.name} with type {repr(t.tensor_type)}."
+    assert gguf_sources.keys() == gguf_targets.keys(), "source_file and target_file should have the same tensors (by name)"
+
+    for k in gguf_sources:
+        source = to_float(gguf_sources[k])
+        target = to_float(gguf_targets[k])
+
+        if not torch.allclose(source, target):
+            print(f"After calling to_float on source tensor {k} of type {repr(gguf_sources[k].tensor_type)}, it does not match its target.")
+            print("First 5 elements of converted source: ", source.reshape(-1)[0:5])
+            print("First 5 elements of target: ", target.reshape(-1)[0:5])
+            assert False, "found mismatch"
+
+    print("All tensors match.")
+

 class F16:
     @staticmethod
```
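For orientation, a minimal sketch of driving to_float from the reader side, using only the gguf.GGUFReader API already exercised by test_by_to_float (the file path is illustrative):

```python
# Dump every tensor of a GGUF file as float32 (sketch).
import gguf
from build.gguf_util import to_float

reader = gguf.GGUFReader("gguf_files/llama-2-7b.Q4_0.gguf", "r")
for t in reader.tensors:
    w = to_float(t)  # torch.float32; shape un-reversed by the unpack helpers
    print(t.name, tuple(w.shape))
```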
```diff
@@ -14,7 +64,7 @@ def unpack(gguf_tensor: gguf.gguf_reader.ReaderTensor):
         Unpacks GGUF F16 tensor.
         """
         assert gguf_tensor.tensor_type == gguf.GGMLQuantizationType.F16
-        reversed_shape = gguf_tensor.shape[::-1] # TODO: GGUF tensors are reversed
+        reversed_shape = gguf_tensor.shape[::-1]
         new_tensor = gguf_tensor.data.reshape(reversed_shape)
         return torch.from_numpy(new_tensor).to(torch.float16)

@@ -25,7 +75,7 @@ def unpack(gguf_tensor: gguf.gguf_reader.ReaderTensor):
         Unpacks GGUF F32 tensor.
         """
         assert gguf_tensor.tensor_type == gguf.GGMLQuantizationType.F32
-        reversed_shape = gguf_tensor.shape[::-1] # TODO: GGUF tensors are reversed
+        reversed_shape = gguf_tensor.shape[::-1]
         new_tensor = gguf_tensor.data.reshape(reversed_shape)
         return torch.from_numpy(new_tensor).to(torch.float32)
```

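The shape[::-1] in both unpack methods reflects GGUF reporting dimensions in the reverse of torch's row-major order. A toy illustration (hypothetical shape, not from a real file):

```python
import numpy as np
import torch

data = np.arange(6, dtype=np.float32)  # flat payload, like gguf_tensor.data
gguf_shape = (3, 2)                    # shape as the GGUF reader reports it
t = torch.from_numpy(data.reshape(gguf_shape[::-1]))
print(t.shape)  # torch.Size([2, 3]) -- the orientation torch expects
```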
```diff
@@ -61,7 +111,7 @@ def unpack(gguf_tensor: gguf.gguf_reader.ReaderTensor):

         assert gguf_tensor.tensor_type == gguf.GGMLQuantizationType.Q4_0
         assert len(gguf_tensor.shape) == 2
-        nc, nr = gguf_tensor.shape # TODO: CHECK THIS. GGUF TENSOR REVERSED?
+        nc, nr = gguf_tensor.shape # GGUF tensor has reversed shape

         QK4_0 = 32 # groupsize
```

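For reference, the block_q4_0 struct this parser walks (defined in llama.cpp's ggml-common.h) can be sketched as a numpy dtype; the 18-byte size matches the block_q4_0_size assertion in the next hunk:

```python
import numpy as np

QK4_0 = 32  # groupsize, as above
# block_q4_0 from ggml-common.h: one fp16 scale, then 16 bytes of packed nibbles.
block_q4_0 = np.dtype([
    ("d", np.float16),             # delta (per-group scale)
    ("qs", np.uint8, QK4_0 // 2),  # 32 4-bit quants, two per byte
])
assert block_q4_0.itemsize == 18
```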
```diff
@@ -84,7 +134,7 @@ def unpack(gguf_tensor: gguf.gguf_reader.ReaderTensor):
         # Check we finished parsing
         assert curr == block_q4_0_size

-        # Unpack quantized values. Unlike the code in ggml-quants.c, we do not subtract 16
+        # Unpack quantized values. Unlike the code in ggml-quants.c, we do not subtract 8
         x0 = qs & 0x0F
         x1 = qs >> 4
```

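Why no subtraction here: ggml-quants.c dequantizes Q4_0 as x = d * (q - 8), whereas this file defers the offset to group_dequantize_tensor_from_qparams. A numeric sketch of the equivalence, assuming a qparams convention of (q - 2**(n_bit - 1)) * scale + zero (that helper is not shown in this diff):

```python
import torch

d = torch.tensor(0.25)           # per-group scale
q = torch.tensor([0, 7, 8, 15])  # raw nibbles, as produced by x0/x1 above

ggml_style = d * (q - 8)                      # reference: ggml-quants.c
qparams_style = (q - 2 ** (4 - 1)) * d + 0.0  # n_bit = 4, zero = 0
assert torch.equal(ggml_style, qparams_style)
```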
```diff
@@ -117,8 +167,6 @@ def unpack(gguf_tensor: gguf.gguf_reader.ReaderTensor):
         * s is a torch.float32 tensor of shape (nr, -1) with one scale per group
         * z is a torch.float32 tensor of shape (nr, -1) with one zero per group

-        There is one element of s/z per group of 32 elements of 4.
-
         Note that z is always zero because Q6_K is a scale-only scheme.

         See https://github.com/ggerganov/llama.cpp/blob/master/ggml-common.h for definition of block_q6_K:
```
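That block_q6_K struct, rendered as a numpy dtype for orientation (a sketch; field order and sizes follow the C definition in ggml-common.h):

```python
import numpy as np

QK_K = 256
block_q6_K = np.dtype([
    ("ql", np.uint8, QK_K // 2),      # lower 4 bits of each 6-bit quant
    ("qh", np.uint8, QK_K // 4),      # upper 2 bits of each 6-bit quant
    ("scales", np.int8, QK_K // 16),  # per-group scales, quantized to 8 bits
    ("d", np.float16),                # super-block scale (ggml_half)
])
assert block_q6_K.itemsize == 210  # 128 + 64 + 16 + 2 bytes
```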
```diff
@@ -142,7 +190,7 @@ def unpack(gguf_tensor: gguf.gguf_reader.ReaderTensor):
         """
         assert gguf_tensor.tensor_type == gguf.GGMLQuantizationType.Q6_K
         assert len(gguf_tensor.shape) == 2
-        nc, nr = gguf_tensor.shape # TODO: CHECK THIS. GGUF TENSOR REVERSED?
+        nc, nr = gguf_tensor.shape # GGUF tensor has reversed shape
         QK_K = 256

         # Parse block_q6_K
```
