Skip to content

Commit cfd3be7

Browse files
ikawrakow and Kawrakow authored
ggml : same IQ4_NL quantization for CPU/CUDA/Metal (#6196)
* Make quantize_row_iq4_nl do the same thing as quantization on CUDA * Make quantize_row_iq4_nl do the same thing as quantization on CUDA This time for real. backend-ops tests pass. * Now fix test-quantize-fns --------- Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent 5b7b0ac commit cfd3be7

File tree

1 file changed

+25
-13
lines changed

1 file changed

+25
-13
lines changed

ggml-quants.c

Lines changed: 25 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -11705,9 +11705,8 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
1170511705
ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
1170611706
float * scales, float * weight, uint8_t * L,
1170711707
const int8_t * values,
11708-
const float * quant_weights) {
11709-
11710-
const int ntry = 7;
11708+
const float * quant_weights,
11709+
const int ntry) {
1171111710

1171211711
float sigma2 = 0;
1171311712
for (int j = 0; j < super_block_size; ++j) sigma2 += x[j]*x[j];
@@ -11719,6 +11718,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
1171911718
float max_scale = 0, amax_scale = 0;
1172011719
for (int ib = 0; ib < super_block_size/block_size; ++ib) {
1172111720
const float * xb = x + ib*block_size;
11721+
uint8_t * Lb = L + ib*block_size;
1172211722
if (quant_weights) {
1172311723
const float * qw = quant_weights + ib*block_size;
1172411724
for (int j = 0; j < block_size; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
@@ -11736,12 +11736,13 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
1173611736
scales[ib] = 0;
1173711737
continue;
1173811738
}
11739-
float d = -max/values[0];
11739+
float d = ntry > 0 ? -max/values[0] : max/values[0];
1174011740
float id = 1/d;
1174111741
float sumqx = 0, sumq2 = 0;
1174211742
for (int j = 0; j < block_size; ++j) {
1174311743
float al = id*xb[j];
1174411744
int l = best_index_int8(16, values, al);
11745+
Lb[j] = l;
1174511746
float q = values[l];
1174611747
float w = weight[j];
1174711748
sumqx += w*q*xb[j];
@@ -11796,9 +11797,11 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
1179611797
}
1179711798
} else {
1179811799
dh[0] = GGML_FP32_TO_FP16(scales[0]);
11799-
float id = scales[0] ? 1/scales[0] : 0;
11800-
for (int j = 0; j < super_block_size; ++j) {
11801-
L[j] = best_index_int8(16, values, id*x[j]);
11800+
if (ntry > 0) {
11801+
float id = scales[0] ? 1/scales[0] : 0;
11802+
for (int j = 0; j < super_block_size; ++j) {
11803+
L[j] = best_index_int8(16, values, id*x[j]);
11804+
}
1180211805
}
1180311806
}
1180411807

@@ -11823,7 +11826,7 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int nrow
1182311826
for (int ibl = 0; ibl < nblock; ++ibl) {
1182411827
const float * qw = quant_weights ? quant_weights + QK4_NL*ibl : NULL;
1182511828
quantize_row_iq4_nl_impl(QK4_NL, 32, src + QK4_NL*ibl, &iq4[ibl].d, iq4[ibl].qs, &unused_h, unused_l,
11826-
&scale, weight, L, kvalues_iq4nl, qw);
11829+
&scale, weight, L, kvalues_iq4nl, qw, 7);
1182711830
}
1182811831
src += n_per_row;
1182911832
qrow += nblock*sizeof(block_iq4_nl);
@@ -11832,14 +11835,23 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int nrow
1183211835
}
1183311836

1183411837
void quantize_row_iq4_nl(const float * restrict x, void * restrict vy, int k) {
11835-
assert(k % QK4_NL == 0);
11836-
block_iq4_nl * restrict y = vy;
11837-
quantize_row_iq4_nl_reference(x, y, k);
11838+
GGML_ASSERT(k%QK4_NL == 0);
11839+
int nblock = k/QK4_NL;
11840+
uint8_t L[QK4_NL];
11841+
float weight[QK4_NL];
11842+
uint16_t unused_h;
11843+
uint8_t * unused_l = NULL;
11844+
float scale;
11845+
block_iq4_nl * iq4 = (block_iq4_nl *)vy;
11846+
for (int ibl = 0; ibl < nblock; ++ibl) {
11847+
quantize_row_iq4_nl_impl(QK4_NL, 32, x + QK4_NL*ibl, &iq4[ibl].d, iq4[ibl].qs, &unused_h, unused_l,
11848+
&scale, weight, L, kvalues_iq4nl, NULL, -1);
11849+
}
1183811850
}
1183911851

1184011852
void quantize_row_iq4_nl_reference(const float * restrict x, block_iq4_nl * restrict y, int k) {
1184111853
assert(k % QK4_NL == 0);
11842-
quantize_iq4_nl(x, y, 1, k, NULL);
11854+
quantize_row_iq4_nl(x, y, k);
1184311855
}
1184411856

1184511857
size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
@@ -11857,7 +11869,7 @@ size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int nrow
1185711869
for (int ibl = 0; ibl < nblock; ++ibl) {
1185811870
const float * qw = quant_weights ? quant_weights + QK_K*ibl : NULL;
1185911871
quantize_row_iq4_nl_impl(QK_K, 32, src + QK_K*ibl, &iq4[ibl].d, iq4[ibl].qs, &iq4[ibl].scales_h, iq4[ibl].scales_l,
11860-
scales, weight, L, kvalues_iq4nl, qw);
11872+
scales, weight, L, kvalues_iq4nl, qw, 7);
1186111873
}
1186211874
src += n_per_row;
1186311875
qrow += nblock*sizeof(block_iq4_xs);

0 commit comments

Comments
 (0)