
Commit 0583484

ggml : fix quants nans when all the group weights are very close to zero (#7313)
1 parent ef277de commit 0583484

2 files changed: +36 -12 lines changed
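Why the fix works: the old guards compared the group maximum against exactly zero (`if (!max)`), so a group whose weights are tiny but nonzero slipped through, and the weighted sums used to derive the group scale can all underflow to zero, turning the scale into 0/0 = NaN (the test comment below calls this out as "nan scales due to division by zero"). The new per-format GROUP_MAX_EPS thresholds treat such groups as all-zero instead. The following is a minimal, self-contained C sketch of that failure mode and of the guard; the toy importance weight w = x*x and the 3-bit grid are assumptions for illustration, not the actual ggml quantization code.

/*
 * Minimal sketch (not ggml code): why a group whose weights are all very
 * close to zero can yield a NaN scale, and how an epsilon guard such as
 * GROUP_MAX_EPS avoids it.  The importance weight w = x*x and the 3-bit
 * grid below are illustrative assumptions only.
 */
#include <math.h>
#include <stdio.h>

#define GROUP_MAX_EPS 1e-15f   /* same threshold the commit introduces */

/* toy weighted least-squares scale for one group of n values */
static float group_scale(const float * x, int n, int eps_guard) {
    float max = fabsf(x[0]);
    for (int i = 1; i < n; ++i) max = fmaxf(max, fabsf(x[i]));

    /* old guard: `if (!max)` only catches an exact zero
       new guard: anything below GROUP_MAX_EPS counts as an all-zero group */
    if (eps_guard ? (max < GROUP_MAX_EPS) : (max == 0.0f)) {
        return 0.0f;
    }

    float sumqx = 0.0f, sumq2 = 0.0f;
    for (int i = 0; i < n; ++i) {
        float w = x[i]*x[i];                  /* underflows to 0 for x ~ 1e-30 */
        float q = roundf(7.0f*x[i]/max);      /* toy quantized value           */
        sumqx += w*q*x[i];                    /* stays 0 once w underflowed    */
        sumq2 += w*q*q;                       /* stays 0 once w underflowed    */
    }
    return sumqx/sumq2;                       /* 0/0 = NaN for such a group    */
}

int main(void) {
    float x[32];
    for (int i = 0; i < 32; ++i) x[i] = 1e-30f;   /* tiny, but not exactly zero */

    printf("exact-zero guard: scale = %f\n", group_scale(x, 32, 0));  /* nan */
    printf("epsilon guard:    scale = %f\n", group_scale(x, 32, 1));  /* 0.0 */
    return 0;
}

With the exact-zero guard the sketch prints a NaN scale; with the epsilon guard the group is stored as all zeros, which is what the patched quantizers do (scales[ib] = 0 plus a memset of the quantized values).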

ggml-quants.c

Lines changed: 18 additions & 12 deletions
@@ -14,6 +14,12 @@
 #include <stdlib.h> // for qsort
 #include <stdio.h>  // for GGML_ASSERT
 
+#define GROUP_MAX_EPS 1e-15f
+#define GROUP_MAX_EPS_IQ3_XXS 1e-8f
+#define GROUP_MAX_EPS_IQ2_S 1e-8f
+#define GROUP_MAX_EPS_IQ1_M 1e-7f
+#define GROUP_MAX_EPS_IQ1_S 1e-12f
+
 #if defined(_MSC_VER)
 // disable "possible loss of data" to avoid warnings for hundreds of casts
 // we should just be careful :)
@@ -1109,7 +1115,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
         float ax = fabsf(x[i]);
         if (ax > amax) { amax = ax; max = x[i]; }
     }
-    if (amax < 1e-30f) { // all zero
+    if (amax < GROUP_MAX_EPS) { // all zero
         for (int i = 0; i < n; ++i) {
             L[i] = 0;
         }
@@ -1177,7 +1183,7 @@ static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t *
         float ax = fabsf(x[i]);
         if (ax > amax) { amax = ax; max = x[i]; }
     }
-    if (!amax) { // all zero
+    if (amax < GROUP_MAX_EPS) { // all zero
         for (int i = 0; i < n; ++i) { L[i] = 0; }
         return 0.f;
     }
@@ -1646,7 +1652,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t *
             break;
         }
     }
-    return sumlx / suml2;
+    return sumlx/suml2;
 }
 
 static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restrict y, int k, const float * restrict quant_weights) {
@@ -2653,7 +2659,7 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
 
         }
 
-        if (!max_abs_scale) {
+        if (max_abs_scale < GROUP_MAX_EPS) {
             memset(&y[i], 0, sizeof(block_q6_K));
             y[i].d = GGML_FP32_TO_FP16(0.f);
             x += QK_K;
@@ -2805,7 +2811,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
 
         }
 
-        if (!max_abs_scale) {
+        if (max_abs_scale < GROUP_MAX_EPS) {
             memset(&y[i], 0, sizeof(block_q6_K));
             y[i].d = GGML_FP32_TO_FP16(0.f);
             x += QK_K;
@@ -12599,7 +12605,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
             }
             float max = xval[0];
             for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
-            if (!max) {
+            if (max < GROUP_MAX_EPS) {
                 scales[ib] = 0;
                 memset(L, 0, 32);
                 continue;
@@ -12775,7 +12781,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
             }
             float max = xval[0];
             for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
-            if (!max) {
+            if (max < GROUP_MAX_EPS) {
                 scales[ib] = 0;
                 memset(L, 0, 16);
                 continue;
@@ -13216,7 +13222,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
             }
             float max = xval[0];
             for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
-            if (!max) {
+            if (max < GROUP_MAX_EPS_IQ3_XXS) {
                 scales[ib] = 0;
                 memset(L, 0, 32);
                 continue;
@@ -13756,7 +13762,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
             for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
             float max = fabsf(xb[0]);
             for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
-            if (!max) {
+            if (max < GROUP_MAX_EPS_IQ1_S) {
                 scales[ib] = 0;
                 memset(L, 1, block_size);
                 continue;
@@ -13944,7 +13950,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
             }
             float max = fabsf(xb[0]);
             for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
-            if (!max) {
+            if (max < GROUP_MAX_EPS_IQ1_M) {
                 scales[ib] = 0;
                 memset(L, 1, block_size);
                 continue;
@@ -14208,7 +14214,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
                     amax = ax; max = xb[j];
                 }
             }
-            if (!amax) {
+            if (amax < GROUP_MAX_EPS) {
                 scales[ib] = 0;
                 continue;
             }
@@ -14429,7 +14435,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
             }
             float max = xval[0];
             for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
-            if (!max) {
+            if (max < GROUP_MAX_EPS_IQ2_S) {
                 scales[ib] = 0;
                 continue;
             }

tests/test-backend-ops.cpp

Lines changed: 18 additions & 0 deletions
@@ -16,6 +16,7 @@
 #include <thread>
 #include <vector>
 
+
 static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
     // static RNG initialization (revisit if n_threads stops being constant)
     static const size_t n_threads = std::thread::hardware_concurrency();
@@ -49,6 +50,22 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
         t.join();
     }
 
+#if 0
+    const char * val_str = getenv("GGML_TEST_EPS");
+    float val = 1e-9f;
+    if (val_str != nullptr) {
+        val = std::stof(val_str);
+        printf("GGML_TEST_EPS=%e\n", val);
+    }
+
+    // test quantization with very small values that may result in nan scales due to division by zero
+    if (ggml_is_quantized(tensor->type)) {
+        for (int i = 0; i < 256; i++) {
+            data[i] = val;
+        }
+    }
+#endif
+
     if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
         ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
     } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_BF16) {
@@ -64,6 +81,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
             }
         }
         ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
+        GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
        ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
         // This is going to create some weird integers though.
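Note on the test changes: the #if 0 block is an intentionally disabled helper. When enabled, it overrides the first 256 generated values with a tiny constant read from the GGML_TEST_EPS environment variable (default 1e-9f) so that quantized tensors exercise the near-zero-group path, and the new GGML_ASSERT then fails the test whenever ggml_validate_row_data rejects the quantized output (for example because a scale came out as NaN).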
