
Commit 61e8a0a

use higher eps only for the quants that need it

ggml-ci

1 parent f59edee
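In short: the guard on each quantization group's maximum, previously a single GROUP_MAX_EPS = 1e-7f, becomes a much smaller 1e-15f default, and the quant types whose scale search actually needs a larger cutoff (iq3_xxs, iq2_s, iq1_m, iq1_s) get dedicated constants. A minimal sketch of the shared guard pattern, assuming a hypothetical group_scale helper and kmax level count (neither is ggml API):

#include <math.h>
#include <stdio.h>

#define GROUP_MAX_EPS 1e-15f // new default; was 1e-7f before this commit

// Hypothetical stand-in for the per-group scale step in the
// quantize_row_*_impl functions below: when the group maximum is under
// the epsilon, the group gets a zero scale instead of feeding a
// near-zero max into a division later on.
static float group_scale(const float * x, int n, float eps, int kmax) {
    float max = fabsf(x[0]);
    for (int i = 1; i < n; ++i) {
        const float ax = fabsf(x[i]);
        if (ax > max) max = ax;
    }
    if (max < eps) {
        return 0.0f;   // the real code also memsets L and continues
    }
    return max / kmax; // map the largest weight onto the top quant level
}

int main(void) {
    const float tiny[4] = { 1e-20f, -1e-20f, 1e-21f, 0.0f };
    printf("scale = %g\n", group_scale(tiny, 4, GROUP_MAX_EPS, 3));
    return 0;
}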

File tree

2 files changed: +22 -9 lines changed

ggml-quants.c
tests/test-backend-ops.cpp


ggml-quants.c

Lines changed: 11 additions & 7 deletions

@@ -14,7 +14,11 @@
 #include <stdlib.h> // for qsort
 #include <stdio.h>  // for GGML_ASSERT
 
-#define GROUP_MAX_EPS 1e-7f
+#define GROUP_MAX_EPS 1e-15f
+#define GROUP_MAX_EPS_IQ3_XXS 1e-8f
+#define GROUP_MAX_EPS_IQ2_S 1e-8f
+#define GROUP_MAX_EPS_IQ1_M 1e-7f
+#define GROUP_MAX_EPS_IQ1_S 1e-12f
 
 #if defined(_MSC_VER)
 // disable "possible loss of data" to avoid warnings for hundreds of casts
@@ -1648,7 +1652,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t *
             break;
         }
     }
-    return sumlx / suml2;
+    return sumlx/suml2;
 }
 
 static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restrict y, int k, const float * restrict quant_weights) {
@@ -12598,7 +12602,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
         }
         float max = xval[0];
         for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
-        if (!max) {
+        if (max < GROUP_MAX_EPS) {
             scales[ib] = 0;
             memset(L, 0, 32);
             continue;
@@ -13215,7 +13219,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
         }
         float max = xval[0];
         for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
-        if (max < GROUP_MAX_EPS) {
+        if (max < GROUP_MAX_EPS_IQ3_XXS) {
             scales[ib] = 0;
             memset(L, 0, 32);
             continue;
@@ -13755,7 +13759,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
         for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
         float max = fabsf(xb[0]);
         for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
-        if (max < GROUP_MAX_EPS) {
+        if (max < GROUP_MAX_EPS_IQ1_S) {
             scales[ib] = 0;
             memset(L, 1, block_size);
             continue;
@@ -13943,7 +13947,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
         }
         float max = fabsf(xb[0]);
         for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
-        if (max < GROUP_MAX_EPS) {
+        if (max < GROUP_MAX_EPS_IQ1_M) {
             scales[ib] = 0;
             memset(L, 1, block_size);
             continue;
@@ -14428,7 +14432,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
         }
         float max = xval[0];
         for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
-        if (max < GROUP_MAX_EPS) {
+        if (max < GROUP_MAX_EPS_IQ2_S) {
             scales[ib] = 0;
             continue;
         }
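One detail worth calling out from the iq2_xxs hunk above: the old test `if (!max)` fired only when the group maximum was exactly 0.0f, so a denormal-small but non-zero maximum slipped through and could overflow the scale arithmetic further down. A tiny standalone demonstration of that failure mode (the divisor 31 is illustrative, not the iq2_xxs grid constant):

#include <stdio.h>

int main(void) {
    const float max   = 1e-38f;        // non-zero, so "if (!max)" does not trigger
    const float scale = max / 31;      // collapses into the denormal range
    const float inv   = 1.0f / scale;  // overflows to inf -> downstream NaNs
    printf("scale = %e  inv = %e\n", scale, inv);
    return 0;
}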

tests/test-backend-ops.cpp

Lines changed: 11 additions & 2 deletions

@@ -16,9 +16,10 @@
 #include <thread>
 #include <vector>
 
+
 static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
     // static RNG initialization (revisit if n_threads stops being constant)
-    static const size_t n_threads = std::thread::hardware_concurrency();
+    static const size_t n_threads = 1; //std::thread::hardware_concurrency();
     static std::vector<std::default_random_engine> generators = []() {
         std::random_device rd;
         std::vector<std::default_random_engine> vec;
@@ -50,10 +51,17 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
     }
 
 #if 0
+    const char * val_str = getenv("GGML_TEST_EPS");
+    float val = 1e-9f;
+    if (val_str != nullptr) {
+        val = std::stof(val_str);
+        printf("GGML_TEST_EPS=%e\n", val);
+    }
+
     // test quantization with very small values that may result in nan scales due to division by zero
     if (ggml_is_quantized(tensor->type)) {
         for (int i = 0; i < 256; i++) {
-            data[i] = 1e-7f;
+            data[i] = val;
         }
     }
 #endif
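A usage note, hedged: the GGML_TEST_EPS plumbing sits behind `#if 0`, so it only takes effect when the block is flipped on locally; with it enabled, the fill value can be varied per run without recompiling, e.g. `GGML_TEST_EPS=1e-12 ./test-backend-ops` (run from your build's test binary directory).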
@@ -73,6 +81,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
             }
         }
         ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
+        GGML_ASSERT(ggml_validate_row_data(tensor->type, dataq.data(), dataq.size()));
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
         // This is going to create some weird integers though.
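The new GGML_ASSERT wires ggml_validate_row_data into tensor init, so a NaN/Inf scale now fails fast when the test tensor is created instead of surfacing later as a backend mismatch. A standalone sketch of the same quantize-then-validate pattern, assuming this repo's ggml.h is on the include path and the program links against ggml; the 1e-12f fill and the Q2_K choice are illustrative (imatrix-requiring types such as iq1_s would need a non-NULL last argument):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "ggml.h"

int main(void) {
    // one Q2_K superblock worth of near-zero weights -- the case that used
    // to produce NaN scales before the eps rework in ggml-quants.c
    const int64_t n_per_row = 256;
    float src[256];
    for (int i = 0; i < 256; ++i) {
        src[i] = 1e-12f;
    }

    const enum ggml_type type = GGML_TYPE_Q2_K; // works without an importance matrix
    unsigned char * dst = malloc(1024);         // generous; real code sizes with ggml_row_size()
    const size_t nbytes = ggml_quantize_chunk(type, src, dst, 0, 1, n_per_row, NULL);

    // same check the test harness now asserts: reject rows whose quantized
    // scales came out NaN/Inf instead of handing them to a backend
    if (!ggml_validate_row_data(type, dst, nbytes)) {
        fprintf(stderr, "quantized row failed validation\n");
        free(dst);
        return 1;
    }
    printf("row ok (%zu bytes)\n", nbytes);
    free(dst);
    return 0;
}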
