Skip to content

Commit e2da115

Browse files
committed
Update stats tool for unbounded's method
1 parent 4dc62e7 commit e2da115

File tree

3 files changed

+13
-10
lines changed

3 files changed

+13
-10
lines changed

examples/quantize-stats/quantize-stats.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32" };
1818
static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
1919

20-
static const char * impl_strs[] = { "simd", "reference", "rmse" };
20+
static const char * impl_strs[] = { "simd", "reference", "rmse-sw", "rmse-unbounded" };
2121
static_assert(sizeof(impl_strs) == GGML_QUANTIZE_IMPL_COUNT * sizeof(char *), "Incomplete implementation list");
2222

2323
struct quantize_stats_params {
@@ -52,7 +52,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
5252
fprintf(stderr, " -m FNAME, --model FNAME\n");
5353
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
5454
fprintf(stderr, " -i, --implementation\n");
55-
fprintf(stderr, " select implementation (simd, reference, rmse)\n");
55+
fprintf(stderr, " select implementation (simd, reference, rmse-sw, rmse-unbounded)\n");
5656
fprintf(stderr, " -v, --verbose\n");
5757
fprintf(stderr, " verbose output (default: false)\n");
5858
fprintf(stderr, " -p, --per-layer-stats\n");
@@ -111,7 +111,7 @@ void print_error_stats(const std::string & name, ggml_quantize_impl_t impl, cons
111111
double rmse = sqrt(stats.total_error / (double) stats.num_samples);
112112
double median = find_quantile(stats, .5);
113113
double pct95 = find_quantile(stats, .95);
114-
printf("%-4s %-10s: rmse %.8f, maxerr %.8f, 95pct<%.4f, median<%.4f\n",
114+
printf("%-4s %-15s: rmse %.8f, maxerr %.8f, 95pct<%.4f, median<%.4f\n",
115115
name.c_str(), impl_strs[impl], rmse, stats.max_error, pct95, median);
116116
if (print_histogram) {
117117
printf("Error distribution:\n");

ggml.c

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6799,18 +6799,20 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
67996799
[GGML_TYPE_Q4_0] = {
68006800
.dequantize_row_q = dequantize_row_q4_0,
68016801
.quantize_row_q = {
6802-
[GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_0,
6803-
[GGML_QUANTIZE_IMPL_REFERENCE] = (quantize_row_q_t)quantize_row_q4_0_reference,
6804-
[GGML_QUANTIZE_IMPL_RMSE] = (quantize_row_q_t)quantize_row_q4_0_rmse,
6802+
[GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_0,
6803+
[GGML_QUANTIZE_IMPL_REFERENCE] = (quantize_row_q_t)quantize_row_q4_0_reference,
6804+
[GGML_QUANTIZE_IMPL_RMSE_SW] = (quantize_row_q_t)quantize_row_q4_0_rmse,
6805+
[GGML_QUANTIZE_IMPL_RMSE_UNBOUNDED] = (quantize_row_q_t)quantize_row_q4_0_slow,
68056806
},
68066807
.vec_dot_q = ggml_vec_dot_q4_0,
68076808
},
68086809
[GGML_TYPE_Q4_1] = {
68096810
.dequantize_row_q = dequantize_row_q4_1,
68106811
.quantize_row_q = {
6811-
[GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_1,
6812-
[GGML_QUANTIZE_IMPL_REFERENCE] = quantize_row_q4_1_reference,
6813-
[GGML_QUANTIZE_IMPL_RMSE] = quantize_row_q_missing,
6812+
[GGML_QUANTIZE_IMPL_SIMD] = quantize_row_q4_1,
6813+
[GGML_QUANTIZE_IMPL_REFERENCE] = quantize_row_q4_1_reference,
6814+
[GGML_QUANTIZE_IMPL_RMSE_SW] = quantize_row_q_missing,
6815+
[GGML_QUANTIZE_IMPL_RMSE_UNBOUNDED] = quantize_row_q_missing,
68146816
},
68156817
.vec_dot_q = ggml_vec_dot_q4_1,
68166818
},

ggml.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -795,7 +795,8 @@ typedef void (*vec_dot_q_t)(const int n, float * s, const void * x, const void *
795795
typedef enum {
796796
GGML_QUANTIZE_IMPL_SIMD,
797797
GGML_QUANTIZE_IMPL_REFERENCE,
798-
GGML_QUANTIZE_IMPL_RMSE,
798+
GGML_QUANTIZE_IMPL_RMSE_SW,
799+
GGML_QUANTIZE_IMPL_RMSE_UNBOUNDED,
799800
GGML_QUANTIZE_IMPL_COUNT
800801
} ggml_quantize_impl_t;
801802

0 commit comments

Comments
 (0)