|
17 | 17 | static const char * type_strs[] = { "q4_0", "q4_1", "i8", "i16", "i32", "f16", "f32" };
|
18 | 18 | static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
|
19 | 19 |
|
20 |
| -static const char * impl_strs[] = { "simd", "reference", "rmse" }; |
| 20 | +static const char * impl_strs[] = { "simd", "reference", "rmse-sw", "rmse-unbounded" }; |
21 | 21 | static_assert(sizeof(impl_strs) == GGML_QUANTIZE_IMPL_COUNT * sizeof(char *), "Incomplete implementation list");
|
22 | 22 |
|
23 | 23 | struct quantize_stats_params {
|
@@ -52,7 +52,7 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
|
52 | 52 | fprintf(stderr, " -m FNAME, --model FNAME\n");
|
53 | 53 | fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
|
54 | 54 | fprintf(stderr, " -i, --implementation\n");
|
55 |
| - fprintf(stderr, " select implementation (simd, reference, rmse)\n"); |
| 55 | + fprintf(stderr, " select implementation (simd, reference, rmse-sw, rmse-unbounded)\n"); |
56 | 56 | fprintf(stderr, " -v, --verbose\n");
|
57 | 57 | fprintf(stderr, " verbose output (default: false)\n");
|
58 | 58 | fprintf(stderr, " -p, --per-layer-stats\n");
|
@@ -111,7 +111,7 @@ void print_error_stats(const std::string & name, ggml_quantize_impl_t impl, cons
|
111 | 111 | double rmse = sqrt(stats.total_error / (double) stats.num_samples);
|
112 | 112 | double median = find_quantile(stats, .5);
|
113 | 113 | double pct95 = find_quantile(stats, .95);
|
114 |
| - printf("%-4s %-10s: rmse %.8f, maxerr %.8f, 95pct<%.4f, median<%.4f\n", |
| 114 | + printf("%-4s %-15s: rmse %.8f, maxerr %.8f, 95pct<%.4f, median<%.4f\n", |
115 | 115 | name.c_str(), impl_strs[impl], rmse, stats.max_error, pct95, median);
|
116 | 116 | if (print_histogram) {
|
117 | 117 | printf("Error distribution:\n");
|
@@ -321,12 +321,12 @@ int main(int argc, char ** argv) {
|
321 | 321 | continue;
|
322 | 322 | }
|
323 | 323 | quantize_fns_t qfns = ggml_internal_get_quantize_fn(type);
|
324 |
| - if (qfns.quantize_row_q && qfns.dequantize_row_q) { |
325 |
| - for (int impl = 0; impl < GGML_QUANTIZE_IMPL_COUNT; impl++) { |
326 |
| - if (!params.include_impl.empty() && std::find(params.include_impl.begin(), params.include_impl.end(), impl) == params.include_impl.end()) { |
327 |
| - continue; |
328 |
| - } |
| 324 | + for (int impl = 0; impl < GGML_QUANTIZE_IMPL_COUNT; impl++) { |
| 325 | + if (!params.include_impl.empty() && std::find(params.include_impl.begin(), params.include_impl.end(), impl) == params.include_impl.end()) { |
| 326 | + continue; |
| 327 | + } |
329 | 328 |
|
| 329 | + if (qfns.quantize_row_q[impl] && qfns.dequantize_row_q) { |
330 | 330 | if (params.verbose) {
|
331 | 331 | printf("testing %s %s ...\n", type_strs[type], impl_strs[impl]);
|
332 | 332 | }
|
|
0 commit comments