quantize-stats : fix test + add it to Makefile default

ggerganov · ggerganov · commit f1607bfe1061 · 2023-04-14T21:35:19.000+03:00
diff --git a/Makefile b/Makefile
@@ -133,7 +133,7 @@ $(info I CC:       $(CCV))
 $(info I CXX:      $(CXXV))
 $(info )
 
-default: main quantize perplexity embedding
+default: main quantize quantize-stats perplexity embedding
 
 #
 # Build library
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
@@ -16,6 +16,9 @@
 #include <unordered_map>
 #include <vector>
 
+static const char * type_strs[] = { "f32", "f16", "q4_0", "q4_1", "q8_0", "i8", "i16", "i32", };
+static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
+
 struct quantize_stats_params {
     std::string model = "models/7B/ggml-model-f16.bin";
     bool verbose = false;
diff --git a/ggml.c b/ggml.c
@@ -7142,14 +7142,16 @@ static void ggml_compute_forward_mul_mat_f16_f32(
 static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q4_0] = {
         .dequantize_row_q         = dequantize_row_q4_0,
-        .quantize_row_q           = quantize_row_q8_0,
+        .quantize_row_q           = quantize_row_q4_0,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
+        .quantize_row_q_dot       = quantize_row_q8_0,
         .vec_dot_q                = ggml_vec_dot_q4_0_q8_0,
     },
     [GGML_TYPE_Q4_1] = {
         .dequantize_row_q         = dequantize_row_q4_1,
         .quantize_row_q           = quantize_row_q4_1,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
+        .quantize_row_q_dot       = quantize_row_q8_0,
         .vec_dot_q                = ggml_vec_dot_q4_1,
     },
     // TODO: GGML_TYPE_Q8_0
@@ -7208,8 +7210,8 @@ static void ggml_compute_forward_mul_mat_q_f32(
     GGML_ASSERT(ne3  == ne13);
 
     const enum ggml_type type = src0->type;
-    quantize_row_q_t const quantize_row_q = quantize_fns[type].quantize_row_q;
-    vec_dot_q_t      const vec_dot_q      = quantize_fns[type].vec_dot_q;
+    quantize_row_q_t const quantize_row_q_dot = quantize_fns[type].quantize_row_q_dot;
+    vec_dot_q_t      const vec_dot_q          = quantize_fns[type].vec_dot_q;
 
     // we don't support permuted src0 or src1
     GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]);
@@ -7283,7 +7285,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
         for (int64_t i13 = 0; i13 < ne13; ++i13) {
             for (int64_t i12 = 0; i12 < ne12; ++i12) {
                 for (int64_t i11 = 0; i11 < ne11; ++i11) {
-                    quantize_row_q((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11), (void *) wdata, ne10);
+                    quantize_row_q_dot((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11), (void *) wdata, ne10);
                     wdata += row_size;
                 }
             }
diff --git a/ggml.h b/ggml.h
@@ -837,6 +837,7 @@ typedef struct {
     dequantize_row_q_t dequantize_row_q;
     quantize_row_q_t   quantize_row_q;
     quantize_row_q_t   quantize_row_q_reference;
+    quantize_row_q_t   quantize_row_q_dot;
     vec_dot_q_t        vec_dot_q;
 } quantize_fns_t;
 

Original file line number	Diff line number	Diff line change
`@@ -133,7 +133,7 @@ $(info I CC: $(CCV))`
`133`	`133`	`$(info I CXX: $(CXXV))`
`134`	`134`	`$(info )`
`135`	`135`
`136`		`-default: main quantize perplexity embedding`
	`136`	`+default: main quantize quantize-stats perplexity embedding`
`137`	`137`
`138`	`138`	`#`
`139`	`139`	`# Build library`