Skip to content

Commit 19e7a65

Browse files
committed
quantize-stats : fix test + add it to Makefile default
1 parent 3b894ec commit 19e7a65

File tree

4 files changed

+11
-5
lines changed

4 files changed

+11
-5
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ $(info I CC: $(CCV))
133 133
$(info I CXX: $(CXXV))
134 134
$(info )
135 135

136-
default: main quantize perplexity embedding
136+
default: main quantize quantize-stats perplexity embedding
137 137

138 138
#
139 139
# Build library

examples/quantize-stats/quantize-stats.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
16 16
#include <unordered_map>
17 17
#include <vector>
18 18

19+
static const char * type_strs[] = { "f32", "f16", "q4_0", "q4_1", "q8_0", "i8", "i16", "i32", };
20+
static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
21+
19 22
struct quantize_stats_params {
20 23
std::string model = "models/7B/ggml-model-f16.bin";
21 24
bool verbose = false;

ggml.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7151,14 +7151,16 @@ static void ggml_compute_forward_mul_mat_f16_f32(
7151 7151
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
7152 7152
[GGML_TYPE_Q4_0] = {
7153 7153
.dequantize_row_q = dequantize_row_q4_0,
7154-
.quantize_row_q = quantize_row_q8_0,
7154+
.quantize_row_q = quantize_row_q4_0,
7155 7155
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
7156+
.quantize_row_q_dot = quantize_row_q8_0,
7156 7157
.vec_dot_q = ggml_vec_dot_q4_0_q8_0,
7157 7158
},
7158 7159
[GGML_TYPE_Q4_1] = {
7159 7160
.dequantize_row_q = dequantize_row_q4_1,
7160 7161
.quantize_row_q = quantize_row_q4_1,
7161 7162
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
7163+
.quantize_row_q_dot = quantize_row_q8_0,
7162 7164
.vec_dot_q = ggml_vec_dot_q4_1,
7163 7165
},
7164 7166
// TODO: GGML_TYPE_Q8_0
@@ -7217,8 +7219,8 @@ static void ggml_compute_forward_mul_mat_q_f32(
7217 7219
GGML_ASSERT(ne3 == ne13);
7218 7220

7219 7221
const enum ggml_type type = src0->type;
7220-
quantize_row_q_t const quantize_row_q = quantize_fns[type].quantize_row_q;
7221-
vec_dot_q_t const vec_dot_q = quantize_fns[type].vec_dot_q;
7222+
quantize_row_q_t const quantize_row_q_dot = quantize_fns[type].quantize_row_q_dot;
7223+
vec_dot_q_t const vec_dot_q = quantize_fns[type].vec_dot_q;
7222 7224

7223 7225
// we don't support permuted src0 or src1
7224 7226
GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]);
@@ -7292,7 +7294,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
7292 7294
for (int64_t i13 = 0; i13 < ne13; ++i13) {
7293 7295
for (int64_t i12 = 0; i12 < ne12; ++i12) {
7294 7296
for (int64_t i11 = 0; i11 < ne11; ++i11) {
7295-
quantize_row_q((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11), (void *) wdata, ne10);
7297+
quantize_row_q_dot((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11), (void *) wdata, ne10);
7296 7298
wdata += row_size;
7297 7299
}
7298 7300
}

ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,7 @@ typedef struct {
837 837
dequantize_row_q_t dequantize_row_q;
838 838
quantize_row_q_t quantize_row_q;
839 839
quantize_row_q_t quantize_row_q_reference;
840+
quantize_row_q_t quantize_row_q_dot;
840 841
vec_dot_q_t vec_dot_q;
841 842
} quantize_fns_t;
842 843

0 commit comments

Comments
 (0)