Skip to content

Commit 281ef73

Browse files
authored
k-quants : fix quantization ranges (#3646)
1 parent 940efa9 commit 281ef73

File tree

1 file changed: 11 additions (+11) and 19 deletions (-19)

1 file changed: 11 additions (+11) and 19 deletions (-19)

k_quants.c

Lines changed: 11 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -462,12 +462,9 @@ void quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
462462
}
463463

464464
size_t ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
465-
const int nb = k / QK_K;
466-
467-
// TODO - collect histograms - although, at a second thought, I don't really care about them
468-
(void)hist;
465+
(void)hist; // TODO: collect histograms
469466

470-
for (int j = 0; j < nb; j += k) {
467+
for (int j = 0; j < n; j += k) {
471468
block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K;
472469
quantize_row_q2_K_reference(src + j, y, k);
473470
}
@@ -678,12 +675,9 @@ void quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
678675
}
679676

680677
size_t ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
681-
const int nb = k / QK_K;
682-
683-
// TODO - collect histograms - although, at a second thought, I don't really care about them
684-
(void)hist;
678+
(void)hist; // TODO: collect histograms
685679

686-
for (int j = 0; j < nb; j += k) {
680+
for (int j = 0; j < n; j += k) {
687681
block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K;
688682
quantize_row_q3_K_reference(src + j, y, k);
689683
}
@@ -846,9 +840,9 @@ void quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
846840

847841
size_t ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
848842
assert(k % QK_K == 0);
849-
const int nb = k / QK_K;
850843
(void)hist; // TODO: collect histograms
851-
for (int j = 0; j < nb; j += k) {
844+
845+
for (int j = 0; j < n; j += k) {
852846
block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K;
853847
quantize_row_q4_K_reference(src + j, y, k);
854848
}
@@ -1052,9 +1046,9 @@ void quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
10521046

10531047
size_t ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
10541048
assert(k % QK_K == 0);
1055-
const int nb = k / QK_K;
1056-
(void)hist;
1057-
for (int j = 0; j < nb; j += k) {
1049+
(void)hist; // TODO: collect histograms
1050+
1051+
for (int j = 0; j < n; j += k) {
10581052
block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K;
10591053
quantize_row_q5_K_reference(src + j, y, k);
10601054
}
@@ -1200,11 +1194,9 @@ void quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
12001194

12011195
size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
12021196
assert(k % QK_K == 0);
1203-
const int nb = k / QK_K;
1204-
1205-
(void)hist; // TODO
1197+
(void)hist; // TODO: collect histograms
12061198

1207-
for (int j = 0; j < nb; j += k) {
1199+
for (int j = 0; j < n; j += k) {
12081200
block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K;
12091201
quantize_row_q6_K_reference(src + j, y, k);
12101202
}

0 commit comments

Comments
 (0)