Skip to content

Commit 95dced0

Browse files
i2_s to absmax
1 parent 7a8961f commit 95dced0

File tree

1 file changed

+6
-9
lines changed

1 file changed

+6
-9
lines changed

ggml-quants.c

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3333,12 +3333,11 @@ size_t quantize_i2_s(const float * restrict src, void * restrict dst, int64_t nr
3333 3333
int n = nrow * n_per_row;
3334 3334

3335 3335
// f32 -> q8
3336-
double i2_scale = 0;
3337-
for (int i=0; i<n; i++) {
3338-
if (fabs((double)(src[i])) > 1e-6) {
3339-
i2_scale = (double)src[i];
3340-
}
3336+
double max = 0;
3337+
for (int i = 0; i < n; ++i) {
3338+
max = MAX(max, (double)fabs((double)src[i]));
3341 3339
}
3340+
double i2_scale = max;
3342 3341

3343 3342
uint8_t* q8 = (uint8_t*)dst;
3344 3343
for (int i=0; i<n; i++) {
@@ -3363,11 +3362,9 @@ size_t quantize_i2_s(const float * restrict src, void * restrict dst, int64_t nr
3363 3362
}
3364 3363

3365 3364
float* scale_ptr = (float*)((char*)i2_weight + n / 4);
3366-
for (int i=0; i<8; i++) {
3367-
scale_ptr[i] = i2_scale;
3368-
}
3365+
scale_ptr[0] = i2_scale;
3369 3366

3370-
// 32B for scale
3367+
// 32B for alignment
3371 3368
return nrow * row_size / 4 + 32;
3372 3369
}
3373 3370

0 commit comments

Comments
 (0)