Skip to content

Commit d53f767

Browse files
committed
q4_0c: disable prefetching on M1
1 parent 2949725 commit d53f767

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

ggml.c

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1154,13 +1154,17 @@ static void quantize_row_q4_0c_reference(const float * restrict x, uint8_t * res
11541154
float id[2];
11551155
for (int j = 0; j < 2; j++) {
11561156
float amax = 0.0f; // absolute max
1157+
float max = 0.0f;
11571158

11581159
for (int l = 0; l < QK4_0; l++) {
11591160
const float v = xb[j][l];
1160-
amax = MAX(amax, fabsf(v));
1161+
if (amax < fabsf(v)) {
1162+
amax = fabsf(v);
1163+
max = v;
1164+
}
11611165
}
11621166

1163-
d[j] = amax / ((1 << 3) - 1);
1167+
d[j] = max / -8;
11641168
id[j] = d[j] ? 1.0f/d[j] : 0.0f;
11651169
}
11661170

@@ -1169,10 +1173,10 @@ static void quantize_row_q4_0c_reference(const float * restrict x, uint8_t * res
11691173

11701174
for (int l = 0; l < QK4_0; l++) {
11711175
const float v0 = xb[0][l]*id[0];
1172-
const uint8_t vi0 = (int8_t)roundf(v0) + 8;
1176+
const uint8_t vi0 = MIN(15, (int8_t)roundf(v0) + 8);
11731177

11741178
const float v1 = xb[1][l]*id[1];
1175-
const uint8_t vi1 = (int8_t)roundf(v1) + 8;
1179+
const uint8_t vi1 = MIN(15, (int8_t)roundf(v1) + 8);
11761180

11771181
assert(vi0 < 16);
11781182
assert(vi1 < 16);
@@ -3126,16 +3130,19 @@ static void ggml_vec_dot_q4_0c_q8_0c(const int n, float * restrict s, const void
31263130
float sumf = 0.0;
31273131

31283132
#if defined(__ARM_NEON)
3129-
const int ahead=80;
31303133
float32x4_t sumv0 = vdupq_n_f32(0.0f);
31313134
float32x4_t sumv1 = vdupq_n_f32(0.0f);
31323135

31333136
for (int i = 0; i < nb/2; i++) {
3137+
// Disable prefetching on M1 for now.
3138+
#ifndef __APPLE__
3139+
const int ahead=80;
31343140
__builtin_prefetch(&xqs[i*QK4_0 + 64*ahead]);
31353141
__builtin_prefetch(&yqs[2*i*QK8_0C + 64*ahead]);
31363142
__builtin_prefetch(&yqs[2*i*QK8_0C + 64*ahead + 64]);
31373143
__builtin_prefetch(&xds[2*i + 64/4*ahead]);
31383144
__builtin_prefetch(&yds[2*i + 64/4*ahead]);
3145+
#endif
31393146

31403147
const int dst0 = i + i/2*2; // 0, 1, 4, 5, 8, 9, ...
31413148
const int dst1 = i + i/2*2 + 2; // 2, 3, 6, 7, 10, 11 ...
@@ -9738,11 +9745,13 @@ static void ggml_compute_forward_alibi(
97389745
ggml_compute_forward_alibi_f32(params, src0, src1, dst);
97399746
} break;
97409747
case GGML_TYPE_Q4_0:
9748+
case GGML_TYPE_Q4_0C:
97419749
case GGML_TYPE_Q4_1:
97429750
case GGML_TYPE_Q4_2:
97439751
case GGML_TYPE_Q5_0:
97449752
case GGML_TYPE_Q5_1:
97459753
case GGML_TYPE_Q8_0:
9754+
case GGML_TYPE_Q8_0C:
97469755
case GGML_TYPE_Q8_1:
97479756
case GGML_TYPE_I8:
97489757
case GGML_TYPE_I16:

0 commit comments

Comments (0)