@@ -1154,13 +1154,17 @@ static void quantize_row_q4_0c_reference(const float * restrict x, uint8_t * res
1154
1154
float id [2 ];
1155
1155
for (int j = 0 ; j < 2 ; j ++ ) {
1156
1156
float amax = 0.0f ; // absolute max
1157
+ float max = 0.0f ;
1157
1158
1158
1159
for (int l = 0 ; l < QK4_0 ; l ++ ) {
1159
1160
const float v = xb [j ][l ];
1160
- amax = MAX (amax , fabsf (v ));
1161
+ if (amax < fabsf (v )) {
1162
+ amax = fabsf (v );
1163
+ max = v ;
1164
+ }
1161
1165
}
1162
1166
1163
- d [j ] = amax / (( 1 << 3 ) - 1 ) ;
1167
+ d [j ] = max / -8 ;
1164
1168
id [j ] = d [j ] ? 1.0f /d [j ] : 0.0f ;
1165
1169
}
1166
1170
@@ -1169,10 +1173,10 @@ static void quantize_row_q4_0c_reference(const float * restrict x, uint8_t * res
1169
1173
1170
1174
for (int l = 0 ; l < QK4_0 ; l ++ ) {
1171
1175
const float v0 = xb [0 ][l ]* id [0 ];
1172
- const uint8_t vi0 = ( int8_t )roundf (v0 ) + 8 ;
1176
+ const uint8_t vi0 = MIN ( 15 , ( int8_t )roundf (v0 ) + 8 ) ;
1173
1177
1174
1178
const float v1 = xb [1 ][l ]* id [1 ];
1175
- const uint8_t vi1 = ( int8_t )roundf (v1 ) + 8 ;
1179
+ const uint8_t vi1 = MIN ( 15 , ( int8_t )roundf (v1 ) + 8 ) ;
1176
1180
1177
1181
assert (vi0 < 16 );
1178
1182
assert (vi1 < 16 );
@@ -3126,16 +3130,19 @@ static void ggml_vec_dot_q4_0c_q8_0c(const int n, float * restrict s, const void
3126
3130
float sumf = 0.0 ;
3127
3131
3128
3132
#if defined(__ARM_NEON )
3129
- const int ahead = 80 ;
3130
3133
float32x4_t sumv0 = vdupq_n_f32 (0.0f );
3131
3134
float32x4_t sumv1 = vdupq_n_f32 (0.0f );
3132
3135
3133
3136
for (int i = 0 ; i < nb /2 ; i ++ ) {
3137
+ // Disable prefetching on all Apple targets (e.g. M1) for now; the guard below is __APPLE__, not M1-specific.
3138
+ #ifndef __APPLE__
3139
+ const int ahead = 80 ;
3134
3140
__builtin_prefetch (& xqs [i * QK4_0 + 64 * ahead ]);
3135
3141
__builtin_prefetch (& yqs [2 * i * QK8_0C + 64 * ahead ]);
3136
3142
__builtin_prefetch (& yqs [2 * i * QK8_0C + 64 * ahead + 64 ]);
3137
3143
__builtin_prefetch (& xds [2 * i + 64 /4 * ahead ]);
3138
3144
__builtin_prefetch (& yds [2 * i + 64 /4 * ahead ]);
3145
+ #endif
3139
3146
3140
3147
const int dst0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
3141
3148
const int dst1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
@@ -9738,11 +9745,13 @@ static void ggml_compute_forward_alibi(
9738
9745
ggml_compute_forward_alibi_f32 (params , src0 , src1 , dst );
9739
9746
} break ;
9740
9747
case GGML_TYPE_Q4_0 :
9748
+ case GGML_TYPE_Q4_0C :
9741
9749
case GGML_TYPE_Q4_1 :
9742
9750
case GGML_TYPE_Q4_2 :
9743
9751
case GGML_TYPE_Q5_0 :
9744
9752
case GGML_TYPE_Q5_1 :
9745
9753
case GGML_TYPE_Q8_0 :
9754
+ case GGML_TYPE_Q8_0C :
9746
9755
case GGML_TYPE_Q8_1 :
9747
9756
case GGML_TYPE_I8 :
9748
9757
case GGML_TYPE_I16 :
0 commit comments