Skip to content

Commit 76b27d2

Browse files
authored
ggml : fix row condition for i8mm kernels (ggml-org#10561)
ggml-ci
1 parent eea986f commit 76b27d2

File tree

2 files changed

+13
-10
lines changed

2 files changed

+13
-10
lines changed

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1813,11 +1813,13 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
18131813
sumv0 = vmlaq_f32(sumv0,(vcvtq_f32_s32(vmmlaq_s32((vmmlaq_s32((vmmlaq_s32((vmmlaq_s32(vdupq_n_s32(0), l0, r0)),
18141814
l1, r1)), l2, r2)), l3, r3))), scale);
18151815
}
1816-
float32x4_t sumv1 = vextq_f32(sumv0, sumv0, 2);
1816+
1817+
float32x4_t sumv1 = vextq_f32 (sumv0, sumv0, 2);
18171818
float32x4_t sumv2 = vzip1q_f32(sumv0, sumv1);
18181819

1819-
vst1_f32(s, vget_low_f32(sumv2));
1820+
vst1_f32(s, vget_low_f32 (sumv2));
18201821
vst1_f32(s + bs, vget_high_f32(sumv2));
1822+
18211823
return;
18221824
}
18231825
#endif

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7576,14 +7576,6 @@ UseGgmlGemm2:;
75767576
// This is the size of the rest of the dimensions of the result
75777577
const int64_t nr1 = ne1 * ne2 * ne3;
75787578

7579-
// dot kernels can handle 1 row and col at a time, but mmla kernels can process 2 rows and cols
7580-
int64_t num_rows_per_vec_dot = vec_dot_num_rows;
7581-
// TODO: currently the mmla kernels support only even numbered rows/cols.
7582-
// this check can be removed once they are extended to support odd numbered rows/cols too
7583-
if ((nr0 % 2 != 0) || (ne11 % 2 != 0)) {
7584-
num_rows_per_vec_dot = 1;
7585-
}
7586-
75877579
// Now select a reasonable chunk size.
75887580
int chunk_size = 16;
75897581

@@ -7646,6 +7638,15 @@ UseGgmlGemm2:;
76467638
const int64_t ir1_start = dr1 * ith1;
76477639
const int64_t ir1_end = MIN(ir1_start + dr1, nr1);
76487640

7641+
// dot kernels can handle 1 row and col at a time, but mmla kernels can process 2 rows and cols
7642+
int64_t num_rows_per_vec_dot = vec_dot_num_rows;
7643+
7644+
// TODO: currently the mmla kernels support only even numbered rows/cols.
7645+
// this check can be removed once they are extended to support odd numbered rows/cols too
7646+
if ((nr0 % 2 != 0) || (ne11 % 2 != 0) || ((ir0_end - ir0_start) % 2 != 0) || ((ir1_end - ir1_start) % 2 != 0)) {
7647+
num_rows_per_vec_dot = 1;
7648+
}
7649+
76497650
ggml_compute_forward_mul_mat_one_chunk(params, dst, type, num_rows_per_vec_dot, ir0_start, ir0_end, ir1_start, ir1_end);
76507651

76517652
if (nth >= nchunk0 * nchunk1) {

0 commit comments

Comments
 (0)