Skip to content

Commit 42ad59f

Browse files
committed
Bugfix: We can now handle the situation where (matrix rows / thread count) is not a multiple of TILESIZE_X
1 parent a33cbbe commit 42ad59f

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

ggml.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6941,15 +6941,23 @@ static void ggml_compute_forward_mul_mat_q_f32(
69416941
#endif
69426942

69436943
//void *p = (void *) src0->data;
6944-
assert((ir1-ir0) % EXPERIMENT_TILESIZE_X == 0);
69456944

69466945
int x_stride = EXPERIMENT_TILESIZE_X;
6946+
6947+
// if the second matrix is too small, we cannot use the tiled code
69476948
if (ne11 < EXPERIMENT_TILESIZE_Y) {
69486949
x_stride = 1;
69496950
}
69506951

6951-
69526952
for (int ir = ir0; ir < ir1; ir+=x_stride) {
6953+
// check if we can advance with x_stride = EXPERIMENT_TILESIZE_X
6954+
//printf("ir0=%i -> ir1 - ir=%i\n", ir0, ir1-ir);
6955+
if ((ir1-ir) < EXPERIMENT_TILESIZE_X) {
6956+
// we do not have enough rows left - we need to go step by step
6957+
//printf("ir0=%i - switching to stride 1\n", ir0);
6958+
x_stride = 1;
6959+
}
6960+
69536961
// src0 indices
69546962
const int i03 = ir/(ne02*ne01);
69556963
const int i02 = (ir - i03*ne02*ne01)/ne01;
@@ -6988,7 +6996,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
69886996

69896997
assert(ne00 % 32 == 0);
69906998

6991-
if (ne11 < EXPERIMENT_TILESIZE_Y) {
6999+
if ((x_stride != EXPERIMENT_TILESIZE_X) || (ne11 < EXPERIMENT_TILESIZE_Y)) {
69927000
//printf("using legacy tile size implementation\n");
69937001
// existing tiled implementation
69947002
for (int64_t ic = 0; ic < ne11; ++ic) {

0 commit comments

Comments
 (0)