File tree Expand file tree Collapse file tree 1 file changed +11
-3
lines changed Expand file tree Collapse file tree 1 file changed +11
-3
lines changed Original file line number Diff line number Diff line change @@ -6941,15 +6941,23 @@ static void ggml_compute_forward_mul_mat_q_f32(
6941
6941
#endif
6942
6942
6943
6943
//void *p = (void *) src0->data;
6944
- assert ((ir1 - ir0 ) % EXPERIMENT_TILESIZE_X == 0 );
6945
6944
6946
6945
int x_stride = EXPERIMENT_TILESIZE_X ;
6946
+
6947
+ // if the second matrix is too small, we cannot use the tiled code
6947
6948
if (ne11 < EXPERIMENT_TILESIZE_Y ) {
6948
6949
x_stride = 1 ;
6949
6950
}
6950
6951
6951
-
6952
6952
for (int ir = ir0 ; ir < ir1 ; ir += x_stride ) {
6953
+ // check if we can advance with x_stride = EXPERIMENT_TILESIZE_X
6954
+ //printf("ir0=%i -> ir1 - ir=%i\n", ir0, ir1-ir);
6955
+ if ((ir1 - ir ) < EXPERIMENT_TILESIZE_X ) {
6956
+ // we do not have enough rows left - we need to go step by step
6957
+ //printf("ir0=%i - switching to stride 1\n", ir0, ir1-ir);
6958
+ x_stride = 1 ;
6959
+ }
6960
+
6953
6961
// src0 indices
6954
6962
const int i03 = ir /(ne02 * ne01 );
6955
6963
const int i02 = (ir - i03 * ne02 * ne01 )/ne01 ;
@@ -6988,7 +6996,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
6988
6996
6989
6997
assert (ne00 % 32 == 0 );
6990
6998
6991
- if (ne11 < EXPERIMENT_TILESIZE_Y ) {
6999
+ if (( x_stride != EXPERIMENT_TILESIZE_X ) || ( ne11 < EXPERIMENT_TILESIZE_Y ) ) {
6992
7000
//printf("using legacy tile size implementation\n");
6993
7001
// existing tiled implementation
6994
7002
for (int64_t ic = 0 ; ic < ne11 ; ++ ic ) {
You can’t perform that action at this time.
0 commit comments