Skip to content

Commit df54d2f

Browse files
committed
ggml : use less ggml_mul tasks when src0 rows are few
1 parent 253eab8 commit df54d2f

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

ggml.c

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9329,11 +9329,12 @@ static void ggml_compute_forward_mul_f32(
93299329
struct ggml_tensor * dst) {
93309330
GGML_ASSERT(ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst));
93319331

9332+
const int ith = params->ith;
9333+
const int nth = params->nth;
9334+
93329335
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
93339336
return;
93349337
}
9335-
const int ith = params->ith;
9336-
const int nth = params->nth;
93379338

93389339
#ifdef GGML_USE_CLBLAST
93399340
if (src1->backend == GGML_BACKEND_GPU) {
@@ -17229,7 +17230,13 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
1722917230
}
1723017231
} break;
1723117232
case GGML_OP_SILU_BACK:
17233+
{
17234+
n_tasks = n_threads;
17235+
} break;
1723217236
case GGML_OP_MUL:
17237+
{
17238+
n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
17239+
} break;
1723317240
case GGML_OP_NORM:
1723417241
case GGML_OP_RMS_NORM:
1723517242
case GGML_OP_RMS_NORM_BACK:

0 commit comments

Comments
 (0)