We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ea4402b, commit 90c12e6. Copy full SHA for 90c12e6
ggml.c
@@ -9508,8 +9508,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(
9508
const int64_t ne0 = dst->ne[0];
9509
const int64_t ne1 = dst->ne[1];
9510
9511
+ // NOTE: with GGML_OP_MUL_MAT_ID we don't want to go through the BLAS branch because it will dequantize (to_float)
9512
+ // all the experts for each batch element and the processing would become incredibly slow
9513
// TODO: find the optimal values for these
- if (ggml_is_contiguous(src0) &&
9514
+ if (dst->op != GGML_OP_MUL_MAT_ID &&
9515
+ ggml_is_contiguous(src0) &&
9516
ggml_is_contiguous(src1) &&
9517
//src0->type == GGML_TYPE_F32 &&
9518
src1->type == GGML_TYPE_F32 &&
0 commit comments