@@ -2380,25 +2380,41 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
2380
2380
if (src0->ne [3 ] == 1 && src1->ne [3 ] == 1 ) {
2381
2381
// KQ single-batch
2382
2382
// mmv p021 was specific for these dimensions
2383
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_vec_p021\n " , __func__);
2383
2384
ggml_sycl_mul_mat_vec_p021 (ctx, src0, src1, dst);
2385
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_vec_p021 done\n " , __func__);
2384
2386
} else {
2385
2387
// The kernel from the if path is faster for that specific case, but does not support all mul mats.
2388
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_batched_sycl\n " , __func__);
2386
2389
ggml_sycl_mul_mat_batched_sycl (ctx, src0, src1, dst);
2390
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_batched_sycl done\n " , __func__);
2387
2391
}
2388
2392
} else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous (src0) && !ggml_is_transposed (src1) && src1->ne [1 ] == 1 ) {
2389
2393
// KQV single-batch
2394
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_vec_nc\n " , __func__);
2390
2395
ggml_sycl_mul_mat_vec_nc (ctx, src0, src1, dst);
2396
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_vec_nc done\n " , __func__);
2391
2397
} else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_transposed (src0) && !ggml_is_transposed (src1) && src1->ne [2 ]*src1->ne [3 ] > 1 ) {
2392
2398
// KQ + KQV multi-batch
2399
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_batched_sycl\n " , __func__);
2393
2400
ggml_sycl_mul_mat_batched_sycl (ctx, src0, src1, dst);
2401
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_batched_sycl done\n " , __func__);
2394
2402
} else if (use_dequantize_mul_mat_vec) {
2403
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_dequantize_mul_mat_vec\n " , __func__);
2395
2404
ggml_sycl_op_mul_mat (ctx, src0, src1, dst, ggml_sycl_op_dequantize_mul_mat_vec, false );
2405
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_dequantize_mul_mat_vec done\n " , __func__);
2396
2406
} else if (use_mul_mat_vec_q) {
2407
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_vec_q\n " , __func__);
2397
2408
ggml_sycl_op_mul_mat (ctx, src0, src1, dst, ggml_sycl_op_mul_mat_vec_q, true );
2409
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_vec_q done\n " , __func__);
2398
2410
} else if (use_mul_mat_q) {
2411
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_q\n " , __func__);
2399
2412
ggml_sycl_op_mul_mat (ctx, src0, src1, dst, ggml_sycl_op_mul_mat_q, true );
2413
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_q done\n " , __func__);
2400
2414
} else {
2415
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_sycl\n " , __func__);
2401
2416
ggml_sycl_op_mul_mat (ctx, src0, src1, dst, ggml_sycl_op_mul_mat_sycl, false );
2417
+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_sycl done\n " , __func__);
2402
2418
}
2403
2419
}
2404
2420
0 commit comments