Skip to content

Commit 877d91b

Browse files
committed
mmvq in cuda path
1 parent 8b96a69 commit 877d91b

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

ggml/src/ggml-sycl.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3581,7 +3581,8 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
35813581

35823582
bool use_mul_mat_vec_q = ggml_is_quantized(src0->type)
35833583
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32
3584-
&& src1->ne[1] <= MMVQ_MAX_BATCH_SIZE && src1->ne[1] > MMVQ_MIN_BATCH_SIZE;
3584+
&& src1->ne[1] <= MMVQ_MAX_BATCH_SIZE
3585+
&& ctx.stream()->get_backend() == sycl::backend::ext_oneapi_cuda ? true: src1->ne[1] > MMVQ_MIN_BATCH_SIZE;
35853586

35863587
bool use_mul_mat_q = ggml_sycl_supports_mmq(src0->type)
35873588
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32;

0 commit comments

Comments
 (0)