Skip to content

Commit b5e7285

Browse files
CUDA: fix matrix multiplication logic for tests (#6667)
1 parent 4bd0f93 commit b5e7285

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1946,7 +1946,7 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
1946 1946
} else if (!split && !fp16_performance_good && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) {
1947 1947
// KQV single-batch
1948 1948
ggml_cuda_mul_mat_vec_nc(ctx, src0, src1, dst);
1949-
} else if (!split && fp16_performance_good && src0->type == GGML_TYPE_F16 && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) {
1949+
} else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || fp16_performance_good) && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) {
1950 1950
// KQ + KQV multi-batch
1951 1951
ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst);
1952 1952
} else if (use_dequantize_mul_mat_vec) {

0 commit comments

Comments
 (0)