Skip to content

Commit c6b3d19

Browse files
committed
cuda : restore lost changes
1 parent 213a4e2 commit c6b3d19

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

ggml-cuda.cu

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#include <algorithm>
22
#include <cstddef>
33
#include <cstdint>
4+
#include <cinttypes>
45
#include <float.h>
56
#include <limits>
67
#include <stdint.h>
@@ -8016,10 +8017,11 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
80168017
#ifdef GGML_CUDA_FORCE_DMMV
80178018
const bool use_mul_mat_vec_q = false;
80188019
#else
8019-
const bool use_mul_mat_vec_q = min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type);
8020+
const bool use_mul_mat_vec_q = min_compute_capability >= MIN_CC_DP4A && ggml_is_quantized(src0->type) && ggml_nrows(src1) == 1;
80208021
#endif // GGML_CUDA_FORCE_DMMV
80218022

80228023
if (use_mul_mat_vec_q) {
8024+
// NOTE: this kernel does not support ggml_nrows(src1) > 1
80238025
ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_mul_mat_vec_q, true);
80248026
} else {
80258027
ggml_cuda_op_mul_mat(src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, false);

0 commit comments

Comments
 (0)