Skip to content

Commit 2bb97fc

Browse files
fix AMD
1 parent 97f8a7a commit 2bb97fc

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

ggml-cuda.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5319,13 +5319,13 @@ static __global__ void mul_mat_vec_q(
53195319
const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst,
53205320
const int ncols_x, const int nrows_x, const int nrows_y, const int nrows_dst) {
53215321

5322-
#if __CUDA_ARCH__ < CC_RDNA2
5323-
constexpr int nwarps = ncols_y <= 4 ? 4 : 2;
5324-
constexpr int rows_per_cuda_block = ncols_y == 1 ? 1 : 2;
5325-
#else
5322+
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && (defined(RDNA2) || defined(RDNA3))
53265323
constexpr int nwarps = 1;
53275324
constexpr int rows_per_cuda_block = 1;
5328-
#endif // __CUDA_ARCH__ < CC_RDNA2
5325+
#else
5326+
constexpr int nwarps = ncols_y <= 4 ? 4 : 2;
5327+
constexpr int rows_per_cuda_block = ncols_y == 1 ? 1 : 2;
5328+
#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && (defined(RDNA2) || defined(RDNA3))
53295329

53305330
constexpr int blocks_per_iter = vdr * nwarps*WARP_SIZE / qi;
53315331

0 commit comments

Comments
 (0)