Skip to content

Commit 1d1754f

Browse files
committed
Define and optimize RDNA1
1 parent e112b61 commit 1d1754f

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

ggml-cuda/common.cuh

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,10 @@ typedef float2 dfloat2;
 243  243    #define RDNA2
 244  244    #endif
 245  245
      246  + #if defined(__gfx1010__) || defined(__gfx1012__)
      247  + #define RDNA1
      248  + #endif
      249  +
 246  250    #ifndef __has_builtin
 247  251    #define __has_builtin(x) 0
 248  252    #endif

ggml-cuda/mmq.cuh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1972,9 +1972,9 @@ static __device__ void mul_mat_q_process_tile(
 1972  1972
 1973  1973    template <ggml_type type, int mmq_x, int nwarps, bool need_check>
 1974  1974    #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
 1975        - #if defined(RDNA3) || defined(RDNA2)
       1975  + #if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
 1976  1976    __launch_bounds__(WARP_SIZE*nwarps, 2)
 1977        - #endif // defined(RDNA3) || defined(RDNA2)
       1977  + #endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
 1978  1978    #else
 1979  1979    #if __CUDA_ARCH__ >= CC_VOLTA
 1980  1980    __launch_bounds__(WARP_SIZE*nwarps, 1)

0 commit comments

Comments
 (0)