Skip to content

Commit 46923c6

Browse files
committed
Define and optimize RDNA1
1 parent fadde67 commit 46923c6

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,10 @@ typedef float2 dfloat2;
227227
#define RDNA2
228228
#endif
229229

230+
#if defined(__gfx1010__) || defined(__gfx1012__)
231+
#define RDNA1
232+
#endif
233+
230234
#ifndef __has_builtin
231235
#define __has_builtin(x) 0
232236
#endif

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2259,9 +2259,9 @@ static __device__ void mul_mat_q_process_tile(
22592259

22602260
template <ggml_type type, int mmq_x, int nwarps, bool need_check>
22612261
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
2262-
#if defined(RDNA3) || defined(RDNA2)
2262+
#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
22632263
__launch_bounds__(WARP_SIZE*nwarps, 2)
2264-
#endif // defined(RDNA3) || defined(RDNA2)
2264+
#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
22652265
#else
22662266
#if __CUDA_ARCH__ >= CC_VOLTA
22672267
__launch_bounds__(WARP_SIZE*nwarps, 1)

0 commit comments

Comments
 (0)