Skip to content

Commit 7acf2b3

Browse files
daniandtheweb authored and Nexesenex committed
Define and optimize RDNA1 (ggml-org#8085)
1 parent de503e3 commit 7acf2b3

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

ggml-cuda/common.cuh

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,10 @@ typedef float2 dfloat2;
227227
#define RDNA2
228228
#endif
229229

230+
#if defined(__gfx1010__) || defined(__gfx1012__)
231+
#define RDNA1
232+
#endif
233+
230234
#ifndef __has_builtin
231235
#define __has_builtin(x) 0
232236
#endif

ggml-cuda/mmq.cuh

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -61,12 +61,16 @@ static constexpr __device__ int get_mmq_x_max_device() {
6161
}
6262

6363
static constexpr int get_mmq_y_host(const int cc) {
64-
return int8_mma_available(cc) || cc >= CC_VOLTA ? 128 : 64;
64+
return cc >= CC_OFFSET_AMD ? (cc == CC_RDNA1 ? 64 : 128) : (cc >= CC_VOLTA ? 128 : 64);
6565
}
6666

6767
static constexpr __device__ int get_mmq_y_device() {
6868
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
69+
#if defined(RDNA1)
70+
return 64;
71+
#else
6972
return 128;
73+
#endif // defined RDNA1
7074
#else
7175
#if __CUDA_ARCH__ >= CC_VOLTA
7276
return 128;
@@ -2400,9 +2404,9 @@ static __device__ void mul_mat_q_process_tile(
24002404

24012405
template <ggml_type type, int mmq_x, int nwarps, bool need_check>
24022406
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
2403-
#if defined(RDNA3) || defined(RDNA2)
2407+
#if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
24042408
__launch_bounds__(WARP_SIZE*nwarps, 2)
2405-
#endif // defined(RDNA3) || defined(RDNA2)
2409+
#endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
24062410
#else
24072411
#if __CUDA_ARCH__ >= CC_VOLTA
24082412
__launch_bounds__(WARP_SIZE*nwarps, 1)

0 commit comments

Comments
 (0)