File tree (expand / collapse): 2 files changed, +11 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,10 @@ typedef float2 dfloat2;
227
227
#define RDNA2
228
228
#endif
229
229
230
+ #if defined(__gfx1010__) || defined(__gfx1012__)
231
+ #define RDNA1
232
+ #endif
233
+
230
234
#ifndef __has_builtin
231
235
#define __has_builtin(x) 0
232
236
#endif
Original file line number Diff line number Diff line change @@ -61,12 +61,16 @@ static constexpr __device__ int get_mmq_x_max_device() {
61
61
}
62
62
63
63
static constexpr int get_mmq_y_host (const int cc) {
64
- return int8_mma_available (cc) || cc >= CC_VOLTA ? 128 : 64 ;
64
+ return cc >= CC_OFFSET_AMD ? (cc == CC_RDNA1 ? 64 : 128 ) : ( cc >= CC_VOLTA ? 128 : 64 ) ;
65
65
}
66
66
67
67
static constexpr __device__ int get_mmq_y_device () {
68
68
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
69
+ #if defined(RDNA1)
70
+ return 64 ;
71
+ #else
69
72
return 128 ;
73
+ #endif // defined RDNA1
70
74
#else
71
75
#if __CUDA_ARCH__ >= CC_VOLTA
72
76
return 128 ;
@@ -2400,9 +2404,9 @@ static __device__ void mul_mat_q_process_tile(
2400
2404
2401
2405
template <ggml_type type, int mmq_x, int nwarps, bool need_check>
2402
2406
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
2403
- #if defined(RDNA3) || defined(RDNA2)
2407
+ #if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
2404
2408
__launch_bounds__ (WARP_SIZE*nwarps, 2 )
2405
- #endif // defined(RDNA3) || defined(RDNA2)
2409
+ #endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
2406
2410
#else
2407
2411
#if __CUDA_ARCH__ >= CC_VOLTA
2408
2412
__launch_bounds__ (WARP_SIZE*nwarps, 1 )
You can’t perform that action at this time.
0 commit comments