File tree: 2 files changed, +11 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,10 @@ typedef float2 dfloat2;
227
227
#define RDNA2
228
228
#endif
229
229
230
// Define RDNA1 when compiling device code for the gfx1010 or gfx1012 targets.
// Code further down keys off this macro (e.g. `#if defined(RDNA1)`) to pick
// RDNA1-specific tile sizes and launch bounds.
#if defined(__gfx1010__) || defined(__gfx1012__)
#define RDNA1
#endif // defined(__gfx1010__) || defined(__gfx1012__)
233
+
230
234
// Fallback for compilers that do not provide __has_builtin: every query
// evaluates to 0 so that `#if __has_builtin(...)` remains well-formed.
// There must be NO whitespace between the macro name and `(`; otherwise the
// preprocessor treats this as an object-like macro expanding to `(x) 0`.
#ifndef __has_builtin
    #define __has_builtin(x) 0
#endif
Original file line number Diff line number Diff line change @@ -60,12 +60,16 @@ static constexpr __device__ int get_mmq_x_max_device() {
60
60
}
61
61
62
62
static constexpr int get_mmq_y_host (const int cc) {
63
- return int8_mma_available (cc) || cc >= CC_VOLTA ? 128 : 64 ;
63
+ return cc >= CC_OFFSET_AMD ? (cc == CC_RDNA1 ? 64 : 128 ) : ( cc >= CC_VOLTA ? 128 : 64 ) ;
64
64
}
65
65
66
66
static constexpr __device__ int get_mmq_y_device () {
67
67
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
68
+ #if defined(RDNA1)
69
+ return 64 ;
70
+ #else
68
71
return 128 ;
72
+ #endif // defined RDNA1
69
73
#else
70
74
#if __CUDA_ARCH__ >= CC_VOLTA
71
75
return 128 ;
@@ -2259,9 +2263,9 @@ static __device__ void mul_mat_q_process_tile(
2259
2263
2260
2264
template <ggml_type type, int mmq_x, int nwarps, bool need_check>
2261
2265
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
2262
- #if defined(RDNA3) || defined(RDNA2)
2266
+ #if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
2263
2267
__launch_bounds__ (WARP_SIZE*nwarps, 2 )
2264
- #endif // defined(RDNA3) || defined(RDNA2)
2268
+ #endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
2265
2269
#else
2266
2270
#if __CUDA_ARCH__ >= CC_VOLTA
2267
2271
__launch_bounds__ (WARP_SIZE*nwarps, 1 )
You can’t perform that action at this time.
0 commit comments