File tree Expand file tree Collapse file tree 3 files changed +7
-2
lines changed Expand file tree Collapse file tree 3 files changed +7
-2
lines changed Original file line number Diff line number Diff line change @@ -172,6 +172,7 @@ option(GGML_HIP "ggml: use HIP"
172
172
option (GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF )
173
173
option (GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON )
174
174
option (GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF )
175
+ option (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF )
175
176
option (GGML_VULKAN "ggml: use Vulkan" OFF )
176
177
option (GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF )
177
178
option (GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF )
Original file line number Diff line number Diff line change @@ -207,9 +207,9 @@ typedef float2 dfloat2;
207
207
#define FP16_MMA_AVAILABLE
208
208
#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
209
209
210
- #if defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || defined(RDNA4))
210
+ #if defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || ( defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined( RDNA4) ))
211
211
#define FP16_MMA_AVAILABLE
212
- #endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || defined(RDNA4))
212
+ #endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || ( defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined( RDNA4) ))
213
213
214
214
#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING
215
215
#define NEW_MMA_AVAILABLE
Original file line number Diff line number Diff line change @@ -113,6 +113,10 @@ if (GGML_HIP_ROCWMMA_FATTN)
113
113
add_compile_definitions (GGML_HIP_ROCWMMA_FATTN )
114
114
endif ()
115
115
116
# Define GGML_HIP_ROCWMMA_FATTN_GFX12 when the user forces it through the
# GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 option, or when the detected HIP version
# is 7.0 or newer. The CUDA/HIP common header combines this definition with
# GGML_HIP_ROCWMMA_FATTN and RDNA4 to decide whether FP16_MMA_AVAILABLE is set.
#
# The version expansion is quoted so that an empty or undefined hip_VERSION
# evaluates to false instead of collapsing the condition into a malformed
# `OR VERSION_GREATER_EQUAL 7.0` expression that aborts configuration.
if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 OR "${hip_VERSION}" VERSION_GREATER_EQUAL 7.0)
    add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)
endif()
119
+
116
120
if (NOT GGML_CUDA_FA )
117
121
add_compile_definitions (GGML_CUDA_NO_FA )
118
122
endif ()
You can’t perform that action at this time.
0 commit comments