Remove an incorrect assert in MFMASmallGemmSingleWaveOpt. (#130131)

anjenner · web-flow · commit a3d05e898736 · 2025-04-24T09:22:24.000+01:00
This assert was failing in a fuzzing test. I consulted with @jrbyrnes, who said: The MFMASmallGemmSingleWaveOpt::apply() method is invoked if and only if the user has inserted an intrinsic llvm.amdgcn.iglp.opt(i32 1) into their source code. This intrinsic applies a highly specialized DAG mutation that results in specific scheduling for a specific set of kernels. These assertions are really just confirming that the characteristics of the kernel match what is expected (i.e. the kernels are similar to the ones this DAG mutation strategy was designed against). However, if we apply this DAG mutation to kernels for which it was not designed, then we may not find the types of instructions we are looking for, and may end up with empty caches. I think it should be fine to just return false if the cache is empty instead of asserting.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -1885,7 +1885,6 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
         }
       }
 
-      assert(Cache->size());
       auto *DAG = SyncPipe[0].DAG;
       for (auto &Elt : *Cache) {
         if (DAG->IsReachable(Elt, const_cast<SUnit *>(SU)))
@@ -1922,8 +1921,6 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
         return FitsInGroup;
       }
 
-      assert(Cache->size());
-
       // Does the VALU have a DS_WRITE successor that is the same as other
       // VALU already in the group. The V_PERMs will all share 1 DS_W succ
       return llvm::any_of(*Cache, [&SU](SUnit *Elt) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.AFLCustomIRMutator.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.AFLCustomIRMutator.opt.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -O1 < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(<1 x i64> %L1) {
+; GCN-LABEL: test_iglp_opt_rev_mfma_gemm:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    v_mov_b32_e32 v32, 0
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; GCN-NEXT:    ds_read_b128 v[28:31], v32 offset:112
+; GCN-NEXT:    ds_read_b128 v[24:27], v32 offset:96
+; GCN-NEXT:    ds_read_b128 v[20:23], v32 offset:80
+; GCN-NEXT:    ds_read_b128 v[16:19], v32 offset:64
+; GCN-NEXT:    ds_read_b128 v[0:3], v32
+; GCN-NEXT:    ds_read_b128 v[4:7], v32 offset:16
+; GCN-NEXT:    ds_read_b128 v[8:11], v32 offset:32
+; GCN-NEXT:    ds_read_b128 v[12:15], v32 offset:48
+; GCN-NEXT:    v_mov_b32_e32 v34, 0
+; GCN-NEXT:    v_mov_b32_e32 v35, v34
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
+; GCN-NEXT:    ; iglp_opt mask(0x00000001)
+; GCN-NEXT:    ds_write_b128 v32, v[28:31] offset:112
+; GCN-NEXT:    ds_write_b128 v32, v[24:27] offset:96
+; GCN-NEXT:    ds_write_b128 v32, v[20:23] offset:80
+; GCN-NEXT:    ds_write_b128 v32, v[16:19] offset:64
+; GCN-NEXT:    ds_write_b128 v32, v[12:15] offset:48
+; GCN-NEXT:    ds_write_b128 v32, v[8:11] offset:32
+; GCN-NEXT:    ds_write_b128 v32, v[4:7] offset:16
+; GCN-NEXT:    ds_write_b128 v32, v[0:3]
+; GCN-NEXT:    ds_write_b64 v32, v[34:35]
+; GCN-NEXT:    s_endpgm
+entry:
+  call void @llvm.amdgcn.iglp.opt(i32 1)
+  %load.4 = load <32 x float>, ptr addrspace(3) null, align 128
+  %B = urem <1 x i64> zeroinitializer, %L1
+  store <32 x float> %load.4, ptr addrspace(3) null, align 128
+  store <1 x i64> %B, ptr addrspace(3) null, align 8
+  ret void
+}
+
+declare void @llvm.amdgcn.iglp.opt(i32 immarg) #0
+
+attributes #0 = { convergent nocallback nofree nounwind willreturn }

Original file line number	Diff line number	Diff line change
`@@ -1885,7 +1885,6 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {`
`1885`	`1885`	`}`
`1886`	`1886`	`}`
`1887`	`1887`
`1888`		`- assert(Cache->size());`
`1889`	`1888`	`auto *DAG = SyncPipe[0].DAG;`
`1890`	`1889`	`for (auto &Elt : *Cache) {`
`1891`	`1890`	`if (DAG->IsReachable(Elt, const_cast<SUnit *>(SU)))`
`@@ -1922,8 +1921,6 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {`
`1922`	`1921`	`return FitsInGroup;`
`1923`	`1922`	`}`
`1924`	`1923`
`1925`		`- assert(Cache->size());`
`1926`		`-`
`1927`	`1924`	`// Does the VALU have a DS_WRITE successor that is the same as other`
`1928`	`1925`	`// VALU already in the group. The V_PERMs will all share 1 DS_W succ`
`1929`	`1926`	`return llvm::any_of(*Cache, [&SU](SUnit *Elt) {`