Skip to content

Commit a3d05e8

Browse files
authored
Remove an incorrect assert in MFMASmallGemmSingleWaveOpt. (#130131)
This assert was failing in a fuzzing test. I consulted with @jrbyrnes who said: The MFMASmallGemmSingleWaveOpt::apply() method is invoked if and only if the user has inserted an intrinsic llvm.amdgcn.iglp.opt(i32 1) into their source code. This intrinsic applies a highly specialized DAG mutation to result in specific scheduling for a specific set of kernels. These assertions are really just confirming that the characteristics of the kernel match what is expected (i.e., the kernels are similar to the ones this DAG mutation strategy was designed against). However, if we apply this DAG mutation to kernels for which it was not designed, then we may not find the types of instructions we are looking for, and may end up with empty caches. I think it should be fine to just return false if the cache is empty instead of the assert.
1 parent e268f71 commit a3d05e8

File tree

2 files changed

+43
-3
lines changed

2 files changed

+43
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,7 +1885,6 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
18851885
}
18861886
}
18871887

1888-
assert(Cache->size());
18891888
auto *DAG = SyncPipe[0].DAG;
18901889
for (auto &Elt : *Cache) {
18911890
if (DAG->IsReachable(Elt, const_cast<SUnit *>(SU)))
@@ -1922,8 +1921,6 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
19221921
return FitsInGroup;
19231922
}
19241923

1925-
assert(Cache->size());
1926-
19271924
// Does the VALU have a DS_WRITE successor that is the same as other
19281925
// VALU already in the group. The V_PERMs will all share 1 DS_W succ
19291926
return llvm::any_of(*Cache, [&SU](SUnit *Elt) {
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -O1 < %s | FileCheck -check-prefix=GCN %s
3+
4+
define amdgpu_kernel void @test_iglp_opt_rev_mfma_gemm(<1 x i64> %L1) {
5+
; GCN-LABEL: test_iglp_opt_rev_mfma_gemm:
6+
; GCN: ; %bb.0: ; %entry
7+
; GCN-NEXT: v_mov_b32_e32 v32, 0
8+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
9+
; GCN-NEXT: ds_read_b128 v[28:31], v32 offset:112
10+
; GCN-NEXT: ds_read_b128 v[24:27], v32 offset:96
11+
; GCN-NEXT: ds_read_b128 v[20:23], v32 offset:80
12+
; GCN-NEXT: ds_read_b128 v[16:19], v32 offset:64
13+
; GCN-NEXT: ds_read_b128 v[0:3], v32
14+
; GCN-NEXT: ds_read_b128 v[4:7], v32 offset:16
15+
; GCN-NEXT: ds_read_b128 v[8:11], v32 offset:32
16+
; GCN-NEXT: ds_read_b128 v[12:15], v32 offset:48
17+
; GCN-NEXT: v_mov_b32_e32 v34, 0
18+
; GCN-NEXT: v_mov_b32_e32 v35, v34
19+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
20+
; GCN-NEXT: s_cmp_lg_u64 s[0:1], 0
21+
; GCN-NEXT: ; iglp_opt mask(0x00000001)
22+
; GCN-NEXT: ds_write_b128 v32, v[28:31] offset:112
23+
; GCN-NEXT: ds_write_b128 v32, v[24:27] offset:96
24+
; GCN-NEXT: ds_write_b128 v32, v[20:23] offset:80
25+
; GCN-NEXT: ds_write_b128 v32, v[16:19] offset:64
26+
; GCN-NEXT: ds_write_b128 v32, v[12:15] offset:48
27+
; GCN-NEXT: ds_write_b128 v32, v[8:11] offset:32
28+
; GCN-NEXT: ds_write_b128 v32, v[4:7] offset:16
29+
; GCN-NEXT: ds_write_b128 v32, v[0:3]
30+
; GCN-NEXT: ds_write_b64 v32, v[34:35]
31+
; GCN-NEXT: s_endpgm
32+
entry:
33+
call void @llvm.amdgcn.iglp.opt(i32 1)
34+
%load.4 = load <32 x float>, ptr addrspace(3) null, align 128
35+
%B = urem <1 x i64> zeroinitializer, %L1
36+
store <32 x float> %load.4, ptr addrspace(3) null, align 128
37+
store <1 x i64> %B, ptr addrspace(3) null, align 8
38+
ret void
39+
}
40+
41+
declare void @llvm.amdgcn.iglp.opt(i32 immarg) #0
42+
43+
attributes #0 = { convergent nocallback nofree nounwind willreturn }

0 commit comments

Comments
 (0)