Skip to content

Commit aab651c

Browse files
mmereckifda0
authored and committed
Fix crash in AddRequiredMemoryFences pass
In kernels with `unreachable` instructions, it may be impossible to find a common post-dominator for all "unfenced" SLM stores. In such a case, add the fence immediately before the return. (cherry picked from commit ac0e352)
1 parent 7c71ec5 commit aab651c

File tree

2 files changed

+65
-10
lines changed

2 files changed

+65
-10
lines changed

IGC/Compiler/Legalizer/AddRequiredMemoryFences.cpp

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,10 @@ bool AddRequiredMemoryFences::runOnFunction(Function& F)
163163
SmallPtrSet<BasicBlock*, 16> seen{ rootBB };
164164
SmallVector<BasicBlock*, 16> worklist{ rootBB };
165165
SmallVector<BasicBlock*, 8> unfenced;
166-
bool hasUnfencedSlmStore = false;
167-
bool hasSlmFence = false;
168166
while (!worklist.empty())
169167
{
168+
bool hasUnfencedSlmStore = false;
169+
bool hasSlmFence = false;
170170
BasicBlock* BB = worklist.back();
171171
worklist.pop_back();
172172
seen.insert(BB);
@@ -205,23 +205,32 @@ bool AddRequiredMemoryFences::runOnFunction(Function& F)
205205
{
206206
auto it = blocks.begin();
207207
BasicBlock* postDomBB = *it++;
208-
for (; it != blocks.end(); ++it)
208+
for (; it != blocks.end() && postDomBB != nullptr; ++it)
209209
{
210210
postDomBB = PDT->findNearestCommonDominator(postDomBB, *it);
211211
}
212212
return postDomBB;
213213
};
214214
BasicBlock* postDomBB = FindPostDominator(unfenced);
215-
Loop* L = LI->getLoopFor(postDomBB);
216-
if (L)
215+
if (postDomBB != nullptr)
217216
{
218-
while (!L->isOutermost())
217+
Loop* L = LI->getLoopFor(postDomBB);
218+
if (L)
219219
{
220-
L = L->getParentLoop();
220+
while (!L->isOutermost())
221+
{
222+
L = L->getParentLoop();
223+
}
224+
SmallVector<BasicBlock*, 4> exitBlocks;
225+
L->getUniqueExitBlocks(exitBlocks);
226+
postDomBB = FindPostDominator(exitBlocks);
221227
}
222-
SmallVector<BasicBlock*, 4> exitBlocks;
223-
L->getUniqueExitBlocks(exitBlocks);
224-
postDomBB = FindPostDominator(exitBlocks);
228+
}
229+
if (postDomBB == nullptr)
230+
{
231+
// A common post-dominator may not exist if the kernel contains an unreachable
232+
// instruction.
233+
postDomBB = rootBB;
225234
}
226235
IGC_ASSERT(postDomBB);
227236
IGCIRBuilder<> IRB(postDomBB->getTerminator());
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
10+
; REQUIRES: llvm-14-plus
11+
; RUN: igc_opt --opaque-pointers %s -S --platformdg2 --inputcs --igc-add-required-memory-fences | FileCheck %s
12+
13+
14+
; Test that the pass adds exactly one SLM fence, placed in the last basic block (Label-End)
15+
define void @f3(ptr addrspace(3) %address, i1 %cond, i1 %cond1) { ; two addrspace(3) stores, one of which can reach an `unreachable` block
16+
Label-0:
17+
store i32 0, ptr addrspace(3) %address ; first store, executed on every path
18+
br i1 %cond, label %Label-True, label %Label-Merge
19+
Label-True:
20+
store i32 0, ptr addrspace(3) %address ; second store, only reached when %cond is true
21+
br i1 %cond1, label %Label-Unreachable, label %Label-Merge ; one successor never reaches the return
22+
Label-Unreachable:
23+
unreachable ; block has no successor, so no path from here leads to Label-End
24+
Label-Merge:
25+
br label %Label-End
26+
Label-End:
27+
ret void ; the CHECK lines below expect the single fence immediately before this return
28+
}
29+
; CHECK-LABEL: define void @f3
30+
; CHECK-NOT: call void @llvm.genx.GenISA.LSCFence({{.*}})
31+
; CHECK-LABEL: Label-End:
32+
; CHECK-NEXT: call void @llvm.genx.GenISA.LSCFence(i32 3, i32 0, i32 0)
33+
; CHECK-NOT: call void @llvm.genx.GenISA.LSCFence({{.*}})
34+
; CHECK: ret void
35+
36+
37+
38+
; Function Attrs: convergent nounwind
39+
declare void @llvm.genx.GenISA.LSCFence(i32, i32, i32) #2
40+
; Function Attrs: argmemonly nounwind
41+
declare i32 @llvm.genx.GenISA.intatomicraw.i32.p3(ptr addrspace(3), i32, i32, i32) #3
42+
43+
attributes #1 = { "null-pointer-is-valid"="true" }
44+
attributes #2 = { convergent nounwind }
45+
attributes #3 = { argmemonly nounwind }
46+
attributes #4 = { nounwind }

0 commit comments

Comments
 (0)