Skip to content

Commit 2a5fbdf

Browse files
committed
[AMDGPU] Don't unify divergent exit nodes with musttail calls
Fixes SWDEV-512254.
1 parent 10ed0e4 commit 2a5fbdf

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,10 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
215 215
PDT.roots(), [&](auto BB) { return !isUniformlyReached(UA, *BB); });
216 216

217 217
for (BasicBlock *BB : PDT.roots()) {
218-
if (isa<ReturnInst>(BB->getTerminator())) {
218+
if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
219+
auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode());
220+
if (CI && CI->isMustTailCall())
221+
continue;
219 222
if (HasDivergentExitBlock)
220 223
ReturningBlocks.push_back(BB);
221 224
} else if (isa<UnreachableInst>(BB->getTerminator())) {
Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=amdgpu-unify-divergent-exit-nodes -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
3+
4+
define void @spill_sgpr_with_tail_call() {
5+
; CHECK-LABEL: define void @spill_sgpr_with_tail_call(
6+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[L1:%.*]] = load i1, ptr null, align 1
8+
; CHECK-NEXT: br i1 [[L1]], label %[[SW_C:.*]], label %[[SW_D:.*]]
9+
; CHECK: [[SW_D]]:
10+
; CHECK-NEXT: musttail call void null()
11+
; CHECK-NEXT: ret void
12+
; CHECK: [[SW_C]]:
13+
; CHECK-NEXT: ret void
14+
;
15+
%L1 = load i1, ptr null, align 1
16+
br i1 %L1, label %SW_C, label %SW_D
17+
18+
SW_D:
19+
musttail call void null()
20+
ret void
21+
22+
SW_C:
23+
ret void
24+
}
25+

0 commit comments

Comments
 (0)