Skip to content

Commit d832a1c

Browse files
[NVPTX] Only run LowerUnreachable when necessary (#109868)
Before CUDA 12.3, `ptxas` did not recognize that the trap instruction terminates a basic block. Instead, it would assume that control flow continued to the next instruction. The next instruction could be in the block that's lexically below it. This would lead to phantom CFG edges being created within ptxas. [NVPTX: Lower unreachable to exit to allow ptxas to accurately reconstruct the CFG.](1ee4d88) added the LowerUnreachable pass to NVPTX to work around this. Several other workaround (WAR) patches followed. This bug in `ptxas` was fixed in CUDA 12.3 and is thus impossible to encounter when targeting PTX ISA v8.3+. This commit reverts the workarounds for the `ptxas` bug when targeting PTX ISA v8.3+. CC @maleadt
1 parent a4916d2 commit d832a1c

File tree

4 files changed

+100
-26
lines changed

4 files changed

+100
-26
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ def hasVote : Predicate<"Subtarget->hasVote()">;
139139
def hasDouble : Predicate<"Subtarget->hasDouble()">;
140140
def hasLDG : Predicate<"Subtarget->hasLDG()">;
141141
def hasLDU : Predicate<"Subtarget->hasLDU()">;
142+
def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;
143+
def noPTXASUnreachableBug : Predicate<"!Subtarget->hasPTXASUnreachableBug()">;
142144

143145
def doF32FTZ : Predicate<"useF32FTZ()">;
144146
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
@@ -3736,9 +3738,10 @@ def Callseq_End :
37363738
[(callseq_end timm:$amt1, timm:$amt2)]>;
37373739

37383740
// trap instruction
3741+
def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>, Requires<[noPTXASUnreachableBug]>;
37393742
// Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
37403743
// This is only necessary for ptxas versions prior to CUDA 12.3 (PTX ISA < 8.3).
3741-
def trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>;
3744+
def trapexitinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
37423745
// brkpt instruction
37433746
def debugtrapinst : NVPTXInst<(outs), (ins), "brkpt;", [(debugtrap)]>;
37443747

llvm/lib/Target/NVPTX/NVPTXSubtarget.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,14 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
9595
bool hasDotInstructions() const {
9696
return SmVersion >= 61 && PTXVersion >= 50;
9797
}
98+
// Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
99+
// terminates a basic block. Instead, it would assume that control flow
100+
// continued to the next instruction. The next instruction could be in the
101+
// block that's lexically below it. This would lead to phantom CFG edges
102+
// being created within ptxas. This issue was fixed in CUDA 12.3. Thus, when
103+
// targeting PTX ISA versions 8.3+ we can confidently say that the bug will not be
104+
// present.
105+
bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
98106
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
99107
unsigned int getFullSmVersion() const { return FullSmVersion; }
100108
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -367,9 +367,13 @@ void NVPTXPassConfig::addIRPasses() {
367367
addPass(createSROAPass());
368368
}
369369

370-
const auto &Options = getNVPTXTargetMachine().Options;
371-
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
372-
Options.NoTrapAfterNoreturn));
370+
if (ST.hasPTXASUnreachableBug()) {
371+
// Run LowerUnreachable to WAR a ptxas bug. See the commit description of
372+
// 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details.
373+
const auto &Options = getNVPTXTargetMachine().Options;
374+
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
375+
Options.NoTrapAfterNoreturn));
376+
}
373377
}
374378

375379
bool NVPTXPassConfig::addInstSelector() {
Lines changed: 81 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,107 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
2-
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
3+
; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE
34
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
4-
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
5+
; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE
56
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
6-
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
7+
; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN
78
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
8-
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
9+
; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN
910
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
10-
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
11+
; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP
1112
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
12-
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
13+
; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP
14+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
15+
; RUN: | FileCheck %s --check-prefixes=BUG-FIXED
1316
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
1417
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
1518

16-
; CHECK: .extern .func throw
19+
target triple = "nvptx-unknown-cuda"
20+
1721
declare void @throw() #0
1822
declare void @llvm.trap() #0
1923

20-
; CHECK-LABEL: .entry kernel_func
2124
define void @kernel_func() {
22-
; CHECK: call.uni
23-
; CHECK: throw,
25+
; NO-TRAP-UNREACHABLE-LABEL: kernel_func(
26+
; NO-TRAP-UNREACHABLE: {
27+
; NO-TRAP-UNREACHABLE-EMPTY:
28+
; NO-TRAP-UNREACHABLE-EMPTY:
29+
; NO-TRAP-UNREACHABLE-NEXT: // %bb.0:
30+
; NO-TRAP-UNREACHABLE-NEXT: { // callseq 0, 0
31+
; NO-TRAP-UNREACHABLE-NEXT: call.uni
32+
; NO-TRAP-UNREACHABLE-NEXT: throw,
33+
; NO-TRAP-UNREACHABLE-NEXT: (
34+
; NO-TRAP-UNREACHABLE-NEXT: );
35+
; NO-TRAP-UNREACHABLE-NEXT: } // callseq 0
36+
; NO-TRAP-UNREACHABLE-NEXT: // begin inline asm
37+
; NO-TRAP-UNREACHABLE-NEXT: exit;
38+
; NO-TRAP-UNREACHABLE-NEXT: // end inline asm
39+
;
40+
; NO-TRAP-AFTER-NORETURN-LABEL: kernel_func(
41+
; NO-TRAP-AFTER-NORETURN: {
42+
; NO-TRAP-AFTER-NORETURN-EMPTY:
43+
; NO-TRAP-AFTER-NORETURN-EMPTY:
44+
; NO-TRAP-AFTER-NORETURN-NEXT: // %bb.0:
45+
; NO-TRAP-AFTER-NORETURN-NEXT: { // callseq 0, 0
46+
; NO-TRAP-AFTER-NORETURN-NEXT: call.uni
47+
; NO-TRAP-AFTER-NORETURN-NEXT: throw,
48+
; NO-TRAP-AFTER-NORETURN-NEXT: (
49+
; NO-TRAP-AFTER-NORETURN-NEXT: );
50+
; NO-TRAP-AFTER-NORETURN-NEXT: } // callseq 0
51+
; NO-TRAP-AFTER-NORETURN-NEXT: // begin inline asm
52+
; NO-TRAP-AFTER-NORETURN-NEXT: exit;
53+
; NO-TRAP-AFTER-NORETURN-NEXT: // end inline asm
54+
; NO-TRAP-AFTER-NORETURN-NEXT: trap; exit;
55+
;
56+
; TRAP-LABEL: kernel_func(
57+
; TRAP: {
58+
; TRAP-EMPTY:
59+
; TRAP-EMPTY:
60+
; TRAP-NEXT: // %bb.0:
61+
; TRAP-NEXT: { // callseq 0, 0
62+
; TRAP-NEXT: call.uni
63+
; TRAP-NEXT: throw,
64+
; TRAP-NEXT: (
65+
; TRAP-NEXT: );
66+
; TRAP-NEXT: } // callseq 0
67+
; TRAP-NEXT: trap; exit;
68+
;
69+
; BUG-FIXED-LABEL: kernel_func(
70+
; BUG-FIXED: {
71+
; BUG-FIXED-EMPTY:
72+
; BUG-FIXED-EMPTY:
73+
; BUG-FIXED-NEXT: // %bb.0:
74+
; BUG-FIXED-NEXT: { // callseq 0, 0
75+
; BUG-FIXED-NEXT: call.uni
76+
; BUG-FIXED-NEXT: throw,
77+
; BUG-FIXED-NEXT: (
78+
; BUG-FIXED-NEXT: );
79+
; BUG-FIXED-NEXT: } // callseq 0
80+
; BUG-FIXED-NEXT: trap;
2481
call void @throw()
25-
; CHECK-TRAP-NOT: exit;
26-
; CHECK-TRAP: trap;
27-
; CHECK-NOTRAP-NOT: trap;
28-
; CHECK: exit;
2982
unreachable
3083
}
3184

32-
; CHECK-LABEL: kernel_func_2
3385
define void @kernel_func_2() {
34-
; CHECK: trap; exit;
86+
; CHECK-LABEL: kernel_func_2(
87+
; CHECK: {
88+
; CHECK-EMPTY:
89+
; CHECK-EMPTY:
90+
; CHECK-NEXT: // %bb.0:
91+
; CHECK-NEXT: trap; exit;
92+
;
93+
; BUG-FIXED-LABEL: kernel_func_2(
94+
; BUG-FIXED: {
95+
; BUG-FIXED-EMPTY:
96+
; BUG-FIXED-EMPTY:
97+
; BUG-FIXED-NEXT: // %bb.0:
98+
; BUG-FIXED-NEXT: trap;
3599
call void @llvm.trap()
36-
37-
;; Make sure we avoid emitting two trap instructions.
38-
; CHECK-NOT: trap;
39-
; CHECK-NOT: exit;
100+
; Make sure we avoid emitting two trap instructions.
40101
unreachable
41102
}
42103

43104
attributes #0 = { noreturn }
44105

45-
46106
!nvvm.annotations = !{!1}
47-
48107
!1 = !{ptr @kernel_func, !"kernel", i32 1}

0 commit comments

Comments
 (0)