Skip to content

Commit 58ffde6

Browse files
[NVPTX] Only run LowerUnreachable on PTX version <= 7.4
1 parent d905b1c commit 58ffde6

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,15 @@ void NVPTXPassConfig::addIRPasses() {
368368
addPass(createSROAPass());
369369
}
370370

371-
const auto &Options = getNVPTXTargetMachine().Options;
372-
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
373-
Options.NoTrapAfterNoreturn));
371+
if (ST.getPTXVersion() <= 74) {
372+
// This pass is a workaround (WAR) for a bug that's present in `ptxas` binaries that are
373+
// shipped in or prior to CUDA Toolkit 11.4. The highest PTX version that's
374+
// supported by `ptxas` in CUDA 11.4 is 7.4. Limit this pass to only run
375+
// when targeting PTX 7.4 or lower.
376+
const auto &Options = getNVPTXTargetMachine().Options;
377+
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
378+
Options.NoTrapAfterNoreturn));
379+
}
374380
}
375381

376382
bool NVPTXPassConfig::addInstSelector() {

llvm/test/CodeGen/NVPTX/unreachable.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
1111
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
1212
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
13+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -mattr=+ptx75 \
14+
; RUN: | FileCheck %s --check-prefixes=CHECK-BUG-FIXED
1315
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
1416
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
1517

@@ -26,12 +28,14 @@ define void @kernel_func() {
2628
; CHECK-TRAP: trap;
2729
; CHECK-NOTRAP-NOT: trap;
2830
; CHECK: exit;
31+
; CHECK-BUG-FIXED-NOT: exit;
2932
unreachable
3033
}
3134

3235
; CHECK-LABEL: kernel_func_2
3336
define void @kernel_func_2() {
3437
; CHECK: trap; exit;
38+
; CHECK-BUG-FIXED-NOT: exit;
3539
call void @llvm.trap()
3640

3741
;; Make sure we avoid emitting two trap instructions.
@@ -42,7 +46,5 @@ define void @kernel_func_2() {
4246

4347
attributes #0 = { noreturn }
4448

45-
4649
!nvvm.annotations = !{!1}
47-
4850
!1 = !{ptr @kernel_func, !"kernel", i32 1}

0 commit comments

Comments
 (0)