-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[NVPTX] Only run LowerUnreachable when necessary #109868
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
58ffde6
5424a9d
529062c
b88976b
29acb3c
328e12c
5d2902d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -368,9 +368,13 @@ void NVPTXPassConfig::addIRPasses() { | |
addPass(createSROAPass()); | ||
} | ||
|
||
const auto &Options = getNVPTXTargetMachine().Options; | ||
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable, | ||
Options.NoTrapAfterNoreturn)); | ||
if (ST.hasPTXASUnreachableBug()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Topic for discussion: should this be a CLI option? I added the check for PTX Version <= 7.4 because it's the best proxy for querying whether […]. Alternatively, we could put the onus on the user to set a flag indicating that they're using a copy of […]. CC @Artem-B for comment. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For PTX > 8.3 it's not needed. For PTX versions older than that, we should assume that the output may be given to a ptxas with the bug. The problem is that we decide how to generate code at build time, but in the general case we may not know which ptxas will be used until run time. We should conservatively assume that we do need to produce […]. If we eventually find a legitimate use case for such an option, we can add it then. For now, I do not see much point. |
||
// Run LowerUnreachable to WAR a ptxas bug. See the commit description of | ||
// 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details. | ||
const auto &Options = getNVPTXTargetMachine().Options; | ||
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable, | ||
Options.NoTrapAfterNoreturn)); | ||
} | ||
} | ||
|
||
bool NVPTXPassConfig::addInstSelector() { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,107 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \ | ||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP | ||
; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE | ||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \ | ||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP | ||
; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE | ||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \ | ||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP | ||
; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN | ||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \ | ||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP | ||
; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN | ||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \ | ||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP | ||
; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP | ||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \ | ||
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP | ||
; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP | ||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \ | ||
; RUN: | FileCheck %s --check-prefixes=BUG-FIXED | ||
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} | ||
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} | ||
|
||
; CHECK: .extern .func throw | ||
target triple = "nvptx-unknown-cuda" | ||
|
||
declare void @throw() #0 | ||
declare void @llvm.trap() #0 | ||
|
||
; CHECK-LABEL: .entry kernel_func | ||
define void @kernel_func() { | ||
; CHECK: call.uni | ||
; CHECK: throw, | ||
; NO-TRAP-UNREACHABLE-LABEL: kernel_func( | ||
; NO-TRAP-UNREACHABLE: { | ||
; NO-TRAP-UNREACHABLE-EMPTY: | ||
; NO-TRAP-UNREACHABLE-EMPTY: | ||
; NO-TRAP-UNREACHABLE-NEXT: // %bb.0: | ||
; NO-TRAP-UNREACHABLE-NEXT: { // callseq 0, 0 | ||
; NO-TRAP-UNREACHABLE-NEXT: call.uni | ||
; NO-TRAP-UNREACHABLE-NEXT: throw, | ||
; NO-TRAP-UNREACHABLE-NEXT: ( | ||
; NO-TRAP-UNREACHABLE-NEXT: ); | ||
; NO-TRAP-UNREACHABLE-NEXT: } // callseq 0 | ||
; NO-TRAP-UNREACHABLE-NEXT: // begin inline asm | ||
; NO-TRAP-UNREACHABLE-NEXT: exit; | ||
; NO-TRAP-UNREACHABLE-NEXT: // end inline asm | ||
; | ||
; NO-TRAP-AFTER-NORETURN-LABEL: kernel_func( | ||
; NO-TRAP-AFTER-NORETURN: { | ||
; NO-TRAP-AFTER-NORETURN-EMPTY: | ||
; NO-TRAP-AFTER-NORETURN-EMPTY: | ||
; NO-TRAP-AFTER-NORETURN-NEXT: // %bb.0: | ||
; NO-TRAP-AFTER-NORETURN-NEXT: { // callseq 0, 0 | ||
; NO-TRAP-AFTER-NORETURN-NEXT: call.uni | ||
; NO-TRAP-AFTER-NORETURN-NEXT: throw, | ||
; NO-TRAP-AFTER-NORETURN-NEXT: ( | ||
; NO-TRAP-AFTER-NORETURN-NEXT: ); | ||
; NO-TRAP-AFTER-NORETURN-NEXT: } // callseq 0 | ||
; NO-TRAP-AFTER-NORETURN-NEXT: // begin inline asm | ||
; NO-TRAP-AFTER-NORETURN-NEXT: exit; | ||
; NO-TRAP-AFTER-NORETURN-NEXT: // end inline asm | ||
; NO-TRAP-AFTER-NORETURN-NEXT: trap; exit; | ||
; | ||
; TRAP-LABEL: kernel_func( | ||
; TRAP: { | ||
; TRAP-EMPTY: | ||
; TRAP-EMPTY: | ||
; TRAP-NEXT: // %bb.0: | ||
; TRAP-NEXT: { // callseq 0, 0 | ||
; TRAP-NEXT: call.uni | ||
; TRAP-NEXT: throw, | ||
; TRAP-NEXT: ( | ||
; TRAP-NEXT: ); | ||
; TRAP-NEXT: } // callseq 0 | ||
; TRAP-NEXT: trap; exit; | ||
; | ||
; BUG-FIXED-LABEL: kernel_func( | ||
; BUG-FIXED: { | ||
; BUG-FIXED-EMPTY: | ||
; BUG-FIXED-EMPTY: | ||
; BUG-FIXED-NEXT: // %bb.0: | ||
; BUG-FIXED-NEXT: { // callseq 0, 0 | ||
; BUG-FIXED-NEXT: call.uni | ||
; BUG-FIXED-NEXT: throw, | ||
; BUG-FIXED-NEXT: ( | ||
; BUG-FIXED-NEXT: ); | ||
; BUG-FIXED-NEXT: } // callseq 0 | ||
; BUG-FIXED-NEXT: trap; | ||
call void @throw() | ||
; CHECK-TRAP-NOT: exit; | ||
; CHECK-TRAP: trap; | ||
; CHECK-NOTRAP-NOT: trap; | ||
; CHECK: exit; | ||
unreachable | ||
} | ||
|
||
; CHECK-LABEL: kernel_func_2 | ||
define void @kernel_func_2() { | ||
; CHECK: trap; exit; | ||
; CHECK-LABEL: kernel_func_2( | ||
; CHECK: { | ||
; CHECK-EMPTY: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: // %bb.0: | ||
; CHECK-NEXT: trap; exit; | ||
; | ||
; BUG-FIXED-LABEL: kernel_func_2( | ||
; BUG-FIXED: { | ||
; BUG-FIXED-EMPTY: | ||
; BUG-FIXED-EMPTY: | ||
; BUG-FIXED-NEXT: // %bb.0: | ||
; BUG-FIXED-NEXT: trap; | ||
call void @llvm.trap() | ||
|
||
;; Make sure we avoid emitting two trap instructions. | ||
; CHECK-NOT: trap; | ||
; CHECK-NOT: exit; | ||
; Make sure we avoid emitting two trap instructions. | ||
unreachable | ||
} | ||
|
||
attributes #0 = { noreturn } | ||
|
||
|
||
!nvvm.annotations = !{!1} | ||
|
||
!1 = !{ptr @kernel_func, !"kernel", i32 1} |
Uh oh!
There was an error while loading. Please reload this page.