6 files changed: +18 -12 lines changed
Original file line number Diff line number Diff line change @@ -255,7 +255,6 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
255
255
PB.registerPipelineStartEPCallback (
256
256
[this ](ModulePassManager &PM, OptimizationLevel Level) {
257
257
FunctionPassManager FPM;
258
- FPM.addPass (NVVMReflectPass (Subtarget.getSmVersion ()));
259
258
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
260
259
// point, if issues crop up, consider disabling.
261
260
FPM.addPass (NVVMIntrRangePass ());
Original file line number Diff line number Diff line change 21
21
#include " NVPTX.h"
22
22
#include " llvm/ADT/SmallVector.h"
23
23
#include " llvm/Analysis/ConstantFolding.h"
24
+ #include " llvm/CodeGen/CommandFlags.h"
24
25
#include " llvm/IR/Constants.h"
25
26
#include " llvm/IR/DerivedTypes.h"
26
27
#include " llvm/IR/Function.h"
@@ -219,7 +220,12 @@ bool NVVMReflect::runOnFunction(Function &F) {
219
220
return runNVVMReflect (F, SmVersion);
220
221
}
221
222
222
- NVVMReflectPass::NVVMReflectPass () : NVVMReflectPass(0 ) {}
223
+ NVVMReflectPass::NVVMReflectPass () {
224
+ // Get the CPU string from the command line if not provided.
225
+ StringRef SM = codegen::getMCPU ();
226
+ if (!SM.consume_front (" sm_" ) || SM.consumeInteger (10 , SmVersion))
227
+ SmVersion = 0 ;
228
+ }
223
229
224
230
PreservedAnalyses NVVMReflectPass::run (Function &F,
225
231
FunctionAnalysisManager &AM) {
Original file line number Diff line number Diff line change 1
1
; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
2
2
; Verify that __nvvm_reflect() is replaced with an appropriate value.
3
3
;
4
- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
4
+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
5
5
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
6
- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
6
+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
7
7
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
8
8
9
9
@"$str" = private addrspace (1 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 1
1
; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
2
2
;
3
- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
3
+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
4
4
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
5
- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
5
+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
6
6
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
7
7
8
8
@"$str" = private addrspace (4 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 3
3
4
4
; RUN: cat %s > %t.noftz
5
5
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6
- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
6
+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
7
7
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
8
8
9
9
; RUN: cat %s > %t.ftz
10
10
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11
- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
11
+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
12
12
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
13
13
14
14
@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
43
43
44
44
declare i32 @llvm.nvvm.reflect (ptr )
45
45
46
- ; CHECK-LABEL: define noundef i32 @intrinsic
46
+ ; CHECK-LABEL: define i32 @intrinsic
47
47
define i32 @intrinsic () {
48
48
; CHECK-NOT: call i32 @llvm.nvvm.reflect
49
49
; USE_FTZ_0: ret i32 0
Original file line number Diff line number Diff line change 3
3
4
4
; RUN: cat %s > %t.noftz
5
5
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6
- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
6
+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
7
7
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
8
8
9
9
; RUN: cat %s > %t.ftz
10
10
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11
- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
11
+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
12
12
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
13
13
14
14
@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
43
43
44
44
declare i32 @llvm.nvvm.reflect (ptr )
45
45
46
- ; CHECK-LABEL: define noundef i32 @intrinsic
46
+ ; CHECK-LABEL: define i32 @intrinsic
47
+
47
48
define i32 @intrinsic () {
48
49
; CHECK-NOT: call i32 @llvm.nvvm.reflect
49
50
; USE_FTZ_0: ret i32 0
You can’t perform that action at this time.
0 commit comments