File tree Expand file tree Collapse file tree 8 files changed +23
-37
lines changed Expand file tree Collapse file tree 8 files changed +23
-37
lines changed Original file line number Diff line number Diff line change @@ -34,18 +34,19 @@ void NVPTXSubtarget::anchor() {}
34
34
35
35
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies (StringRef CPU,
36
36
StringRef FS) {
37
- TargetName = std::string (CPU);
37
+ // Provide the default CPU if we don't have one.
38
+ TargetName = std::string (CPU.empty () ? " sm_30" : CPU);
38
39
39
- ParseSubtargetFeatures (getTargetName () , /* TuneCPU= */ getTargetName () , FS);
40
+ ParseSubtargetFeatures (TargetName , /* TuneCPU*/ TargetName , FS);
40
41
41
- // Re-map SM version numbers, SmVersion carries the regular SMs which do
42
- // have relative order, while FullSmVersion allows distinguishing sm_90 from
43
- // sm_90a, which would *not* be a subset of sm_91.
44
- SmVersion = getSmVersion ();
42
+ // Re-map SM version numbers, SmVersion carries the regular SMs which do
43
+ // have relative order, while FullSmVersion allows distinguishing sm_90 from
44
+ // sm_90a, which would *not* be a subset of sm_91.
45
+ SmVersion = getSmVersion ();
45
46
46
- // Set default to PTX 6.0 (CUDA 9.0)
47
- if (PTXVersion == 0 ) {
48
- PTXVersion = 60 ;
47
+ // Set default to PTX 6.0 (CUDA 9.0)
48
+ if (PTXVersion == 0 ) {
49
+ PTXVersion = 60 ;
49
50
}
50
51
51
52
return *this ;
Original file line number Diff line number Diff line change @@ -111,12 +111,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
111
111
// - 0 represents base GPU model,
112
112
// - non-zero value identifies particular architecture-accelerated variant.
113
113
bool hasAAFeatures () const { return getFullSmVersion () % 10 ; }
114
-
115
- // If the user did not provide a target we default to the `sm_30` target.
116
- std::string getTargetName () const {
117
- return TargetName.empty () ? " sm_30" : TargetName;
118
- }
119
- bool hasTargetName () const { return !TargetName.empty (); }
114
+ std::string getTargetName () const { return TargetName; }
120
115
121
116
bool hasNativeBF16Support (int Opcode) const ;
122
117
Original file line number Diff line number Diff line change @@ -255,10 +255,7 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
255
255
PB.registerPipelineStartEPCallback (
256
256
[this ](ModulePassManager &PM, OptimizationLevel Level) {
257
257
FunctionPassManager FPM;
258
- // We do not want to fold out calls to nvvm.reflect early if the user
259
- // has not provided a target architecture just yet.
260
- if (Subtarget.hasTargetName ())
261
- FPM.addPass (NVVMReflectPass (Subtarget.getSmVersion ()));
258
+ FPM.addPass (NVVMReflectPass (Subtarget.getSmVersion ()));
262
259
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
263
260
// point, if issues crop up, consider disabling.
264
261
FPM.addPass (NVVMIntrRangePass ());
Original file line number Diff line number Diff line change 21
21
#include " NVPTX.h"
22
22
#include " llvm/ADT/SmallVector.h"
23
23
#include " llvm/Analysis/ConstantFolding.h"
24
- #include " llvm/CodeGen/CommandFlags.h"
25
24
#include " llvm/IR/Constants.h"
26
25
#include " llvm/IR/DerivedTypes.h"
27
26
#include " llvm/IR/Function.h"
@@ -220,12 +219,7 @@ bool NVVMReflect::runOnFunction(Function &F) {
220
219
return runNVVMReflect (F, SmVersion);
221
220
}
222
221
223
- NVVMReflectPass::NVVMReflectPass () {
224
- // Get the CPU string from the command line if not provided.
225
- StringRef SM = codegen::getMCPU ();
226
- if (!SM.consume_front (" sm_" ) || SM.consumeInteger (10 , SmVersion))
227
- SmVersion = 0 ;
228
- }
222
+ NVVMReflectPass::NVVMReflectPass () : NVVMReflectPass(0 ) {}
229
223
230
224
PreservedAnalyses NVVMReflectPass::run (Function &F,
231
225
FunctionAnalysisManager &AM) {
Original file line number Diff line number Diff line change 1
1
; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
2
2
; Verify that __nvvm_reflect() is replaced with an appropriate value.
3
3
;
4
- ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
4
+ ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
5
5
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
6
- ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
6
+ ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
7
7
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
8
8
9
9
@"$str" = private addrspace (1 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 1
1
; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
2
2
;
3
- ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
3
+ ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
4
4
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
5
- ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
5
+ ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
6
6
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
7
7
8
8
@"$str" = private addrspace (4 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 3
3
4
4
; RUN: cat %s > %t.noftz
5
5
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6
- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
6
+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
7
7
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
8
8
9
9
; RUN: cat %s > %t.ftz
10
10
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11
- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
11
+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
12
12
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
13
13
14
14
@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
43
43
44
44
declare i32 @llvm.nvvm.reflect (ptr )
45
45
46
- ; CHECK-LABEL: define i32 @intrinsic
46
+ ; CHECK-LABEL: define noundef i32 @intrinsic
47
47
define i32 @intrinsic () {
48
48
; CHECK-NOT: call i32 @llvm.nvvm.reflect
49
49
; USE_FTZ_0: ret i32 0
Original file line number Diff line number Diff line change 3
3
4
4
; RUN: cat %s > %t.noftz
5
5
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6
- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
6
+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
7
7
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
8
8
9
9
; RUN: cat %s > %t.ftz
10
10
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11
- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
11
+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
12
12
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
13
13
14
14
@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
43
43
44
44
declare i32 @llvm.nvvm.reflect (ptr )
45
45
46
- ; CHECK-LABEL: define i32 @intrinsic
47
-
46
+ ; CHECK-LABEL: define noundef i32 @intrinsic
48
47
define i32 @intrinsic () {
49
48
; CHECK-NOT: call i32 @llvm.nvvm.reflect
50
49
; USE_FTZ_0: ret i32 0
You can’t perform that action at this time.
0 commit comments