Skip to content

Commit 9b39b61

Browse files
krzysz00 authored and nirvedhmeshram committed
Revert "[NVPTX] Do not run the NVVMReflect pass as part of the normal pipeline (llvm#121834)"
This reverts commit 29b5c18. Breaks ASan build.

Signed-off-by: Krzysztof Drewniak <[email protected]>
1 parent cd56d51 commit 9b39b61

File tree

8 files changed: 23 additions and 37 deletions.

llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,19 @@ void NVPTXSubtarget::anchor() {}
3434

3535
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
3636
StringRef FS) {
37-
TargetName = std::string(CPU);
37+
// Provide the default CPU if we don't have one.
38+
TargetName = std::string(CPU.empty() ? "sm_30" : CPU);
3839

39-
ParseSubtargetFeatures(getTargetName(), /*TuneCPU=*/getTargetName(), FS);
40+
ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
4041

41-
// Re-map SM version numbers, SmVersion carries the regular SMs which do
42-
// have relative order, while FullSmVersion allows distinguishing sm_90 from
43-
// sm_90a, which would *not* be a subset of sm_91.
44-
SmVersion = getSmVersion();
42+
// Re-map SM version numbers, SmVersion carries the regular SMs which do
43+
// have relative order, while FullSmVersion allows distinguishing sm_90 from
44+
// sm_90a, which would *not* be a subset of sm_91.
45+
SmVersion = getSmVersion();
4546

46-
// Set default to PTX 6.0 (CUDA 9.0)
47-
if (PTXVersion == 0) {
48-
PTXVersion = 60;
47+
// Set default to PTX 6.0 (CUDA 9.0)
48+
if (PTXVersion == 0) {
49+
PTXVersion = 60;
4950
}
5051

5152
return *this;

llvm/lib/Target/NVPTX/NVPTXSubtarget.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
111111
// - 0 represents base GPU model,
112112
// - non-zero value identifies particular architecture-accelerated variant.
113113
bool hasAAFeatures() const { return getFullSmVersion() % 10; }
114-
115-
// If the user did not provide a target we default to the `sm_30` target.
116-
std::string getTargetName() const {
117-
return TargetName.empty() ? "sm_30" : TargetName;
118-
}
119-
bool hasTargetName() const { return !TargetName.empty(); }
114+
std::string getTargetName() const { return TargetName; }
120115

121116
bool hasNativeBF16Support(int Opcode) const;
122117

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -255,10 +255,7 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
255255
PB.registerPipelineStartEPCallback(
256256
[this](ModulePassManager &PM, OptimizationLevel Level) {
257257
FunctionPassManager FPM;
258-
// We do not want to fold out calls to nvvm.reflect early if the user
259-
// has not provided a target architecture just yet.
260-
if (Subtarget.hasTargetName())
261-
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
258+
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
262259
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
263260
// point, if issues crop up, consider disabling.
264261
FPM.addPass(NVVMIntrRangePass());

llvm/lib/Target/NVPTX/NVVMReflect.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
#include "NVPTX.h"
2222
#include "llvm/ADT/SmallVector.h"
2323
#include "llvm/Analysis/ConstantFolding.h"
24-
#include "llvm/CodeGen/CommandFlags.h"
2524
#include "llvm/IR/Constants.h"
2625
#include "llvm/IR/DerivedTypes.h"
2726
#include "llvm/IR/Function.h"
@@ -220,12 +219,7 @@ bool NVVMReflect::runOnFunction(Function &F) {
220219
return runNVVMReflect(F, SmVersion);
221220
}
222221

223-
NVVMReflectPass::NVVMReflectPass() {
224-
// Get the CPU string from the command line if not provided.
225-
StringRef SM = codegen::getMCPU();
226-
if (!SM.consume_front("sm_") || SM.consumeInteger(10, SmVersion))
227-
SmVersion = 0;
228-
}
222+
NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
229223

230224
PreservedAnalyses NVVMReflectPass::run(Function &F,
231225
FunctionAnalysisManager &AM) {

llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
22
; Verify that __nvvm_reflect() is replaced with an appropriate value.
33
;
4-
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
4+
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
55
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
6-
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
6+
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
77
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
88

99
@"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"

llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
22
;
3-
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
3+
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
44
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
5-
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
5+
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
66
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
77

88
@"$str" = private addrspace(4) constant [12 x i8] c"__CUDA_ARCH\00"

llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33

44
; RUN: cat %s > %t.noftz
55
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6-
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
6+
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
77
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
88

99
; RUN: cat %s > %t.ftz
1010
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11-
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
11+
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
1212
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
1313

1414
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
@@ -43,7 +43,7 @@ exit:
4343

4444
declare i32 @llvm.nvvm.reflect(ptr)
4545

46-
; CHECK-LABEL: define i32 @intrinsic
46+
; CHECK-LABEL: define noundef i32 @intrinsic
4747
define i32 @intrinsic() {
4848
; CHECK-NOT: call i32 @llvm.nvvm.reflect
4949
; USE_FTZ_0: ret i32 0

llvm/test/CodeGen/NVPTX/nvvm-reflect.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33

44
; RUN: cat %s > %t.noftz
55
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6-
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
6+
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
77
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
88

99
; RUN: cat %s > %t.ftz
1010
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11-
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
11+
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
1212
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
1313

1414
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
@@ -43,8 +43,7 @@ exit:
4343

4444
declare i32 @llvm.nvvm.reflect(ptr)
4545

46-
; CHECK-LABEL: define i32 @intrinsic
47-
46+
; CHECK-LABEL: define noundef i32 @intrinsic
4847
define i32 @intrinsic() {
4948
; CHECK-NOT: call i32 @llvm.nvvm.reflect
5049
; USE_FTZ_0: ret i32 0

0 commit comments

Comments
 (0)