Skip to content

Commit 94a02b2

Browse files
committed
[NVPTX] Pull invariant load identification into IR pass
1 parent b02f2e8 commit 94a02b2

File tree

7 files changed

+266
-52
lines changed

7 files changed

+266
-52
lines changed

llvm/lib/Target/NVPTX/CMakeLists.txt

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,34 +13,35 @@ add_public_tablegen_target(NVPTXCommonTableGen)
1313
set(NVPTXCodeGen_sources
1414
NVPTXAliasAnalysis.cpp
1515
NVPTXAllocaHoisting.cpp
16-
NVPTXAtomicLower.cpp
1716
NVPTXAsmPrinter.cpp
1817
NVPTXAssignValidGlobalNames.cpp
18+
NVPTXAtomicLower.cpp
19+
NVPTXCtorDtorLowering.cpp
1920
NVPTXForwardParams.cpp
2021
NVPTXFrameLowering.cpp
2122
NVPTXGenericToNVVM.cpp
22-
NVPTXISelDAGToDAG.cpp
23-
NVPTXISelLowering.cpp
2423
NVPTXImageOptimizer.cpp
2524
NVPTXInstrInfo.cpp
25+
NVPTXISelDAGToDAG.cpp
26+
NVPTXISelLowering.cpp
2627
NVPTXLowerAggrCopies.cpp
27-
NVPTXLowerArgs.cpp
2828
NVPTXLowerAlloca.cpp
29+
NVPTXLowerArgs.cpp
2930
NVPTXLowerUnreachable.cpp
30-
NVPTXPeephole.cpp
3131
NVPTXMCExpr.cpp
32+
NVPTXPeephole.cpp
3233
NVPTXPrologEpilogPass.cpp
34+
NVPTXProxyRegErasure.cpp
3335
NVPTXRegisterInfo.cpp
3436
NVPTXReplaceImageHandles.cpp
3537
NVPTXSelectionDAGInfo.cpp
3638
NVPTXSubtarget.cpp
39+
NVPTXTagInvariantLoads.cpp
3740
NVPTXTargetMachine.cpp
3841
NVPTXTargetTransformInfo.cpp
3942
NVPTXUtilities.cpp
4043
NVVMIntrRange.cpp
4144
NVVMReflect.cpp
42-
NVPTXProxyRegErasure.cpp
43-
NVPTXCtorDtorLowering.cpp
4445
)
4546

4647
add_llvm_target(NVPTXCodeGen

llvm/lib/Target/NVPTX/NVPTX.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ FunctionPass *createNVPTXLowerArgsPass();
5151
FunctionPass *createNVPTXLowerAllocaPass();
5252
FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable,
5353
bool NoTrapAfterNoreturn);
54+
FunctionPass *createNVPTXTagInvariantLoadsPass();
5455
MachineFunctionPass *createNVPTXPeephole();
5556
MachineFunctionPass *createNVPTXProxyRegErasurePass();
5657
MachineFunctionPass *createNVPTXForwardParamsPass();
@@ -73,6 +74,7 @@ void initializeNVVMReflectPass(PassRegistry &);
7374
void initializeNVPTXAAWrapperPassPass(PassRegistry &);
7475
void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
7576
void initializeNVPTXPeepholePass(PassRegistry &);
77+
void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
7678

7779
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
7880
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -104,6 +106,10 @@ struct NVPTXLowerArgsPass : PassInfoMixin<NVPTXLowerArgsPass> {
104106
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
105107
};
106108

109+
struct NVPTXTagInvariantLoadsPass : PassInfoMixin<NVPTXTagInvariantLoadsPass> {
110+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
111+
};
112+
107113
namespace NVPTX {
108114
enum DrvInterface {
109115
NVCL,

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 8 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -766,46 +766,12 @@ NVPTX::Scope NVPTXDAGToDAGISel::getOperationScope(MemSDNode *N,
766766
llvm_unreachable("unhandled ordering");
767767
}
768768

769-
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
770-
unsigned CodeAddrSpace, MachineFunction *F) {
769+
static bool canLowerToLDG(const MemSDNode *N, const NVPTXSubtarget &Subtarget,
770+
unsigned CodeAddrSpace) {
771771
// We use ldg (i.e. ld.global.nc) for invariant loads from the global address
772772
// space.
773-
//
774-
// We have two ways of identifying invariant loads: Loads may be explicitly
775-
// marked as invariant, or we may infer them to be invariant.
776-
//
777-
// We currently infer invariance for loads from
778-
// - constant global variables, and
779-
// - kernel function pointer params that are noalias (i.e. __restrict) and
780-
// never written to.
781-
//
782-
// TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
783-
// not during the SelectionDAG phase).
784-
//
785-
// TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
786-
// explicitly invariant loads because these are how clang tells us to use ldg
787-
// when the user uses a builtin.
788-
if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::AddressSpace::Global)
789-
return false;
790-
791-
if (N->isInvariant())
792-
return true;
793-
794-
bool IsKernelFn = isKernelFunction(F->getFunction());
795-
796-
// We use getUnderlyingObjects() here instead of getUnderlyingObject() mainly
797-
// because the former looks through phi nodes while the latter does not. We
798-
// need to look through phi nodes to handle pointer induction variables.
799-
SmallVector<const Value *, 8> Objs;
800-
getUnderlyingObjects(N->getMemOperand()->getValue(), Objs);
801-
802-
return all_of(Objs, [&](const Value *V) {
803-
if (auto *A = dyn_cast<const Argument>(V))
804-
return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
805-
if (auto *GV = dyn_cast<const GlobalVariable>(V))
806-
return GV->isConstant();
807-
return false;
808-
});
773+
return Subtarget.hasLDG() && CodeAddrSpace == NVPTX::AddressSpace::Global &&
774+
N->isInvariant();
809775
}
810776

811777
static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
@@ -1106,10 +1072,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
11061072
return false;
11071073

11081074
// Address Space Setting
1109-
unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
1110-
if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
1075+
const unsigned CodeAddrSpace = getCodeAddrSpace(LD);
1076+
if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace))
11111077
return tryLDGLDU(N);
1112-
}
11131078

11141079
SDLoc DL(N);
11151080
SDValue Chain = N->getOperand(0);
@@ -1192,10 +1157,9 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
11921157
const MVT MemVT = MemEVT.getSimpleVT();
11931158

11941159
// Address Space Setting
1195-
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
1196-
if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
1160+
const unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
1161+
if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace))
11971162
return tryLDGLDU(N);
1198-
}
11991163

12001164
EVT EltVT = N->getValueType(0);
12011165
SDLoc DL(N);

llvm/lib/Target/NVPTX/NVPTXPassRegistry.def

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,6 @@ FUNCTION_ALIAS_ANALYSIS("nvptx-aa", NVPTXAA())
3838
#endif
3939
FUNCTION_PASS("nvvm-intr-range", NVVMIntrRangePass())
4040
FUNCTION_PASS("nvptx-copy-byval-args", NVPTXCopyByValArgsPass())
41-
FUNCTION_PASS("nvptx-lower-args", NVPTXLowerArgsPass(*this));
41+
FUNCTION_PASS("nvptx-lower-args", NVPTXLowerArgsPass(*this))
42+
FUNCTION_PASS("nvptx-tag-invariant-loads", NVPTXTagInvariantLoadsPass())
4243
#undef FUNCTION_PASS
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
//===------ NVPTXTagInvariantLoads.cpp - Tag invariant loads --------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements invariant load tagging. It traverses load instructions
10+
// in a function, and determines if each load can be tagged as invariant.
11+
//
12+
// We currently infer invariance for loads from
13+
// - constant global variables, and
14+
// - kernel function pointer params that are noalias (i.e. __restrict) and
15+
// never written to.
16+
//
17+
// TODO: Perform a more powerful invariance analysis (ideally IPO).
18+
//
19+
//===----------------------------------------------------------------------===//
20+
21+
#include "NVPTXUtilities.h"
22+
#include "llvm/Analysis/ValueTracking.h"
23+
#include "llvm/IR/InstIterator.h"
24+
#include "llvm/IR/Instructions.h"
25+
#include "llvm/IR/Metadata.h"
26+
#include "llvm/Support/NVPTXAddrSpace.h"
27+
28+
using namespace llvm;
29+
30+
// Tags a single load as invariant by attaching an empty metadata node under
// the LLVMContext::MD_invariant_load kind. Any metadata previously stored
// under that kind on LI is replaced by the new empty node.
static void markLoadsAsInvariant(LoadInst *LI) {
31+
LI->setMetadata(LLVMContext::MD_invariant_load,
32+
MDNode::get(LI->getContext(), {}));
33+
}
34+
35+
// Walks every instruction in F and tags qualifying loads as invariant via
// markLoadsAsInvariant(). A load qualifies when all of the following hold:
//   - its pointer operand is in NVPTXAS::ADDRESS_SPACE_GLOBAL,
//   - it does not already carry MD_invariant_load metadata, and
//   - every underlying object of its pointer operand is either
//       * a GlobalVariable for which isConstant() is true, or
//       * an Argument for which onlyReadsMemory() and hasNoAliasAttr() are
//         both true, and F is a kernel function (isKernelFunction(F)).
// Any other kind of underlying object disqualifies the load.
//
// Returns true if at least one load was tagged, false otherwise.
static bool tagInvariantLoads(Function &F) {
36+
const bool IsKernelFn = isKernelFunction(F);
37+
38+
bool Changed = false;
39+
for (auto &I : instructions(F)) {
40+
if (auto *LI = dyn_cast<LoadInst>(&I)) {
41+
42+
// Don't bother with non-global loads
43+
if (LI->getPointerAddressSpace() != NVPTXAS::ADDRESS_SPACE_GLOBAL)
44+
continue;
45+
46+
// Already tagged as invariant; leave the existing metadata untouched so
// this load does not count as a change.
if (LI->getMetadata(LLVMContext::MD_invariant_load))
47+
continue;
48+
49+
SmallVector<const Value *, 8> Objs;
50+
51+
// We use getUnderlyingObjects() here instead of getUnderlyingObject()
52+
// mainly because the former looks through phi nodes while the latter does
53+
// not. We need to look through phi nodes to handle pointer induction
54+
// variables.
55+
56+
getUnderlyingObjects(LI->getPointerOperand(), Objs);
57+
58+
// The load is invariant only if *every* underlying object qualifies.
const bool IsInvariant = all_of(Objs, [&](const Value *V) {
59+
if (const auto *A = dyn_cast<const Argument>(V))
60+
return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
61+
if (const auto *GV = dyn_cast<const GlobalVariable>(V))
62+
return GV->isConstant();
63+
return false;
64+
});
65+
66+
if (IsInvariant) {
67+
markLoadsAsInvariant(LI);
68+
Changed = true;
69+
}
70+
}
71+
}
72+
73+
return Changed;
74+
}
75+
76+
namespace {
77+
78+
// FunctionPass wrapper for invariant load tagging. runOnFunction() is
// declared here and defined out of line.
struct NVPTXTagInvariantLoadLegacyPass : public FunctionPass {
79+
// Passed to the FunctionPass base-class constructor below.
static char ID;
80+
81+
NVPTXTagInvariantLoadLegacyPass() : FunctionPass(ID) {}
82+
bool runOnFunction(Function &F) override;
83+
};
84+
85+
} // namespace
86+
87+
// Pass initialization boilerplate for NVPTXTagInvariantLoadLegacyPass, using
// the argument string "nvptx-tag-invariant-loads" and the description
// "NVPTX Tag Invariant Loads".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS; confirm against the macro.
INITIALIZE_PASS(NVPTXTagInvariantLoadLegacyPass, "nvptx-tag-invariant-loads",
88+
"NVPTX Tag Invariant Loads", false, false)
89+
90+
// Entry point of the FunctionPass wrapper: forwards to tagInvariantLoads()
// and returns its result unchanged.
bool NVPTXTagInvariantLoadLegacyPass::runOnFunction(Function &F) {
91+
return tagInvariantLoads(F);
92+
}
93+
94+
// Out-of-line definition of the static ID member declared in the pass struct.
char NVPTXTagInvariantLoadLegacyPass::ID = 0;
95+
96+
// Factory for the FunctionPass wrapper: returns a newly heap-allocated
// NVPTXTagInvariantLoadLegacyPass.
// NOTE(review): ownership presumably transfers to the caller (e.g. the pass
// manager via addPass); confirm at the call site.
FunctionPass *llvm::createNVPTXTagInvariantLoadsPass() {
97+
return new NVPTXTagInvariantLoadLegacyPass();
98+
}
99+
100+
// Entry point taking a FunctionAnalysisManager (unused). Runs
// tagInvariantLoads() on F and returns PreservedAnalyses::none() if any load
// was tagged, or PreservedAnalyses::all() if the function was left unchanged.
PreservedAnalyses NVPTXTagInvariantLoadsPass::run(Function &F, FunctionAnalysisManager &) {
101+
return tagInvariantLoads(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
102+
}

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
112112
initializeNVPTXAAWrapperPassPass(PR);
113113
initializeNVPTXExternalAAWrapperPass(PR);
114114
initializeNVPTXPeepholePass(PR);
115+
initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
115116
}
116117

117118
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
@@ -395,6 +396,7 @@ void NVPTXPassConfig::addIRPasses() {
395396
if (!DisableLoadStoreVectorizer)
396397
addPass(createLoadStoreVectorizerPass());
397398
addPass(createSROAPass());
399+
addPass(createNVPTXTagInvariantLoadsPass());
398400
}
399401

400402
if (ST.hasPTXASUnreachableBug()) {

0 commit comments

Comments
 (0)