Skip to content

Commit bf22593

Browse files
committed
[InferAddressSpaces] Support assumed addrspaces from addrspace predicates.
- CUDA cannot associate a memory space with pointer types. Even though Clang could add extra attributes to specify the address space explicitly on a pointer type, doing so breaks portability between Clang and NVCC.
- This change proposes to infer the address space of a pointer from assumptions built upon target-specific address space predicates, such as `__isGlobal` from CUDA. E.g.,

```
foo(float *p) {
  __builtin_assume(__isGlobal(p));
  // From there, we could assume p is a global pointer instead of a
  // generic one.
}
```

This makes the code portable without introducing implementation-specific features. Note that NVCC supports `__builtin_assume` starting from version 11.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D112041
1 parent f3798ad commit bf22593

21 files changed

+478
-69
lines changed

clang/test/CodeGen/thinlto-distributed-newpm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@
4747
; CHECK-O: Running pass: PromotePass
4848
; CHECK-O: Running analysis: DominatorTreeAnalysis on main
4949
; CHECK-O: Running analysis: AssumptionAnalysis on main
50+
; CHECK-O: Running analysis: TargetIRAnalysis on main
5051
; CHECK-O: Running pass: DeadArgumentEliminationPass
5152
; CHECK-O: Running pass: InstCombinePass on main
5253
; CHECK-O: Running analysis: TargetLibraryAnalysis on main
5354
; CHECK-O: Running analysis: OptimizationRemarkEmitterAnalysis on main
54-
; CHECK-O: Running analysis: TargetIRAnalysis on main
5555
; CHECK-O: Running analysis: AAManager on main
5656
; CHECK-O: Running analysis: BasicAA on main
5757
; CHECK-O: Running analysis: ScopedNoAliasAA on main

llvm/include/llvm/Analysis/AssumptionCache.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ namespace llvm {
2929
class AssumeInst;
3030
class Function;
3131
class raw_ostream;
32+
class TargetTransformInfo;
3233
class Value;
3334

3435
/// A cache of \@llvm.assume calls within a function.
@@ -59,6 +60,8 @@ class AssumptionCache {
5960
/// We track this to lazily populate our assumptions.
6061
Function &F;
6162

63+
TargetTransformInfo *TTI;
64+
6265
/// Vector of weak value handles to calls of the \@llvm.assume
6366
/// intrinsic.
6467
SmallVector<ResultElem, 4> AssumeHandles;
@@ -103,7 +106,8 @@ class AssumptionCache {
103106
public:
104107
/// Construct an AssumptionCache from a function by scanning all of
105108
/// its instructions.
106-
AssumptionCache(Function &F) : F(F) {}
109+
AssumptionCache(Function &F, TargetTransformInfo *TTI = nullptr)
110+
: F(F), TTI(TTI) {}
107111

108112
/// This cache is designed to be self-updating and so it should never be
109113
/// invalidated.
@@ -174,9 +178,7 @@ class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> {
174178
public:
175179
using Result = AssumptionCache;
176180

177-
AssumptionCache run(Function &F, FunctionAnalysisManager &) {
178-
return AssumptionCache(F);
179-
}
181+
AssumptionCache run(Function &F, FunctionAnalysisManager &);
180182
};
181183

182184
/// Printer pass for the \c AssumptionAnalysis results.

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/Support/DataTypes.h"
3131
#include "llvm/Support/InstructionCost.h"
3232
#include <functional>
33+
#include <utility>
3334

3435
namespace llvm {
3536

@@ -389,6 +390,9 @@ class TargetTransformInfo {
389390

390391
unsigned getAssumedAddrSpace(const Value *V) const;
391392

393+
std::pair<const Value *, unsigned>
394+
getPredicatedAddrSpace(const Value *V) const;
395+
392396
/// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
393397
/// NewV, which has a different address space. This should happen for every
394398
/// operand index that collectFlatAddressOperands returned for the intrinsic.
@@ -1481,6 +1485,8 @@ class TargetTransformInfo::Concept {
14811485
virtual bool
14821486
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
14831487
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1488+
virtual std::pair<const Value *, unsigned>
1489+
getPredicatedAddrSpace(const Value *V) const = 0;
14841490
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
14851491
Value *OldV,
14861492
Value *NewV) const = 0;
@@ -1824,6 +1830,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
18241830
return Impl.getAssumedAddrSpace(V);
18251831
}
18261832

1833+
std::pair<const Value *, unsigned>
1834+
getPredicatedAddrSpace(const Value *V) const override {
1835+
return Impl.getPredicatedAddrSpace(V);
1836+
}
1837+
18271838
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
18281839
Value *NewV) const override {
18291840
return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/IR/Operator.h"
2525
#include "llvm/IR/PatternMatch.h"
2626
#include "llvm/IR/Type.h"
27+
#include <utility>
2728

2829
using namespace llvm::PatternMatch;
2930

@@ -110,6 +111,11 @@ class TargetTransformInfoImplBase {
110111

111112
unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
112113

114+
std::pair<const Value *, unsigned>
115+
getPredicatedAddrSpace(const Value *V) const {
116+
return std::make_pair(nullptr, -1);
117+
}
118+
113119
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
114120
Value *NewV) const {
115121
return nullptr;

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
283283
return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
284284
}
285285

286+
std::pair<const Value *, unsigned>
287+
getPredicatedAddrSpace(const Value *V) const {
288+
return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
289+
}
290+
286291
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
287292
Value *NewV) const {
288293
return nullptr;

llvm/include/llvm/Target/TargetMachine.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/Target/CGPassBuilderOption.h"
2626
#include "llvm/Target/TargetOptions.h"
2727
#include <string>
28+
#include <utility>
2829

2930
namespace llvm {
3031

@@ -319,6 +320,18 @@ class TargetMachine {
319320
/// properties.
320321
virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
321322

323+
/// If the specified predicate checks whether a generic pointer falls within
324+
/// a specified address space, return that generic pointer and the address
325+
/// space being queried.
326+
///
327+
/// Such predicates could be specified in @llvm.assume intrinsics for the
328+
/// optimizer to assume that the given generic pointer always falls within
329+
/// the address space based on that predicate.
330+
virtual std::pair<const Value *, unsigned>
331+
getPredicatedAddrSpace(const Value *V) const {
332+
return std::make_pair(nullptr, -1);
333+
}
334+
322335
/// Get a \c TargetIRAnalysis appropriate for the target.
323336
///
324337
/// This is used to construct the new pass manager's target IR analysis pass,

llvm/lib/Analysis/AssumptionCache.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/STLExtras.h"
1717
#include "llvm/ADT/SmallPtrSet.h"
1818
#include "llvm/ADT/SmallVector.h"
19+
#include "llvm/Analysis/TargetTransformInfo.h"
1920
#include "llvm/IR/BasicBlock.h"
2021
#include "llvm/IR/Function.h"
2122
#include "llvm/IR/InstrTypes.h"
@@ -56,7 +57,7 @@ AssumptionCache::getOrInsertAffectedValues(Value *V) {
5657
}
5758

5859
static void
59-
findAffectedValues(CallBase *CI,
60+
findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
6061
SmallVectorImpl<AssumptionCache::ResultElem> &Affected) {
6162
// Note: This code must be kept in-sync with the code in
6263
// computeKnownBitsFromAssume in ValueTracking.
@@ -124,11 +125,19 @@ findAffectedValues(CallBase *CI,
124125
match(B, m_ConstantInt()))
125126
AddAffected(X);
126127
}
128+
129+
if (TTI) {
130+
const Value *Ptr;
131+
unsigned AS;
132+
std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(Cond);
133+
if (Ptr)
134+
AddAffected(const_cast<Value *>(Ptr->stripInBoundsOffsets()));
135+
}
127136
}
128137

129138
void AssumptionCache::updateAffectedValues(AssumeInst *CI) {
130139
SmallVector<AssumptionCache::ResultElem, 16> Affected;
131-
findAffectedValues(CI, Affected);
140+
findAffectedValues(CI, TTI, Affected);
132141

133142
for (auto &AV : Affected) {
134143
auto &AVV = getOrInsertAffectedValues(AV.Assume);
@@ -141,7 +150,7 @@ void AssumptionCache::updateAffectedValues(AssumeInst *CI) {
141150

142151
void AssumptionCache::unregisterAssumption(AssumeInst *CI) {
143152
SmallVector<AssumptionCache::ResultElem, 16> Affected;
144-
findAffectedValues(CI, Affected);
153+
findAffectedValues(CI, TTI, Affected);
145154

146155
for (auto &AV : Affected) {
147156
auto AVI = AffectedValues.find_as(AV.Assume);
@@ -248,6 +257,12 @@ void AssumptionCache::registerAssumption(AssumeInst *CI) {
248257
updateAffectedValues(CI);
249258
}
250259

260+
AssumptionCache AssumptionAnalysis::run(Function &F,
261+
FunctionAnalysisManager &FAM) {
262+
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
263+
return AssumptionCache(F, &TTI);
264+
}
265+
251266
AnalysisKey AssumptionAnalysis::Key;
252267

253268
PreservedAnalyses AssumptionPrinterPass::run(Function &F,
@@ -278,10 +293,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
278293
if (I != AssumptionCaches.end())
279294
return *I->second;
280295

296+
auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
297+
auto *TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
298+
281299
// Ok, build a new cache by scanning the function, insert it and the value
282300
// handle into our map, and return the newly populated cache.
283301
auto IP = AssumptionCaches.insert(std::make_pair(
284-
FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F)));
302+
FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F, TTI)));
285303
assert(IP.second && "Scanning function already in the map?");
286304
return *IP.first->second;
287305
}

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,11 @@ unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
268268
return TTIImpl->getAssumedAddrSpace(V);
269269
}
270270

271+
std::pair<const Value *, unsigned>
272+
TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
273+
return TTIImpl->getPredicatedAddrSpace(V);
274+
}
275+
271276
Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
272277
IntrinsicInst *II, Value *OldV, Value *NewV) const {
273278
return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@
3636
#include "llvm/CodeGen/Passes.h"
3737
#include "llvm/CodeGen/RegAllocRegistry.h"
3838
#include "llvm/CodeGen/TargetPassConfig.h"
39+
#include "llvm/IR/IntrinsicsAMDGPU.h"
3940
#include "llvm/IR/LegacyPassManager.h"
4041
#include "llvm/IR/PassManager.h"
42+
#include "llvm/IR/PatternMatch.h"
4143
#include "llvm/InitializePasses.h"
4244
#include "llvm/MC/TargetRegistry.h"
4345
#include "llvm/Passes/PassBuilder.h"
@@ -780,6 +782,33 @@ unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
780782
return AMDGPUAS::GLOBAL_ADDRESS;
781783
}
782784

785+
std::pair<const Value *, unsigned>
786+
AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
787+
if (auto *II = dyn_cast<IntrinsicInst>(V)) {
788+
switch (II->getIntrinsicID()) {
789+
case Intrinsic::amdgcn_is_shared:
790+
return std::make_pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
791+
case Intrinsic::amdgcn_is_private:
792+
return std::make_pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
793+
default:
794+
break;
795+
}
796+
return std::make_pair(nullptr, -1);
797+
}
798+
// Check the global pointer predication based on
799+
// (!is_shared(p) && !is_private(p)). Note that logical 'and' is commutative and
800+
// the order of 'is_shared' and 'is_private' is not significant.
801+
Value *Ptr;
802+
if (match(
803+
const_cast<Value *>(V),
804+
m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
805+
m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
806+
m_Deferred(Ptr))))))
807+
return std::make_pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);
808+
809+
return std::make_pair(nullptr, -1);
810+
}
811+
783812
//===----------------------------------------------------------------------===//
784813
// GCN Target Machine (SI+)
785814
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "GCNSubtarget.h"
1818
#include "llvm/CodeGen/TargetPassConfig.h"
1919
#include "llvm/Target/TargetMachine.h"
20+
#include <utility>
2021

2122
namespace llvm {
2223

@@ -63,6 +64,9 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
6364
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
6465

6566
unsigned getAssumedAddrSpace(const Value *V) const override;
67+
68+
std::pair<const Value *, unsigned>
69+
getPredicatedAddrSpace(const Value *V) const override;
6670
};
6771

6872
//===----------------------------------------------------------------------===//

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/Analysis/TargetTransformInfo.h"
2424
#include "llvm/CodeGen/Passes.h"
2525
#include "llvm/CodeGen/TargetPassConfig.h"
26+
#include "llvm/IR/IntrinsicsNVPTX.h"
2627
#include "llvm/IR/LegacyPassManager.h"
2728
#include "llvm/MC/TargetRegistry.h"
2829
#include "llvm/Pass.h"
@@ -240,6 +241,25 @@ NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
240241
return TargetTransformInfo(NVPTXTTIImpl(this, F));
241242
}
242243

244+
std::pair<const Value *, unsigned>
245+
NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
246+
if (auto *II = dyn_cast<IntrinsicInst>(V)) {
247+
switch (II->getIntrinsicID()) {
248+
case Intrinsic::nvvm_isspacep_const:
249+
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_CONST);
250+
case Intrinsic::nvvm_isspacep_global:
251+
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_GLOBAL);
252+
case Intrinsic::nvvm_isspacep_local:
253+
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
254+
case Intrinsic::nvvm_isspacep_shared:
255+
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
256+
default:
257+
break;
258+
}
259+
}
260+
return std::make_pair(nullptr, -1);
261+
}
262+
243263
void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
244264
if (getOptLevel() == CodeGenOpt::Aggressive)
245265
addPass(createGVNPass());

llvm/lib/Target/NVPTX/NVPTXTargetMachine.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "ManagedStringPool.h"
1717
#include "NVPTXSubtarget.h"
1818
#include "llvm/Target/TargetMachine.h"
19+
#include <utility>
1920

2021
namespace llvm {
2122

@@ -69,6 +70,9 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
6970
bool isMachineVerifierClean() const override {
7071
return false;
7172
}
73+
74+
std::pair<const Value *, unsigned>
75+
getPredicatedAddrSpace(const Value *V) const override;
7276
}; // NVPTXTargetMachine.
7377

7478
class NVPTXTargetMachine32 : public NVPTXTargetMachine {

0 commit comments

Comments
 (0)