21
21
#include " llvm/ADT/SmallPtrSet.h"
22
22
#include " llvm/ADT/SmallSet.h"
23
23
#include " llvm/ADT/SmallVector.h"
24
+ #include " llvm/ADT/Statistic.h"
24
25
#include " llvm/Analysis/AliasAnalysis.h"
25
26
#include " llvm/Analysis/AliasSetTracker.h"
26
27
#include " llvm/Analysis/LoopAnalysisManager.h"
@@ -70,6 +71,8 @@ using namespace llvm::PatternMatch;
70
71
71
72
#define DEBUG_TYPE " loop-accesses"
72
73
74
+ STATISTIC (HistogramsDetected, " Number of Histograms detected" );
75
+
73
76
static cl::opt<unsigned , true >
74
77
VectorizationFactor (" force-vector-width" , cl::Hidden,
75
78
cl::desc (" Sets the SIMD width. Zero is autoselect." ),
@@ -731,6 +734,23 @@ class AccessAnalysis {
731
734
return UnderlyingObjects;
732
735
}
733
736
737
+ // / Find Histogram counts that match high-level code in loops:
738
+ // / \code
739
+ // / buckets[indices[i]]+=step;
740
+ // / \endcode
741
+ // /
742
+ // / It matches a pattern starting from \p HSt, which Stores to the 'buckets'
743
+ // / array the computed histogram. It uses a BinOp to sum all counts, storing
744
+ // / them using a loop-variant index Load from the 'indices' input array.
745
+ // /
746
+ // / On a successful match it increments the STATISTIC 'HistogramsDetected',
747
+ // / appends the matched Load/BinOp/Store to \p Histograms, and records the
748
+ // / pointer used to update the histogram in \p HistogramPtrs. No check for
749
+ // / hardware support is performed here.
750
+ void findHistograms (StoreInst *HSt,
751
+ SmallVectorImpl<HistogramInfo> &Histograms,
752
+ SmallPtrSetImpl<const Value *> &HistogramPtrs);
753
+
734
754
private:
735
755
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1 >> PtrAccessMap;
736
756
@@ -1948,7 +1968,8 @@ getDependenceDistanceStrideAndSize(
1948
1968
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
1949
1969
const DenseMap<Value *, const SCEV *> &Strides,
1950
1970
const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
1951
- PredicatedScalarEvolution &PSE, const Loop *InnermostLoop) {
1971
+ PredicatedScalarEvolution &PSE, const Loop *InnermostLoop,
1972
+ const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
1952
1973
auto &DL = InnermostLoop->getHeader ()->getModule ()->getDataLayout ();
1953
1974
auto &SE = *PSE.getSE ();
1954
1975
auto [APtr, AIsWrite] = A;
@@ -1966,6 +1987,15 @@ getDependenceDistanceStrideAndSize(
1966
1987
BPtr->getType ()->getPointerAddressSpace ())
1967
1988
return MemoryDepChecker::Dependence::Unknown;
1968
1989
1990
+ // Ignore Histogram count updates as they are handled by the Intrinsic. This
1991
+ // happens when the same pointer is first used to read from and then is used
1992
+ // to write to.
1993
+ if (!AIsWrite && BIsWrite && APtr == BPtr && HistogramPtrs.contains (APtr)) {
1994
+ LLVM_DEBUG (dbgs () << " LAA: Histogram: Update is safely ignored. Pointer: "
1995
+ << *APtr);
1996
+ return MemoryDepChecker::Dependence::NoDep;
1997
+ }
1998
+
1969
1999
int64_t StrideAPtr =
1970
2000
getPtrStride (PSE, ATy, APtr, InnermostLoop, Strides, true ).value_or (0 );
1971
2001
int64_t StrideBPtr =
@@ -2022,15 +2052,15 @@ getDependenceDistanceStrideAndSize(
2022
2052
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent (
2023
2053
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
2024
2054
unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides,
2025
- const DenseMap<Value *, SmallVector<const Value *, 16 >>
2026
- &UnderlyingObjects ) {
2055
+ const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
2056
+ const SmallPtrSetImpl< const Value *> &HistogramPtrs ) {
2027
2057
assert (AIdx < BIdx && " Must pass arguments in program order" );
2028
2058
2029
2059
// Get the dependence distance, stride, type size and what access writes for
2030
2060
// the dependence between A and B.
2031
2061
auto Res = getDependenceDistanceStrideAndSize (
2032
2062
A, InstMap[AIdx], B, InstMap[BIdx], Strides, UnderlyingObjects, PSE,
2033
- InnermostLoop);
2063
+ InnermostLoop, HistogramPtrs );
2034
2064
if (std::holds_alternative<Dependence::DepType>(Res))
2035
2065
return std::get<Dependence::DepType>(Res);
2036
2066
@@ -2266,8 +2296,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2266
2296
bool MemoryDepChecker::areDepsSafe (
2267
2297
DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
2268
2298
const DenseMap<Value *, const SCEV *> &Strides,
2269
- const DenseMap<Value *, SmallVector<const Value *, 16 >>
2270
- &UnderlyingObjects ) {
2299
+ const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
2300
+ const SmallPtrSetImpl< const Value *> &HistogramPtrs ) {
2271
2301
2272
2302
MinDepDistBytes = -1 ;
2273
2303
SmallPtrSet<MemAccessInfo, 8 > Visited;
@@ -2312,7 +2342,7 @@ bool MemoryDepChecker::areDepsSafe(
2312
2342
2313
2343
Dependence::DepType Type =
2314
2344
isDependent (*A.first , A.second , *B.first , B.second , Strides,
2315
- UnderlyingObjects);
2345
+ UnderlyingObjects, HistogramPtrs );
2316
2346
mergeInStatus (Dependence::isSafeForVectorization (Type));
2317
2347
2318
2348
// Gather dependences unless we accumulated MaxDependences
@@ -2648,6 +2678,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2648
2678
// check.
2649
2679
Accesses.buildDependenceSets ();
2650
2680
2681
+ for (StoreInst *ST : Stores)
2682
+ Accesses.findHistograms (ST, Histograms, HistogramPtrs);
2683
+
2651
2684
// Find pointers with computable bounds. We are going to use this information
2652
2685
// to place a runtime bound check.
2653
2686
Value *UncomputablePtr = nullptr ;
@@ -2672,7 +2705,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2672
2705
LLVM_DEBUG (dbgs () << " LAA: Checking memory dependencies\n " );
2673
2706
CanVecMem = DepChecker->areDepsSafe (
2674
2707
DependentAccesses, Accesses.getDependenciesToCheck (), SymbolicStrides,
2675
- Accesses.getUnderlyingObjects ());
2708
+ Accesses.getUnderlyingObjects (), HistogramPtrs );
2676
2709
2677
2710
if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck ()) {
2678
2711
LLVM_DEBUG (dbgs () << " LAA: Retrying with memory checks\n " );
@@ -3127,6 +3160,99 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
3127
3160
return *I.first ->second ;
3128
3161
}
3129
3162
3163
+ void AccessAnalysis::findHistograms (
3164
+ StoreInst *HSt, SmallVectorImpl<HistogramInfo> &Histograms,
3165
+ SmallPtrSetImpl<const Value *> &HistogramPtrs) {
3166
+ LLVM_DEBUG (dbgs () << " LAA: Attempting to match histogram from " << *HSt
3167
+ << " \n " );
3168
+ // Store value must come from a Binary Operation.
3169
+ Instruction *HPtrInstr = nullptr ;
3170
+ BinaryOperator *HBinOp = nullptr ;
3171
+ if (!match (HSt, m_Store (m_BinOp (HBinOp), m_Instruction (HPtrInstr)))) {
3172
+ LLVM_DEBUG (dbgs () << " \t No BinOp\n " );
3173
+ return ;
3174
+ }
3175
+
3176
+ // BinOp must be an Add or a Sub operating modifying the bucket value by a
3177
+ // loop invariant amount.
3178
+ // FIXME: We assume the loop invariant term is on the RHS.
3179
+ // Fine for an immediate/constant, but maybe not a generic value?
3180
+ Value *HIncVal = nullptr ;
3181
+ if (!match (HBinOp, m_Add (m_Load (m_Specific (HPtrInstr)), m_Value (HIncVal))) &&
3182
+ !match (HBinOp, m_Sub (m_Load (m_Specific (HPtrInstr)), m_Value (HIncVal)))) {
3183
+ LLVM_DEBUG (dbgs () << " \t No matching load\n " );
3184
+ return ;
3185
+ }
3186
+ Instruction *IndexedLoad = cast<Instruction>(HBinOp->getOperand (0 ));
3187
+
3188
+ // The address to store is calculated through a GEP Instruction.
3189
+ // FIXME: Support GEPs with more operands.
3190
+ GetElementPtrInst *HPtr = dyn_cast<GetElementPtrInst>(HPtrInstr);
3191
+ if (!HPtr || HPtr->getNumOperands () > 2 ) {
3192
+ LLVM_DEBUG (dbgs () << " \t Too many GEP operands\n " );
3193
+ return ;
3194
+ }
3195
+
3196
+ // Check that the index is calculated by loading from another array. Ignore
3197
+ // any extensions.
3198
+ // FIXME: Support indices from other sources that a linear load from memory?
3199
+ Value *HIdx = HPtr->getOperand (1 );
3200
+ Instruction *IdxInst = nullptr ;
3201
+ // FIXME: Can this fail? Maybe if IdxInst isn't an instruction. Just need to
3202
+ // look through extensions, find another way?
3203
+ if (!match (HIdx, m_ZExtOrSExtOrSelf (m_Instruction (IdxInst))))
3204
+ return ;
3205
+
3206
+ // Currently restricting this to linear addressing when loading indices.
3207
+ LoadInst *VLoad = dyn_cast<LoadInst>(IdxInst);
3208
+ Value *VPtrVal;
3209
+ if (!VLoad || !match (VLoad, m_Load (m_Value (VPtrVal)))) {
3210
+ LLVM_DEBUG (dbgs () << " \t Bad Index Load\n " );
3211
+ return ;
3212
+ }
3213
+
3214
+ if (!isa<SCEVAddRecExpr>(PSE.getSCEV (VPtrVal))) {
3215
+ LLVM_DEBUG (dbgs () << " \t Cannot determine index load stride\n " );
3216
+ return ;
3217
+ }
3218
+
3219
+ // FIXME: support smaller types of input arrays. Integers can be promoted
3220
+ // for codegen.
3221
+ Type *VLoadTy = VLoad->getType ();
3222
+ if (!VLoadTy->isIntegerTy () || (VLoadTy->getScalarSizeInBits () != 32 &&
3223
+ VLoadTy->getScalarSizeInBits () != 64 )) {
3224
+ LLVM_DEBUG (dbgs () << " \t Unsupported bucket type: " << *VLoadTy << " \n " );
3225
+ return ;
3226
+ }
3227
+
3228
+ // Ensure we'll have the same mask by checking that all parts of the histogram
3229
+ // are in the same block.
3230
+ // FIXME: Could use dominance checks instead?
3231
+ if (IndexedLoad->getParent () != HBinOp->getParent () ||
3232
+ IndexedLoad->getParent () != HSt->getParent ()) {
3233
+ LLVM_DEBUG (dbgs () << " \t Different parent blocks\n " );
3234
+ return ;
3235
+ }
3236
+
3237
+ // A histogram pointer may only alias to itself, and must only have two uses,
3238
+ // the load and the store.
3239
+ for (AliasSet &AS : AST)
3240
+ if (AS.isMustAlias () || AS.isMayAlias ())
3241
+ if ((is_contained (AS.getPointers (), HPtr) && AS.size () > 1 ) ||
3242
+ HPtr->getNumUses () != 2 ) {
3243
+ LLVM_DEBUG (dbgs () << " \t Aliasing problem\n " );
3244
+ return ;
3245
+ }
3246
+
3247
+ LLVM_DEBUG (dbgs () << " LAA: Found Histogram Operation: " << *HBinOp << " \n " );
3248
+ HistogramsDetected++;
3249
+
3250
+ // Store the operations that make up the histogram.
3251
+ Histograms.emplace_back (IndexedLoad, HBinOp, HSt);
3252
+ // Store pointers used to write those counts in the computed histogram.
3253
+ HistogramPtrs.insert (HPtr);
3254
+ }
3255
+
3130
3256
bool LoopAccessInfoManager::invalidate (
3131
3257
Function &F, const PreservedAnalyses &PA,
3132
3258
FunctionAnalysisManager::Invalidator &Inv) {
0 commit comments