21
21
#include " llvm/ADT/SmallPtrSet.h"
22
22
#include " llvm/ADT/SmallSet.h"
23
23
#include " llvm/ADT/SmallVector.h"
24
+ #include " llvm/ADT/Statistic.h"
24
25
#include " llvm/Analysis/AliasAnalysis.h"
25
26
#include " llvm/Analysis/AliasSetTracker.h"
26
27
#include " llvm/Analysis/LoopAnalysisManager.h"
@@ -70,6 +71,8 @@ using namespace llvm::PatternMatch;
70
71
71
72
#define DEBUG_TYPE " loop-accesses"
72
73
74
+ STATISTIC (HistogramsDetected, " Number of Histograms detected" );
75
+
73
76
static cl::opt<unsigned , true >
74
77
VectorizationFactor (" force-vector-width" , cl::Hidden,
75
78
cl::desc (" Sets the SIMD width. Zero is autoselect." ),
@@ -732,6 +735,23 @@ class AccessAnalysis {
732
735
return UnderlyingObjects;
733
736
}
734
737
738
+ // / Find Histogram counts that match high-level code in loops:
739
+ // / \code
740
+ // / buckets[indices[i]]+=step;
741
+ // / \endcode
742
+ // /
743
+ // / It matches a pattern starting from \p HSt, which Stores to the 'buckets'
744
+ // / array the computed histogram. It uses a BinOp to sum all counts, storing
745
+ // / them using a loop-variant index Load from the 'indices' input array.
746
+ // /
747
+ // / On successful matches it updates the STATISTIC 'HistogramsDetected',
748
+ // / regardless of hardware support. When there is support, it additionally
749
+ // / stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
750
+ // / used to update histogram in \p HistogramPtrs.
751
+ void findHistograms (StoreInst *HSt, Loop *TheLoop,
752
+ SmallVectorImpl<HistogramInfo> &Histograms,
753
+ SmallPtrSetImpl<const Value *> &HistogramPtrs);
754
+
735
755
private:
736
756
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1 >> PtrAccessMap;
737
757
@@ -1698,6 +1718,7 @@ MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1698
1718
case NoDep:
1699
1719
case Forward:
1700
1720
case BackwardVectorizable:
1721
+ case Histogram:
1701
1722
return VectorizationSafetyStatus::Safe;
1702
1723
1703
1724
case Unknown:
@@ -1718,6 +1739,7 @@ bool MemoryDepChecker::Dependence::isBackward() const {
1718
1739
case ForwardButPreventsForwarding:
1719
1740
case Unknown:
1720
1741
case IndirectUnsafe:
1742
+ case Histogram:
1721
1743
return false ;
1722
1744
1723
1745
case BackwardVectorizable:
@@ -1729,7 +1751,7 @@ bool MemoryDepChecker::Dependence::isBackward() const {
1729
1751
}
1730
1752
1731
1753
bool MemoryDepChecker::Dependence::isPossiblyBackward () const {
1732
- return isBackward () || Type == Unknown;
1754
+ return isBackward () || Type == Unknown || Type == Histogram ;
1733
1755
}
1734
1756
1735
1757
bool MemoryDepChecker::Dependence::isForward () const {
@@ -1744,6 +1766,7 @@ bool MemoryDepChecker::Dependence::isForward() const {
1744
1766
case Backward:
1745
1767
case BackwardVectorizableButPreventsForwarding:
1746
1768
case IndirectUnsafe:
1769
+ case Histogram:
1747
1770
return false ;
1748
1771
}
1749
1772
llvm_unreachable (" unexpected DepType!" );
@@ -1913,8 +1936,8 @@ std::variant<MemoryDepChecker::Dependence::DepType,
1913
1936
MemoryDepChecker::getDependenceDistanceStrideAndSize (
1914
1937
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
1915
1938
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
1916
- const DenseMap<Value *, SmallVector<const Value *, 16 >>
1917
- &UnderlyingObjects ) {
1939
+ const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
1940
+ const SmallPtrSetImpl< const Value *> &HistogramPtrs ) {
1918
1941
auto &DL = InnermostLoop->getHeader ()->getDataLayout ();
1919
1942
auto &SE = *PSE.getSE ();
1920
1943
auto [APtr, AIsWrite] = A;
@@ -1932,6 +1955,12 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
1932
1955
BPtr->getType ()->getPointerAddressSpace ())
1933
1956
return MemoryDepChecker::Dependence::Unknown;
1934
1957
1958
+ // Ignore Histogram count updates as they are handled by the Intrinsic. This
1959
+ // happens when the same pointer is first used to read from and then is used
1960
+ // to write to.
1961
+ if (!AIsWrite && BIsWrite && APtr == BPtr && HistogramPtrs.contains (APtr))
1962
+ return MemoryDepChecker::Dependence::Histogram;
1963
+
1935
1964
int64_t StrideAPtr =
1936
1965
getPtrStride (PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true )
1937
1966
.value_or (0 );
@@ -2008,14 +2037,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
2008
2037
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent (
2009
2038
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
2010
2039
unsigned BIdx,
2011
- const DenseMap<Value *, SmallVector<const Value *, 16 >>
2012
- &UnderlyingObjects ) {
2040
+ const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
2041
+ const SmallPtrSetImpl< const Value *> &HistogramPtrs ) {
2013
2042
assert (AIdx < BIdx && " Must pass arguments in program order" );
2014
2043
2015
2044
// Get the dependence distance, stride, type size and what access writes for
2016
2045
// the dependence between A and B.
2017
2046
auto Res = getDependenceDistanceStrideAndSize (
2018
- A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
2047
+ A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects, HistogramPtrs );
2019
2048
if (std::holds_alternative<Dependence::DepType>(Res))
2020
2049
return std::get<Dependence::DepType>(Res);
2021
2050
@@ -2251,8 +2280,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2251
2280
2252
2281
bool MemoryDepChecker::areDepsSafe (
2253
2282
DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
2254
- const DenseMap<Value *, SmallVector<const Value *, 16 >>
2255
- &UnderlyingObjects ) {
2283
+ const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
2284
+ const SmallPtrSetImpl< const Value *> &HistogramPtrs ) {
2256
2285
2257
2286
MinDepDistBytes = -1 ;
2258
2287
SmallPtrSet<MemAccessInfo, 8 > Visited;
@@ -2295,8 +2324,9 @@ bool MemoryDepChecker::areDepsSafe(
2295
2324
if (*I1 > *I2)
2296
2325
std::swap (A, B);
2297
2326
2298
- Dependence::DepType Type = isDependent (*A.first , A.second , *B.first ,
2299
- B.second , UnderlyingObjects);
2327
+ Dependence::DepType Type =
2328
+ isDependent (*A.first , A.second , *B.first , B.second ,
2329
+ UnderlyingObjects, HistogramPtrs);
2300
2330
mergeInStatus (Dependence::isSafeForVectorization (Type));
2301
2331
2302
2332
// Gather dependences unless we accumulated MaxDependences
@@ -2347,7 +2377,8 @@ const char *MemoryDepChecker::Dependence::DepName[] = {
2347
2377
" ForwardButPreventsForwarding" ,
2348
2378
" Backward" ,
2349
2379
" BackwardVectorizable" ,
2350
- " BackwardVectorizableButPreventsForwarding" };
2380
+ " BackwardVectorizableButPreventsForwarding" ,
2381
+ " Histogram" };
2351
2382
2352
2383
void MemoryDepChecker::Dependence::print (
2353
2384
raw_ostream &OS, unsigned Depth,
@@ -2629,6 +2660,9 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2629
2660
// check.
2630
2661
Accesses.buildDependenceSets ();
2631
2662
2663
+ for (StoreInst *ST : Stores)
2664
+ Accesses.findHistograms (ST, TheLoop, Histograms, HistogramPtrs);
2665
+
2632
2666
// Find pointers with computable bounds. We are going to use this information
2633
2667
// to place a runtime bound check.
2634
2668
Value *UncomputablePtr = nullptr ;
@@ -2650,9 +2684,9 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2650
2684
bool DepsAreSafe = true ;
2651
2685
if (Accesses.isDependencyCheckNeeded ()) {
2652
2686
LLVM_DEBUG (dbgs () << " LAA: Checking memory dependencies\n " );
2653
- DepsAreSafe = DepChecker->areDepsSafe (DependentAccesses,
2654
- Accesses.getDependenciesToCheck (),
2655
- Accesses.getUnderlyingObjects ());
2687
+ DepsAreSafe = DepChecker->areDepsSafe (
2688
+ DependentAccesses, Accesses.getDependenciesToCheck (),
2689
+ Accesses.getUnderlyingObjects (), HistogramPtrs );
2656
2690
2657
2691
if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck ()) {
2658
2692
LLVM_DEBUG (dbgs () << " LAA: Retrying with memory checks\n " );
@@ -2757,6 +2791,9 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
2757
2791
case MemoryDepChecker::Dependence::Unknown:
2758
2792
R << " \n Unknown data dependence." ;
2759
2793
break ;
2794
+ case MemoryDepChecker::Dependence::Histogram:
2795
+ R << " \n Histogram data dependence." ;
2796
+ break ;
2760
2797
}
2761
2798
2762
2799
if (Instruction *I = Dep.getSource (getDepChecker ())) {
@@ -3085,6 +3122,79 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
3085
3122
return *It->second ;
3086
3123
}
3087
3124
3125
/// Find histogram count updates in \p TheLoop that match the high-level
/// pattern:
/// \code
///   buckets[indices[i]] += step;
/// \endcode
///
/// Matching starts from the store \p HSt into the 'buckets' array and works
/// backwards: the stored value must be an Add/Sub of a load from the same
/// pointer, and that pointer must be a two-operand GEP indexed by a
/// (possibly extended) loop-variant load from the 'indices' array.
///
/// On every successful match the STATISTIC 'HistogramsDetected' is bumped;
/// the matched Load/BinOp/Store triple is appended to \p Histograms and the
/// bucket pointer is recorded in \p HistogramPtrs so the dependence checker
/// can later classify the read-then-write on it as Dependence::Histogram.
void AccessAnalysis::findHistograms(
    StoreInst *HSt, Loop *TheLoop, SmallVectorImpl<HistogramInfo> &Histograms,
    SmallPtrSetImpl<const Value *> &HistogramPtrs) {

  // Store value must come from a Binary Operation.
  Instruction *HPtrInstr = nullptr;
  BinaryOperator *HBinOp = nullptr;
  if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
    return;

  // BinOp must be an Add or a Sub modifying the bucket value by a
  // loop invariant amount.
  // FIXME: We assume the loop invariant term is on the RHS.
  //        Fine for an immediate/constant, but maybe not a generic value?
  Value *HIncVal = nullptr;
  if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
      !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
    return;

  // Make sure the increment value is loop invariant.
  if (!TheLoop->isLoopInvariant(HIncVal))
    return;

  // The address to store is calculated through a GEP Instruction.
  // FIXME: Support GEPs with more operands.
  GetElementPtrInst *HPtr = dyn_cast<GetElementPtrInst>(HPtrInstr);
  if (!HPtr || HPtr->getNumOperands() > 2)
    return;

  // Check that the index is calculated by loading from another array. Ignore
  // any extensions.
  // FIXME: Support indices from other sources than a linear load from memory?
  Value *HIdx = HPtr->getOperand(1);
  Instruction *IdxInst = nullptr;
  if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Instruction(IdxInst))))
    return;

  // Currently restricting this to linear addressing when loading indices:
  // the index pointer must be an affine AddRec in SCEV terms.
  LoadInst *VLoad = dyn_cast<LoadInst>(IdxInst);
  Value *VPtrVal;
  if (!VLoad || !match(VLoad, m_Load(m_Value(VPtrVal))))
    return;

  if (!isa<SCEVAddRecExpr>(PSE.getSCEV(VPtrVal)))
    return;

  // Ensure we'll have the same mask by checking that all parts of the
  // histogram (gather load, update, scatter store) are in the same block.
  // The m_Load(m_Specific(HPtrInstr)) match above guarantees operand 0 of
  // the BinOp is a LoadInst, so the cast below cannot fail.
  LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
  BasicBlock *LdBB = IndexedLoad->getParent();
  if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
    return;

  // A histogram pointer may only alias to itself, and must only have two
  // uses, the load and the store.
  // We may be able to relax these constraints later.
  for (AliasSet &AS : AST)
    if (AS.isMustAlias() || AS.isMayAlias())
      if ((is_contained(AS.getPointers(), HPtr) && AS.size() > 1) ||
          HPtr->getNumUses() != 2)
        return;

  // Count the match regardless of whether the target supports histogram
  // intrinsics; the statistic tracks detection, not profitability.
  HistogramsDetected++;

  LLVM_DEBUG(dbgs() << "LAA: Found histogram for load: " << *IndexedLoad
                    << " and store: " << *HSt << "\n");

  // Store the operations that make up the histogram.
  Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
  // Store pointers used to write those counts in the computed histogram.
  HistogramPtrs.insert(HPtr);
}
3197
+
3088
3198
bool LoopAccessInfoManager::invalidate (
3089
3199
Function &F, const PreservedAnalyses &PA,
3090
3200
FunctionAnalysisManager::Invalidator &Inv) {
0 commit comments