Skip to content

Commit 5a986db

Browse files
committed
LV Changes
1 parent 2618095 commit 5a986db

File tree

11 files changed

+428
-40
lines changed

11 files changed

+428
-40
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ class MemoryDepChecker {
144144
// on MinDepDistBytes.
145145
BackwardVectorizable,
146146
// Same, but may prevent store-to-load forwarding.
147-
BackwardVectorizableButPreventsForwarding
147+
BackwardVectorizableButPreventsForwarding,
148+
// Access uses a loop-loaded index, but is part of a histogram operation.
149+
Histogram
148150
};
149151

150152
/// String version of the types.
@@ -201,7 +203,8 @@ class MemoryDepChecker {
201203
/// Only checks sets with elements in \p CheckDeps.
202204
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
203205
const DenseMap<Value *, SmallVector<const Value *, 16>>
204-
&UnderlyingObjects);
206+
&UnderlyingObjects,
207+
const SmallPtrSetImpl<const Value *> &HistogramPtrs);
205208

206209
/// No memory dependence was encountered that would inhibit
207210
/// vectorization.
@@ -352,7 +355,8 @@ class MemoryDepChecker {
352355
isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
353356
unsigned BIdx,
354357
const DenseMap<Value *, SmallVector<const Value *, 16>>
355-
&UnderlyingObjects);
358+
&UnderlyingObjects,
359+
const SmallPtrSetImpl<const Value *> &HistogramPtrs);
356360

357361
/// Check whether the data dependence could prevent store-load
358362
/// forwarding.
@@ -393,7 +397,8 @@ class MemoryDepChecker {
393397
const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
394398
Instruction *BInst,
395399
const DenseMap<Value *, SmallVector<const Value *, 16>>
396-
&UnderlyingObjects);
400+
&UnderlyingObjects,
401+
const SmallPtrSetImpl<const Value *> &HistogramPtrs);
397402
};
398403

399404
class RuntimePointerChecking;
@@ -445,6 +450,15 @@ struct PointerDiffInfo {
445450
NeedsFreeze(NeedsFreeze) {}
446451
};
447452

453+
struct HistogramInfo {
454+
LoadInst *Load;
455+
Instruction *Update;
456+
StoreInst *Store;
457+
458+
HistogramInfo(LoadInst *Load, Instruction *Update, StoreInst *Store)
459+
: Load(Load), Update(Update), Store(Store) {}
460+
};
461+
448462
/// Holds information about the memory runtime legality checks to verify
449463
/// that a group of pointers do not overlap.
450464
class RuntimePointerChecking {
@@ -664,6 +678,10 @@ class LoopAccessInfo {
664678
unsigned getNumStores() const { return NumStores; }
665679
unsigned getNumLoads() const { return NumLoads;}
666680

681+
/// Read-only access to the histogram operations recorded for this loop.
const SmallVectorImpl<HistogramInfo> &getHistograms() const {
  return Histograms;
}
684+
667685
/// The diagnostics report generated for the analysis. E.g. why we
668686
/// couldn't analyze the loop.
669687
const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
@@ -777,6 +795,13 @@ class LoopAccessInfo {
777795
/// If an access has a symbolic strides, this maps the pointer value to
778796
/// the stride symbol.
779797
DenseMap<Value *, const SCEV *> SymbolicStrides;
798+
799+
/// Holds the load, update, and store instructions for all histogram-style
800+
/// operations found in the loop.
801+
SmallVector<HistogramInfo, 2> Histograms;
802+
803+
/// Holds the bucket pointers used by the histogram operations in Histograms.
804+
SmallPtrSet<const Value *, 2> HistogramPtrs;
780805
};
781806

782807
/// Return the SCEV corresponding to a pointer with the symbolic stride

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,23 @@ class LoopVectorizationLegality {
387387
unsigned getNumStores() const { return LAI->getNumStores(); }
388388
unsigned getNumLoads() const { return LAI->getNumLoads(); }
389389

390+
/// Look up the histogram operation that \p I takes part in, whether as its
/// load, its update, or its store.
///
/// \returns a pointer to the matching HistogramInfo, or std::nullopt when
/// \p I does not belong to any histogram recorded by the loop access info.
std::optional<const HistogramInfo *> getHistogramInfo(Instruction *I) const {
  for (const HistogramInfo &Entry : LAI->getHistograms()) {
    const bool IsPartOfHistogram =
        Entry.Load == I || Entry.Update == I || Entry.Store == I;
    if (IsPartOfHistogram)
      return &Entry;
  }

  return std::nullopt;
}
397+
398+
std::optional<const HistogramInfo *>
399+
getHistogramForStore(StoreInst *SI) const {
400+
for (const HistogramInfo &HGram : LAI->getHistograms())
401+
if (HGram.Store == SI)
402+
return &HGram;
403+
404+
return std::nullopt;
405+
}
406+
390407
PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
391408
return &PSE;
392409
}

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 124 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/ADT/SmallPtrSet.h"
2222
#include "llvm/ADT/SmallSet.h"
2323
#include "llvm/ADT/SmallVector.h"
24+
#include "llvm/ADT/Statistic.h"
2425
#include "llvm/Analysis/AliasAnalysis.h"
2526
#include "llvm/Analysis/AliasSetTracker.h"
2627
#include "llvm/Analysis/LoopAnalysisManager.h"
@@ -70,6 +71,8 @@ using namespace llvm::PatternMatch;
7071

7172
#define DEBUG_TYPE "loop-accesses"
7273

74+
STATISTIC(HistogramsDetected, "Number of Histograms detected");
75+
7376
static cl::opt<unsigned, true>
7477
VectorizationFactor("force-vector-width", cl::Hidden,
7578
cl::desc("Sets the SIMD width. Zero is autoselect."),
@@ -732,6 +735,23 @@ class AccessAnalysis {
732735
return UnderlyingObjects;
733736
}
734737

738+
/// Find Histogram counts that match high-level code in loops:
739+
/// \code
740+
/// buckets[indices[i]]+=step;
741+
/// \endcode
742+
///
743+
/// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
744+
/// array the computed histogram. It uses a BinOp to sum all counts, storing
745+
/// them using a loop-variant index Load from the 'indices' input array.
746+
///
747+
/// On successful matches it updates the STATISTIC 'HistogramsDetected',
748+
/// regardless of hardware support. It additionally
749+
/// stores the Load/BinOp/Store triples in \p Histograms, as well as the pointers
750+
/// used to update the histogram in \p HistogramPtrs.
751+
void findHistograms(StoreInst *HSt, Loop *TheLoop,
752+
SmallVectorImpl<HistogramInfo> &Histograms,
753+
SmallPtrSetImpl<const Value *> &HistogramPtrs);
754+
735755
private:
736756
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
737757

@@ -1698,6 +1718,7 @@ MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
16981718
case NoDep:
16991719
case Forward:
17001720
case BackwardVectorizable:
1721+
case Histogram:
17011722
return VectorizationSafetyStatus::Safe;
17021723

17031724
case Unknown:
@@ -1718,6 +1739,7 @@ bool MemoryDepChecker::Dependence::isBackward() const {
17181739
case ForwardButPreventsForwarding:
17191740
case Unknown:
17201741
case IndirectUnsafe:
1742+
case Histogram:
17211743
return false;
17221744

17231745
case BackwardVectorizable:
@@ -1729,7 +1751,7 @@ bool MemoryDepChecker::Dependence::isBackward() const {
17291751
}
17301752

17311753
bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1732-
return isBackward() || Type == Unknown;
1754+
return isBackward() || Type == Unknown || Type == Histogram;
17331755
}
17341756

17351757
bool MemoryDepChecker::Dependence::isForward() const {
@@ -1744,6 +1766,7 @@ bool MemoryDepChecker::Dependence::isForward() const {
17441766
case Backward:
17451767
case BackwardVectorizableButPreventsForwarding:
17461768
case IndirectUnsafe:
1769+
case Histogram:
17471770
return false;
17481771
}
17491772
llvm_unreachable("unexpected DepType!");
@@ -1913,8 +1936,8 @@ std::variant<MemoryDepChecker::Dependence::DepType,
19131936
MemoryDepChecker::getDependenceDistanceStrideAndSize(
19141937
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
19151938
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
1916-
const DenseMap<Value *, SmallVector<const Value *, 16>>
1917-
&UnderlyingObjects) {
1939+
const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
1940+
const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
19181941
auto &DL = InnermostLoop->getHeader()->getDataLayout();
19191942
auto &SE = *PSE.getSE();
19201943
auto [APtr, AIsWrite] = A;
@@ -1932,6 +1955,12 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19321955
BPtr->getType()->getPointerAddressSpace())
19331956
return MemoryDepChecker::Dependence::Unknown;
19341957

1958+
// Ignore Histogram count updates as they are handled by the Intrinsic. This
1959+
// happens when the same pointer is first used to read from and then is used
1960+
// to write to.
1961+
if (!AIsWrite && BIsWrite && APtr == BPtr && HistogramPtrs.contains(APtr))
1962+
return MemoryDepChecker::Dependence::Histogram;
1963+
19351964
int64_t StrideAPtr =
19361965
getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true)
19371966
.value_or(0);
@@ -2008,14 +2037,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
20082037
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
20092038
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
20102039
unsigned BIdx,
2011-
const DenseMap<Value *, SmallVector<const Value *, 16>>
2012-
&UnderlyingObjects) {
2040+
const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
2041+
const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
20132042
assert(AIdx < BIdx && "Must pass arguments in program order");
20142043

20152044
// Get the dependence distance, stride, type size and what access writes for
20162045
// the dependence between A and B.
20172046
auto Res = getDependenceDistanceStrideAndSize(
2018-
A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
2047+
A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects, HistogramPtrs);
20192048
if (std::holds_alternative<Dependence::DepType>(Res))
20202049
return std::get<Dependence::DepType>(Res);
20212050

@@ -2251,8 +2280,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
22512280

22522281
bool MemoryDepChecker::areDepsSafe(
22532282
DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
2254-
const DenseMap<Value *, SmallVector<const Value *, 16>>
2255-
&UnderlyingObjects) {
2283+
const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
2284+
const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
22562285

22572286
MinDepDistBytes = -1;
22582287
SmallPtrSet<MemAccessInfo, 8> Visited;
@@ -2295,8 +2324,9 @@ bool MemoryDepChecker::areDepsSafe(
22952324
if (*I1 > *I2)
22962325
std::swap(A, B);
22972326

2298-
Dependence::DepType Type = isDependent(*A.first, A.second, *B.first,
2299-
B.second, UnderlyingObjects);
2327+
Dependence::DepType Type =
2328+
isDependent(*A.first, A.second, *B.first, B.second,
2329+
UnderlyingObjects, HistogramPtrs);
23002330
mergeInStatus(Dependence::isSafeForVectorization(Type));
23012331

23022332
// Gather dependences unless we accumulated MaxDependences
@@ -2347,7 +2377,8 @@ const char *MemoryDepChecker::Dependence::DepName[] = {
23472377
"ForwardButPreventsForwarding",
23482378
"Backward",
23492379
"BackwardVectorizable",
2350-
"BackwardVectorizableButPreventsForwarding"};
2380+
"BackwardVectorizableButPreventsForwarding",
2381+
"Histogram"};
23512382

23522383
void MemoryDepChecker::Dependence::print(
23532384
raw_ostream &OS, unsigned Depth,
@@ -2629,6 +2660,9 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
26292660
// check.
26302661
Accesses.buildDependenceSets();
26312662

2663+
for (StoreInst *ST : Stores)
2664+
Accesses.findHistograms(ST, TheLoop, Histograms, HistogramPtrs);
2665+
26322666
// Find pointers with computable bounds. We are going to use this information
26332667
// to place a runtime bound check.
26342668
Value *UncomputablePtr = nullptr;
@@ -2650,9 +2684,9 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
26502684
bool DepsAreSafe = true;
26512685
if (Accesses.isDependencyCheckNeeded()) {
26522686
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2653-
DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
2654-
Accesses.getDependenciesToCheck(),
2655-
Accesses.getUnderlyingObjects());
2687+
DepsAreSafe = DepChecker->areDepsSafe(
2688+
DependentAccesses, Accesses.getDependenciesToCheck(),
2689+
Accesses.getUnderlyingObjects(), HistogramPtrs);
26562690

26572691
if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
26582692
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
@@ -2757,6 +2791,9 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
27572791
case MemoryDepChecker::Dependence::Unknown:
27582792
R << "\nUnknown data dependence.";
27592793
break;
2794+
case MemoryDepChecker::Dependence::Histogram:
2795+
R << "\nHistogram data dependence.";
2796+
break;
27602797
}
27612798

27622799
if (Instruction *I = Dep.getSource(getDepChecker())) {
@@ -3085,6 +3122,79 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
30853122
return *It->second;
30863123
}
30873124

3125+
/// Try to recognise \p HSt as the final store of a histogram update such as
/// \code
///   buckets[indices[i]] += step;
/// \endcode
///
/// \param HSt           Candidate store into the 'buckets' array.
/// \param TheLoop       Loop used to check that the increment is invariant.
/// \param Histograms    On a match, receives the (load, update, store) triple.
/// \param HistogramPtrs On a match, receives the bucket pointer.
///
/// Every failed check returns early without touching the output containers.
/// On a match the 'HistogramsDetected' statistic is incremented as well.
void AccessAnalysis::findHistograms(
    StoreInst *HSt, Loop *TheLoop, SmallVectorImpl<HistogramInfo> &Histograms,
    SmallPtrSetImpl<const Value *> &HistogramPtrs) {

  // Store value must come from a Binary Operation.
  Instruction *HPtrInstr = nullptr;
  BinaryOperator *HBinOp = nullptr;
  if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
    return;

  // BinOp must be an Add or a Sub modifying the bucket value by a
  // loop invariant amount. The bucket value must be loaded from the very
  // pointer that is stored to.
  // FIXME: We assume the loop invariant term is on the RHS.
  //        Fine for an immediate/constant, but maybe not a generic value?
  Value *HIncVal = nullptr;
  if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
      !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
    return;

  // Make sure the increment value is loop invariant.
  if (!TheLoop->isLoopInvariant(HIncVal))
    return;

  // The address to store is calculated through a GEP Instruction.
  // FIXME: Support GEPs with more operands.
  GetElementPtrInst *HPtr = dyn_cast<GetElementPtrInst>(HPtrInstr);
  if (!HPtr || HPtr->getNumOperands() > 2)
    return;

  // Check that the index is calculated by loading from another array. Ignore
  // any extensions.
  // FIXME: Support indices from other sources than a linear load from memory?
  Value *HIdx = HPtr->getOperand(1);
  Instruction *IdxInst = nullptr;
  if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Instruction(IdxInst))))
    return;

  // Currently restricting this to linear addressing when loading indices:
  // the index must be a load whose address is an add-recurrence.
  auto *VLoad = dyn_cast<LoadInst>(IdxInst);
  if (!VLoad)
    return;

  if (!isa<SCEVAddRecExpr>(PSE.getSCEV(VLoad->getPointerOperand())))
    return;

  // Ensure we'll have the same mask by checking that all parts of the histogram
  // (gather load, update, scatter store) are in the same block.
  // The cast is safe: both patterns above require operand 0 to be a load.
  LoadInst *IndexedLoad = cast<LoadInst>(HBinOp->getOperand(0));
  BasicBlock *LdBB = IndexedLoad->getParent();
  if (LdBB != HBinOp->getParent() || LdBB != HSt->getParent())
    return;

  // A histogram pointer must only have two uses, the load and the store.
  // This does not depend on any alias set, so it is checked exactly once.
  // We may be able to relax this constraint later.
  if (HPtr->getNumUses() != 2)
    return;

  // A histogram pointer may only alias to itself: reject it if the alias set
  // containing it holds any other pointer.
  for (AliasSet &AS : AST)
    if ((AS.isMustAlias() || AS.isMayAlias()) &&
        is_contained(AS.getPointers(), HPtr) && AS.size() > 1)
      return;

  HistogramsDetected++;

  LLVM_DEBUG(dbgs() << "LAA: Found histogram for load: " << *IndexedLoad
                    << " and store: " << *HSt << "\n");

  // Store the operations that make up the histogram.
  Histograms.emplace_back(IndexedLoad, HBinOp, HSt);
  // Store pointers used to write those counts in the computed histogram.
  HistogramPtrs.insert(HPtr);
}
3197+
30883198
bool LoopAccessInfoManager::invalidate(
30893199
Function &F, const PreservedAnalyses &PA,
30903200
FunctionAnalysisManager::Invalidator &Inv) {

llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ class LoadEliminationForLoop {
199199
Instruction *Destination = Dep.getDestination(DepChecker);
200200

201201
if (Dep.Type == MemoryDepChecker::Dependence::Unknown ||
202+
Dep.Type == MemoryDepChecker::Dependence::Histogram ||
202203
Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe) {
203204
if (isa<LoadInst>(Source))
204205
LoadsWithUnknownDepedence.insert(Source);

0 commit comments

Comments
 (0)