Skip to content

Commit 0d71291

Browse files
committed
Add new interface, switch for histogram support
Adds a separate interface, canVectorizeMemoryWithHistogram(), to distinguish the existing semantics of canVectorizeMemory() from the ability to vectorize memory accesses only when the transform pass can handle histograms. Also adds a flag, -enable-histogram-loop-vectorization, that enables histogram support in LoopVectorizationLegality; it defaults to off for now.
1 parent 5a986db commit 0d71291

File tree

4 files changed

+37
-20
lines changed

4 files changed

+37
-20
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,13 @@ class RuntimePointerChecking {
639639
/// Checks for both memory dependences and the SCEV predicates contained in the
640640
/// PSE must be emitted in order for the results of this analysis to be valid.
641641
class LoopAccessInfo {
642+
/// Represents whether the memory access dependencies in the loop:
643+
/// * Prohibit vectorization
644+
/// * Allow for vectorization (possibly with runtime checks)
645+
/// * Allow for vectorization (possibly with runtime checks),
646+
/// as long as histogram operations are supported.
647+
enum VecMemPossible { CantVec = 0, NormalVec = 1, HistogramVec = 2 };
648+
642649
public:
643650
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI,
644651
const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
@@ -650,7 +657,11 @@ class LoopAccessInfo {
650657
/// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
651658
/// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
652659
/// checked.
653-
bool canVectorizeMemory() const { return CanVecMem; }
660+
bool canVectorizeMemory() const { return CanVecMem == NormalVec; }
661+
662+
bool canVectorizeMemoryWithHistogram() const {
663+
return CanVecMem == NormalVec || CanVecMem == HistogramVec;
664+
}
654665

655666
/// Return true if there is a convergent operation in the loop. There may
656667
/// still be reported runtime pointer checks that would be required, but it is
@@ -733,8 +744,8 @@ class LoopAccessInfo {
733744
private:
734745
/// Analyze the loop. Returns whether the memory accesses in the loop can be
735746
/// vectorized: CantVec, NormalVec, or HistogramVec (needs histogram support).
736-
bool analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI,
737-
DominatorTree *DT);
747+
VecMemPossible analyzeLoop(AAResults *AA, LoopInfo *LI,
748+
const TargetLibraryInfo *TLI, DominatorTree *DT);
738749

739750
/// Check if the structure of the loop allows it to be analyzed by this
740751
/// pass.
@@ -775,7 +786,7 @@ class LoopAccessInfo {
775786
unsigned NumStores = 0;
776787

777788
/// Cache the result of analyzeLoop.
778-
bool CanVecMem = false;
789+
VecMemPossible CanVecMem = CantVec;
779790
bool HasConvergentOp = false;
780791

781792
/// Indicator that there are two non vectorizable stores to the same uniform

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,9 +2426,9 @@ bool LoopAccessInfo::canAnalyzeLoop() {
24262426
return true;
24272427
}
24282428

2429-
bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2430-
const TargetLibraryInfo *TLI,
2431-
DominatorTree *DT) {
2429+
LoopAccessInfo::VecMemPossible
2430+
LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2431+
const TargetLibraryInfo *TLI, DominatorTree *DT) {
24322432
// Holds the Load and Store instructions.
24332433
SmallVector<LoadInst *, 16> Loads;
24342434
SmallVector<StoreInst *, 16> Stores;
@@ -2468,7 +2468,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
24682468
// With both a non-vectorizable memory instruction and a convergent
24692469
// operation, found in this loop, no reason to continue the search.
24702470
if (HasComplexMemInst && HasConvergentOp)
2471-
return false;
2471+
return CantVec;
24722472

24732473
// Avoid hitting recordAnalysis multiple times.
24742474
if (HasComplexMemInst)
@@ -2544,7 +2544,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
25442544
} // Next block.
25452545

25462546
if (HasComplexMemInst)
2547-
return false;
2547+
return CantVec;
25482548

25492549
// Now we have two lists that hold the loads and the stores.
25502550
// Next, we find the pointers that they use.
@@ -2553,7 +2553,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
25532553
// care if the pointers are *restrict*.
25542554
if (!Stores.size()) {
25552555
LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2556-
return true;
2556+
return NormalVec;
25572557
}
25582558

25592559
MemoryDepChecker::DepCandidates DependentAccesses;
@@ -2606,7 +2606,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
26062606
LLVM_DEBUG(
26072607
dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
26082608
<< "checks.\n");
2609-
return true;
2609+
return NormalVec;
26102610
}
26112611

26122612
for (LoadInst *LD : Loads) {
@@ -2653,7 +2653,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
26532653
// other reads in this loop then is it safe to vectorize.
26542654
if (NumReadWrites == 1 && NumReads == 0) {
26552655
LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2656-
return true;
2656+
return NormalVec;
26572657
}
26582658

26592659
// Build dependence sets and check whether we need a runtime pointer bounds
@@ -2675,7 +2675,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
26752675
<< "cannot identify array bounds";
26762676
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
26772677
<< "the array bounds.\n");
2678-
return false;
2678+
return CantVec;
26792679
}
26802680

26812681
LLVM_DEBUG(
@@ -2708,7 +2708,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
27082708
recordAnalysis("CantCheckMemDepsAtRunTime", I)
27092709
<< "cannot check memory dependencies at runtime";
27102710
LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2711-
return false;
2711+
return CantVec;
27122712
}
27132713
DepsAreSafe = true;
27142714
}
@@ -2719,19 +2719,19 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
27192719
<< "cannot add control dependency to convergent operation";
27202720
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
27212721
"would be needed with a convergent operation\n");
2722-
return false;
2722+
return CantVec;
27232723
}
27242724

27252725
if (DepsAreSafe) {
27262726
LLVM_DEBUG(
27272727
dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
27282728
<< (PtrRtChecking->Need ? "" : " don't")
27292729
<< " need runtime memory checks.\n");
2730-
return true;
2730+
return Histograms.empty() ? NormalVec : HistogramVec;
27312731
}
27322732

27332733
emitUnsafeDependenceRemark();
2734-
return false;
2734+
return CantVec;
27352735
}
27362736

27372737
void LoopAccessInfo::emitUnsafeDependenceRemark() {
@@ -3065,7 +3065,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
30653065
}
30663066

30673067
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
3068-
if (CanVecMem) {
3068+
if (CanVecMem != CantVec) {
30693069
OS.indent(Depth) << "Memory dependences are safe";
30703070
const MemoryDepChecker &DC = getDepChecker();
30713071
if (!DC.isSafeForAnyVectorWidth())

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ static cl::opt<LoopVectorizeHints::ScalableForceKind>
7878
"Scalable vectorization is available and favored when the "
7979
"cost is inconclusive.")));
8080

81+
static cl::opt<bool> EnableHistogramVectorization(
82+
"enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
83+
cl::desc("Enables autovectorization of some loops containing histograms"));
84+
8185
/// Maximum vectorization interleave count.
8286
static const unsigned MaxInterleaveFactor = 16;
8387

@@ -1065,7 +1069,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
10651069
}
10661070

10671071
if (!LAI->canVectorizeMemory())
1068-
return false;
1072+
if (!EnableHistogramVectorization ||
1073+
!LAI->canVectorizeMemoryWithHistogram())
1074+
return false;
10691075

10701076
if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
10711077
reportVectorizationFailure("We don't allow storing to uniform addresses",

llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2-
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -sve-gather-overhead=2 -sve-scatter-overhead=2 -S | FileCheck %s
2+
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -S | FileCheck %s
33

44
target triple = "aarch64-unknown-linux-gnu"
55

0 commit comments

Comments
 (0)