Skip to content

Commit 5405539

Browse files
committed
[LAA] Always use DepCands when grouping runtime checks.
Update groupChecks to always use DepCands to try and merge runtime checks. DepCands contains the dependency partition, grouping together all accessed pointers to the same underlying objects. If we computed the dependencies, we only need to check accesses to the same underlying object if there is an unknown dependency for this underlying object; otherwise we already proved that all accesses within the underlying object are safe w.r.t. vectorization and we only need to check that accesses to the underlying object don't overlap with accesses to other underlying objects. To ensure runtime checks are generated for the case with unknown dependencies, remove equivalence classes containing accesses involved in unknown dependencies. This reduces the number of runtime checks needed in case non-constant dependence distances are found, and is in preparation for removing the restriction that the accesses need to have the same stride which was added in llvm#88039.
1 parent 5f73d29 commit 5405539

8 files changed

+133
-78
lines changed

llvm/include/llvm/ADT/EquivalenceClasses.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,13 @@ class EquivalenceClasses {
233233
return findLeader(TheMapping.find(V));
234234
}
235235

236+
/// Erase the class containing \p V. Does nothing if \p V has no entry in TheMapping. NOTE(review): the body erases only the entry for \p V itself; whether the other members of V's class are removed as well is not visible here -- confirm against TheMapping's element type.
237+
void eraseClass(const ElemTy &V) {
238+
if (TheMapping.find(V) == TheMapping.end())
239+
return;
240+
TheMapping.erase(V);
241+
}
242+
236243
/// union - Merge the two equivalence sets for the specified values, inserting
237244
/// them if they do not already exist in the equivalence set.
238245
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2) {

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,8 @@ class RuntimePointerChecking {
453453

454454
/// Generate the checks and store it. This also performs the grouping
455455
/// of pointers to reduce the number of memchecks necessary.
456-
void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
457-
bool UseDependencies);
456+
void generateChecks(const MemoryDepChecker &DepChecker,
457+
MemoryDepChecker::DepCandidates &DepCands);
458458

459459
/// Returns the checks that generateChecks created. They can be used to ensure
460460
/// no read/write accesses overlap across all loop iterations.
@@ -521,10 +521,9 @@ class RuntimePointerChecking {
521521
private:
522522
/// Groups pointers such that a single memcheck is required
523523
/// between two different groups. This will clear the CheckingGroups vector
524-
/// and re-compute it. We will only group dependecies if \p UseDependencies
525-
/// is true, otherwise we will create a separate group for each pointer.
526-
void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
527-
bool UseDependencies);
524+
/// and re-compute it.
525+
void groupChecks(const MemoryDepChecker &DepChecker,
526+
MemoryDepChecker::DepCandidates &DepCands);
528527

529528
/// Generate the checks and return them.
530529
SmallVector<RuntimePointerCheck, 4> generateChecks();

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 55 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -385,9 +385,10 @@ SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
385385
}
386386

387387
void RuntimePointerChecking::generateChecks(
388-
MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
388+
const MemoryDepChecker &DepChecker,
389+
MemoryDepChecker::DepCandidates &DepCands) {
389390
assert(Checks.empty() && "Checks is not empty");
390-
groupChecks(DepCands, UseDependencies);
391+
groupChecks(DepChecker, DepCands);
391392
Checks = generateChecks();
392393
}
393394

@@ -454,7 +455,8 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
454455
}
455456

456457
void RuntimePointerChecking::groupChecks(
457-
MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
458+
const MemoryDepChecker &DepChecker,
459+
MemoryDepChecker::DepCandidates &DepCands) {
458460
// We build the groups from dependency candidates equivalence classes
459461
// because:
460462
// - We know that pointers in the same equivalence class share
@@ -490,21 +492,11 @@ void RuntimePointerChecking::groupChecks(
490492
// us to perform an accurate check in this case.
491493
//
492494
// The above case requires that we have an UnknownDependence between
493-
// accesses to the same underlying object. This cannot happen unless
494-
// FoundNonConstantDistanceDependence is set, and therefore UseDependencies
495-
// is also false. In this case we will use the fallback path and create
496-
// separate checking groups for all pointers.
497-
498-
// If we don't have the dependency partitions, construct a new
499-
// checking pointer group for each pointer. This is also required
500-
// for correctness, because in this case we can have checking between
501-
// pointers to the same underlying object.
502-
if (!UseDependencies) {
503-
for (unsigned I = 0; I < Pointers.size(); ++I)
504-
CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
505-
return;
506-
}
507-
495+
// accesses to the same underlying object. It is the caller's responsibility
496+
// to clear any entries for accesses with unknown dependencies from the
497+
// dependency partition (DepCands). This is required for correctness, because
498+
// in this case we can have checking between pointers to the same underlying
499+
// object.
508500
unsigned TotalComparisons = 0;
509501

510502
DenseMap<Value *, SmallVector<unsigned>> PositionMap;
@@ -529,6 +521,13 @@ void RuntimePointerChecking::groupChecks(
529521
MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
530522
Pointers[I].IsWritePtr);
531523

524+
// If there is no entry in the dependency partition, there are no potential
525+
// accesses to merge; simply add a new pointer checking group.
526+
if (DepCands.findValue(Access) == DepCands.end()) {
527+
CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
528+
continue;
529+
}
530+
532531
SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
533532
auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
534533

@@ -700,8 +699,10 @@ class AccessAnalysis {
700699
///
701700
/// Returns true if we need no check or if we do and we can generate them
702701
/// (i.e. the pointers have computable bounds).
703-
bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
704-
Loop *TheLoop, const DenseMap<Value *, const SCEV *> &Strides,
702+
bool canCheckPtrAtRT(const MemoryDepChecker &DepChecker,
703+
RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
704+
Loop *TheLoop,
705+
const DenseMap<Value *, const SCEV *> &Strides,
705706
Value *&UncomputablePtr, bool ShouldCheckWrap = false);
706707

707708
/// Goes over all memory accesses, checks whether a RT check is needed
@@ -717,12 +718,6 @@ class AccessAnalysis {
717718
/// dependency checking (i.e. FoundNonConstantDistanceDependence).
718719
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
719720

720-
/// We decided that no dependence analysis would be used. Reset the state.
721-
void resetDepChecks(MemoryDepChecker &DepChecker) {
722-
CheckDeps.clear();
723-
DepChecker.clearDependences();
724-
}
725-
726721
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
727722

728723
const DenseMap<Value *, SmallVector<const Value *, 16>> &
@@ -1107,7 +1102,8 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
11071102
// The id of the dependence set.
11081103
unsigned DepId;
11091104

1110-
if (isDependencyCheckNeeded()) {
1105+
if (isDependencyCheckNeeded() &&
1106+
DepCands.findValue(Access) != DepCands.end()) {
11111107
Value *Leader = DepCands.getLeaderValue(Access).getPointer();
11121108
unsigned &LeaderId = DepSetId[Leader];
11131109
if (!LeaderId)
@@ -1126,18 +1122,38 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
11261122
return true;
11271123
}
11281124

1129-
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
1130-
ScalarEvolution *SE, Loop *TheLoop,
1131-
const DenseMap<Value *, const SCEV *> &StridesMap,
1132-
Value *&UncomputablePtr, bool ShouldCheckWrap) {
1125+
bool AccessAnalysis::canCheckPtrAtRT(
1126+
const MemoryDepChecker &DepChecker, RuntimePointerChecking &RtCheck,
1127+
ScalarEvolution *SE, Loop *TheLoop,
1128+
const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr,
1129+
bool ShouldCheckWrap) {
11331130
// Find pointers with computable bounds. We are going to use this information
11341131
// to place a runtime bound check.
11351132
bool CanDoRT = true;
11361133

11371134
bool MayNeedRTCheck = false;
11381135
if (!IsRTCheckAnalysisNeeded) return true;
11391136

1140-
bool IsDepCheckNeeded = isDependencyCheckNeeded();
1137+
if (auto *Deps = DepChecker.getDependences()) {
1138+
// If there are unknown dependences, this means runtime checks are needed to
1139+
// ensure there's no overlap between accesses to the same underlying object.
1140+
// Remove the equivalence classes containing both source and destination
1141+
// accesses from DepCands. This ensures runtime checks will be generated
1142+
// between those accesses and prevents them from being grouped together.
1143+
for (const auto &Dep : *Deps) {
1144+
if (Dep.Type != MemoryDepChecker::Dependence::Unknown) {
1145+
assert(MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) ==
1146+
MemoryDepChecker::VectorizationSafetyStatus::Safe &&
1147+
"Should only skip safe dependences");
1148+
continue;
1149+
}
1150+
Instruction *Src = Dep.getSource(DepChecker);
1151+
Instruction *Dst = Dep.getDestination(DepChecker);
1152+
DepCands.eraseClass({getPointerOperand(Src), Src->mayWriteToMemory()});
1153+
DepCands.eraseClass({getPointerOperand(Dst), Dst->mayWriteToMemory()});
1154+
}
1155+
} else
1156+
CheckDeps.clear();
11411157

11421158
// We assign a consecutive id to access from different alias sets.
11431159
// Accesses between different groups doesn't need to be checked.
@@ -1265,7 +1281,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
12651281
}
12661282

12671283
if (MayNeedRTCheck && CanDoRT)
1268-
RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
1284+
RtCheck.generateChecks(DepChecker, DepCands);
12691285

12701286
LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
12711287
<< " pointer comparisons.\n");
@@ -2625,9 +2641,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
26252641
// Find pointers with computable bounds. We are going to use this information
26262642
// to place a runtime bound check.
26272643
Value *UncomputablePtr = nullptr;
2628-
bool CanDoRTIfNeeded =
2629-
Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
2630-
SymbolicStrides, UncomputablePtr, false);
2644+
bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
2645+
getDepChecker(), *PtrRtChecking, PSE->getSE(), TheLoop, SymbolicStrides,
2646+
UncomputablePtr, false);
26312647
if (!CanDoRTIfNeeded) {
26322648
auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
26332649
recordAnalysis("CantIdentifyArrayBounds", I)
@@ -2651,16 +2667,14 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
26512667
if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
26522668
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
26532669

2654-
// Clear the dependency checks. We assume they are not needed.
2655-
Accesses.resetDepChecks(*DepChecker);
2656-
26572670
PtrRtChecking->reset();
26582671
PtrRtChecking->Need = true;
26592672

26602673
auto *SE = PSE->getSE();
26612674
UncomputablePtr = nullptr;
2662-
CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
2663-
*PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
2675+
CanDoRTIfNeeded =
2676+
Accesses.canCheckPtrAtRT(getDepChecker(), *PtrRtChecking, SE, TheLoop,
2677+
SymbolicStrides, UncomputablePtr, true);
26642678

26652679
// Check that we found the bounds for the pointer.
26662680
if (!CanDoRTIfNeeded) {

llvm/test/Analysis/LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ define void @test_indirect_read_loop_also_modifies_pointer_array(ptr noundef %ar
9090
; CHECK-NEXT: loop.2:
9191
; CHECK-NEXT: Memory dependences are safe with run-time checks
9292
; CHECK-NEXT: Dependences:
93+
; CHECK-NEXT: Unknown:
94+
; CHECK-NEXT: %l.1 = load ptr, ptr %gep.iv.1, align 8, !tbaa !0 ->
95+
; CHECK-NEXT: store i64 %l.2, ptr %gep.iv.2, align 8, !tbaa !0
96+
; CHECK-EMPTY:
9397
; CHECK-NEXT: Run-time memory checks:
9498
; CHECK-NEXT: Check 0:
9599
; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
@@ -158,6 +162,10 @@ define void @test_indirect_write_loop_also_modifies_pointer_array(ptr noundef %a
158162
; CHECK-NEXT: loop.2:
159163
; CHECK-NEXT: Memory dependences are safe with run-time checks
160164
; CHECK-NEXT: Dependences:
165+
; CHECK-NEXT: Unknown:
166+
; CHECK-NEXT: %l.1 = load ptr, ptr %gep.iv.1, align 8, !tbaa !0 ->
167+
; CHECK-NEXT: store ptr %l.1, ptr %gep.iv.2, align 8, !tbaa !0
168+
; CHECK-EMPTY:
161169
; CHECK-NEXT: Run-time memory checks:
162170
; CHECK-NEXT: Check 0:
163171
; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):

llvm/test/Analysis/LoopAccessAnalysis/multiple-strides-rt-memory-checks.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@
2525
; CHECK: .inner:
2626
; CHECK-NEXT: Memory dependences are safe with run-time checks
2727
; CHECK-NEXT: Dependences:
28+
; CHECK-NEXT: Unknown:
29+
; CHECK-NEXT: %4 = load i32, ptr %1, align 4 ->
30+
; CHECK-NEXT: store i32 %8, ptr %6, align 4
31+
; CHECK-EMPTY:
32+
; CHECK-NEXT: Unknown:
33+
; CHECK-NEXT: %3 = load i32, ptr %2, align 4 ->
34+
; CHECK-NEXT: store i32 %8, ptr %6, align 4
35+
; CHECK-EMPTY:
36+
; CHECK-NEXT: Forward:
37+
; CHECK-NEXT: %7 = load i32, ptr %6, align 4 ->
38+
; CHECK-NEXT: store i32 %8, ptr %6, align 4
39+
; CHECK-EMPTY:
2840
; CHECK-NEXT: Run-time memory checks:
2941
; CHECK: Check 0:
3042
; CHECK: Check 1:

llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ define void @test_distance_positive_independent_via_trip_count(ptr %A) {
1010
; CHECK-NEXT: loop:
1111
; CHECK-NEXT: Memory dependences are safe with run-time checks
1212
; CHECK-NEXT: Dependences:
13+
; CHECK-NEXT: Unknown:
14+
; CHECK-NEXT: %l = load i8, ptr %gep.A, align 1 ->
15+
; CHECK-NEXT: store i32 %ext, ptr %gep.A.400, align 4
16+
; CHECK-EMPTY:
1317
; CHECK-NEXT: Run-time memory checks:
1418
; CHECK-NEXT: Check 0:
1519
; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
@@ -55,6 +59,10 @@ define void @test_distance_positive_backwards(ptr %A) {
5559
; CHECK-NEXT: loop:
5660
; CHECK-NEXT: Memory dependences are safe with run-time checks
5761
; CHECK-NEXT: Dependences:
62+
; CHECK-NEXT: Unknown:
63+
; CHECK-NEXT: %l = load i8, ptr %gep.A, align 1 ->
64+
; CHECK-NEXT: store i32 %ext, ptr %gep.A.400, align 4
65+
; CHECK-EMPTY:
5866
; CHECK-NEXT: Run-time memory checks:
5967
; CHECK-NEXT: Check 0:
6068
; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):
@@ -98,6 +106,10 @@ define void @test_distance_positive_via_assume(ptr %A, i64 %off) {
98106
; CHECK-NEXT: loop:
99107
; CHECK-NEXT: Memory dependences are safe with run-time checks
100108
; CHECK-NEXT: Dependences:
109+
; CHECK-NEXT: Unknown:
110+
; CHECK-NEXT: %l = load i8, ptr %gep.A, align 1 ->
111+
; CHECK-NEXT: store i32 %ext, ptr %gep.A.400, align 4
112+
; CHECK-EMPTY:
101113
; CHECK-NEXT: Run-time memory checks:
102114
; CHECK-NEXT: Check 0:
103115
; CHECK-NEXT: Comparing group ([[GRP5:0x[0-9a-f]+]]):

llvm/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ target triple = "x86_64-apple-macosx10.10.0"
1313

1414
; CHECK: Memory dependences are safe with run-time checks
1515
; CHECK-NEXT: Dependences:
16+
; CHECK-NEXT: Unknown:
17+
; CHECK-NEXT: %loadA = load i16, ptr %arrayidxA, align 2 ->
18+
; CHECK-NEXT: store i16 %mul1, ptr %arrayidxA2, align 2
19+
; CHECK-EMPTY:
1620
; CHECK-NEXT: Run-time memory checks:
1721
; CHECK-NEXT: 0:
1822
; CHECK-NEXT: Comparing group

0 commit comments

Comments
 (0)