@@ -385,9 +385,10 @@ SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
385
385
}
386
386
387
387
void RuntimePointerChecking::generateChecks (
388
- MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
388
+ const MemoryDepChecker &DepChecker,
389
+ MemoryDepChecker::DepCandidates &DepCands) {
389
390
assert (Checks.empty () && " Checks is not empty" );
390
- groupChecks (DepCands, UseDependencies );
391
+ groupChecks (DepChecker, DepCands );
391
392
Checks = generateChecks ();
392
393
}
393
394
@@ -454,7 +455,8 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
454
455
}
455
456
456
457
void RuntimePointerChecking::groupChecks (
457
- MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
458
+ const MemoryDepChecker &DepChecker,
459
+ MemoryDepChecker::DepCandidates &DepCands) {
458
460
// We build the groups from dependency candidates equivalence classes
459
461
// because:
460
462
// - We know that pointers in the same equivalence class share
@@ -490,21 +492,11 @@ void RuntimePointerChecking::groupChecks(
490
492
// us to perform an accurate check in this case.
491
493
//
492
494
// The above case requires that we have an UnknownDependence between
493
- // accesses to the same underlying object. This cannot happen unless
494
- // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
495
- // is also false. In this case we will use the fallback path and create
496
- // separate checking groups for all pointers.
497
-
498
- // If we don't have the dependency partitions, construct a new
499
- // checking pointer group for each pointer. This is also required
500
- // for correctness, because in this case we can have checking between
501
- // pointers to the same underlying object.
502
- if (!UseDependencies) {
503
- for (unsigned I = 0 ; I < Pointers.size (); ++I)
504
- CheckingGroups.push_back (RuntimeCheckingPtrGroup (I, *this ));
505
- return ;
506
- }
507
-
495
+ // accesses to the same underlying object. It is the caller's responsibility
496
+ // to clear any entries for accesses with unknown dependencies from the
497
+ // dependency partition (DepCands). This is required for correctness, because
498
+ // in this case we can have checking between pointers to the same underlying
499
+ // object.
508
500
unsigned TotalComparisons = 0 ;
509
501
510
502
DenseMap<Value *, SmallVector<unsigned >> PositionMap;
@@ -529,6 +521,13 @@ void RuntimePointerChecking::groupChecks(
529
521
MemoryDepChecker::MemAccessInfo Access (Pointers[I].PointerValue ,
530
522
Pointers[I].IsWritePtr );
531
523
524
+ // If there is no entry in the dependency partition, there are no potential
525
+ // accesses to merge; simply add a new pointer checking group.
526
+ if (DepCands.findValue (Access) == DepCands.end ()) {
527
+ CheckingGroups.push_back (RuntimeCheckingPtrGroup (I, *this ));
528
+ continue ;
529
+ }
530
+
532
531
SmallVector<RuntimeCheckingPtrGroup, 2 > Groups;
533
532
auto LeaderI = DepCands.findValue (DepCands.getLeaderValue (Access));
534
533
@@ -700,8 +699,10 @@ class AccessAnalysis {
700
699
///
701
700
/// Returns true if we need no check or if we do and we can generate them
702
701
/// (i.e. the pointers have computable bounds).
703
- bool canCheckPtrAtRT (RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
704
- Loop *TheLoop, const DenseMap<Value *, const SCEV *> &Strides,
702
+ bool canCheckPtrAtRT (const MemoryDepChecker &DepChecker,
703
+ RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
704
+ Loop *TheLoop,
705
+ const DenseMap<Value *, const SCEV *> &Strides,
705
706
Value *&UncomputablePtr, bool ShouldCheckWrap = false );
706
707
707
708
/// Goes over all memory accesses, checks whether a RT check is needed
@@ -717,12 +718,6 @@ class AccessAnalysis {
717
718
/// dependency checking (i.e. FoundNonConstantDistanceDependence).
718
719
bool isDependencyCheckNeeded () { return !CheckDeps.empty (); }
719
720
720
- // / We decided that no dependence analysis would be used. Reset the state.
721
- void resetDepChecks (MemoryDepChecker &DepChecker) {
722
- CheckDeps.clear ();
723
- DepChecker.clearDependences ();
724
- }
725
-
726
721
MemAccessInfoList &getDependenciesToCheck () { return CheckDeps; }
727
722
728
723
const DenseMap<Value *, SmallVector<const Value *, 16 >> &
@@ -1107,7 +1102,8 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
1107
1102
// The id of the dependence set.
1108
1103
unsigned DepId;
1109
1104
1110
- if (isDependencyCheckNeeded ()) {
1105
+ if (isDependencyCheckNeeded () &&
1106
+ DepCands.findValue (Access) != DepCands.end ()) {
1111
1107
Value *Leader = DepCands.getLeaderValue (Access).getPointer ();
1112
1108
unsigned &LeaderId = DepSetId[Leader];
1113
1109
if (!LeaderId)
@@ -1126,18 +1122,38 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
1126
1122
return true ;
1127
1123
}
1128
1124
1129
- bool AccessAnalysis::canCheckPtrAtRT (RuntimePointerChecking &RtCheck,
1130
- ScalarEvolution *SE, Loop *TheLoop,
1131
- const DenseMap<Value *, const SCEV *> &StridesMap,
1132
- Value *&UncomputablePtr, bool ShouldCheckWrap) {
1125
+ bool AccessAnalysis::canCheckPtrAtRT (
1126
+ const MemoryDepChecker &DepChecker, RuntimePointerChecking &RtCheck,
1127
+ ScalarEvolution *SE, Loop *TheLoop,
1128
+ const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr,
1129
+ bool ShouldCheckWrap) {
1133
1130
// Find pointers with computable bounds. We are going to use this information
1134
1131
// to place a runtime bound check.
1135
1132
bool CanDoRT = true ;
1136
1133
1137
1134
bool MayNeedRTCheck = false ;
1138
1135
if (!IsRTCheckAnalysisNeeded) return true ;
1139
1136
1140
- bool IsDepCheckNeeded = isDependencyCheckNeeded ();
1137
+ if (auto *Deps = DepChecker.getDependences ()) {
1138
+ // If there are unknown dependences, this means runtime checks are needed to
1139
+ // ensure there's no overlap between accesses to the same underlying object.
1140
+ // Remove the equivalence classes containing both source and destination
1141
+ // accesses from DepCands. This ensures runtime checks will be generated
1142
+ // between those accesses and prevents them from being grouped together.
1143
+ for (const auto &Dep : *Deps) {
1144
+ if (Dep.Type != MemoryDepChecker::Dependence::Unknown) {
1145
+ assert (MemoryDepChecker::Dependence::isSafeForVectorization (Dep.Type ) ==
1146
+ MemoryDepChecker::VectorizationSafetyStatus::Safe &&
1147
+ " Should only skip safe dependences" );
1148
+ continue ;
1149
+ }
1150
+ Instruction *Src = Dep.getSource (DepChecker);
1151
+ Instruction *Dst = Dep.getDestination (DepChecker);
1152
+ DepCands.eraseClass ({getPointerOperand (Src), Src->mayWriteToMemory ()});
1153
+ DepCands.eraseClass ({getPointerOperand (Dst), Dst->mayWriteToMemory ()});
1154
+ }
1155
+ } else
1156
+ CheckDeps.clear ();
1141
1157
1142
1158
// We assign a consecutive id to access from different alias sets.
1143
1159
// Accesses between different groups don't need to be checked.
@@ -1265,7 +1281,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
1265
1281
}
1266
1282
1267
1283
if (MayNeedRTCheck && CanDoRT)
1268
- RtCheck.generateChecks (DepCands, IsDepCheckNeeded );
1284
+ RtCheck.generateChecks (DepChecker, DepCands );
1269
1285
1270
1286
LLVM_DEBUG (dbgs () << " LAA: We need to do " << RtCheck.getNumberOfChecks ()
1271
1287
<< " pointer comparisons.\n " );
@@ -2625,9 +2641,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2625
2641
// Find pointers with computable bounds. We are going to use this information
2626
2642
// to place a runtime bound check.
2627
2643
Value *UncomputablePtr = nullptr ;
2628
- bool CanDoRTIfNeeded =
2629
- Accesses. canCheckPtrAtRT ( *PtrRtChecking, PSE->getSE (), TheLoop,
2630
- SymbolicStrides, UncomputablePtr, false );
2644
+ bool CanDoRTIfNeeded = Accesses. canCheckPtrAtRT (
2645
+ getDepChecker (), *PtrRtChecking, PSE->getSE (), TheLoop, SymbolicStrides ,
2646
+ UncomputablePtr, false );
2631
2647
if (!CanDoRTIfNeeded) {
2632
2648
auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2633
2649
recordAnalysis (" CantIdentifyArrayBounds" , I)
@@ -2651,16 +2667,14 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2651
2667
if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck ()) {
2652
2668
LLVM_DEBUG (dbgs () << " LAA: Retrying with memory checks\n " );
2653
2669
2654
- // Clear the dependency checks. We assume they are not needed.
2655
- Accesses.resetDepChecks (*DepChecker);
2656
-
2657
2670
PtrRtChecking->reset ();
2658
2671
PtrRtChecking->Need = true ;
2659
2672
2660
2673
auto *SE = PSE->getSE ();
2661
2674
UncomputablePtr = nullptr ;
2662
- CanDoRTIfNeeded = Accesses.canCheckPtrAtRT (
2663
- *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true );
2675
+ CanDoRTIfNeeded =
2676
+ Accesses.canCheckPtrAtRT (getDepChecker (), *PtrRtChecking, SE, TheLoop,
2677
+ SymbolicStrides, UncomputablePtr, true );
2664
2678
2665
2679
// Check that we found the bounds for the pointer.
2666
2680
if (!CanDoRTIfNeeded) {
0 commit comments