Skip to content

Commit 81d3189

Browse files
[LAA] Keep pointer checks on partial analysis (#139719)
Currently, if there is any memory access that AccessAnalysis couldn't analyze, all of the runtime pointer check results are discarded. This patch makes that behaviour controllable through the AllowPartial option: when it is set, we generate the runtime check information for those pointers that we could analyze, as transformations may still be able to make use of the partial information.

Of the transformations that use LoopAccessAnalysis, only LoopVersioningLICM changes behaviour as a result of this change. This is because the others either:
* Check canVectorizeMemory, which will return false when we have partial pointer information, as analyzeLoop() will return false.
* Examine the dependencies returned by getDepChecker(), which will be empty, as we exit analyzeLoop when we have partial pointer information before calling areDepsSafe(), which is what fills in the dependency information.
1 parent 8f352f4 commit 81d3189

14 files changed

+511
-65
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,8 @@ class MemoryDepChecker {
194194
/// of a write access.
195195
LLVM_ABI void addAccess(LoadInst *LI);
196196

197-
/// Check whether the dependencies between the accesses are safe.
197+
/// Check whether the dependencies between the accesses are safe, and record
198+
/// the dependence information in Dependences if so.
198199
///
199200
/// Only checks sets with elements in \p CheckDeps.
200201
LLVM_ABI bool areDepsSafe(const DepCandidates &AccessSets,
@@ -654,7 +655,8 @@ class RuntimePointerChecking {
654655
/// For memory dependences that cannot be determined at compile time, it
655656
/// generates run-time checks to prove independence. This is done by
656657
/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
657-
/// RuntimePointerCheck class.
658+
/// RuntimePointerCheck class. \p AllowPartial determines whether partial checks
659+
/// are generated when not all pointers could be analyzed.
658660
///
659661
/// If pointers can wrap or can't be expressed as affine AddRec expressions by
660662
/// ScalarEvolution, we will generate run-time checks by emitting a
@@ -667,7 +669,8 @@ class LoopAccessInfo {
667669
LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE,
668670
const TargetTransformInfo *TTI,
669671
const TargetLibraryInfo *TLI, AAResults *AA,
670-
DominatorTree *DT, LoopInfo *LI);
672+
DominatorTree *DT, LoopInfo *LI,
673+
bool AllowPartial = false);
671674

672675
/// Return true if we can analyze the memory accesses in the loop and there are
673676
/// no memory dependence cycles. Note that for dependences between loads &
@@ -682,6 +685,11 @@ class LoopAccessInfo {
682685
/// not legal to insert them.
683686
bool hasConvergentOp() const { return HasConvergentOp; }
684687

688+
/// Return true if, when runtime pointer checking does not have complete
689+
/// results, it instead has partial results for those memory accesses that
690+
/// could be analyzed.
691+
bool hasAllowPartial() const { return AllowPartial; }
692+
685693
const RuntimePointerChecking *getRuntimePointerChecking() const {
686694
return PtrRtChecking.get();
687695
}
@@ -784,20 +792,30 @@ class LoopAccessInfo {
784792

785793
/// We need to check that all of the pointers in this list are disjoint
786794
/// at runtime. Using std::unique_ptr to make using move ctor simpler.
795+
/// If AllowPartial is true then this list may contain only partial
796+
/// information when we've failed to analyze all the memory accesses in the
797+
/// loop, in which case HasCompletePtrRtChecking will be false.
787798
std::unique_ptr<RuntimePointerChecking> PtrRtChecking;
788799

789-
/// the Memory Dependence Checker which can determine the
800+
/// The Memory Dependence Checker which can determine the
790801
/// loop-independent and loop-carried dependences between memory accesses.
802+
/// This will be empty if we've failed to analyze all the memory accesses in the
803+
/// loop (i.e. CanVecMem is false).
791804
std::unique_ptr<MemoryDepChecker> DepChecker;
792805

793806
Loop *TheLoop;
794807

808+
/// Determines whether we should generate partial runtime checks when not all
809+
/// memory accesses could be analyzed.
810+
bool AllowPartial;
811+
795812
unsigned NumLoads = 0;
796813
unsigned NumStores = 0;
797814

798815
/// Cache the result of analyzeLoop.
799816
bool CanVecMem = false;
800817
bool HasConvergentOp = false;
818+
bool HasCompletePtrRtChecking = false;
801819

802820
/// Indicator that there are two non vectorizable stores to the same uniform
803821
/// address.
@@ -920,7 +938,7 @@ class LoopAccessInfoManager {
920938
const TargetLibraryInfo *TLI)
921939
: SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
922940

923-
LLVM_ABI const LoopAccessInfo &getInfo(Loop &L);
941+
LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false);
924942

925943
LLVM_ABI void clear();
926944

llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@ class raw_ostream;
2020
class LoopAccessInfoPrinterPass
2121
: public PassInfoMixin<LoopAccessInfoPrinterPass> {
2222
raw_ostream &OS;
23+
bool AllowPartial;
2324

2425
public:
25-
explicit LoopAccessInfoPrinterPass(raw_ostream &OS) : OS(OS) {}
26+
explicit LoopAccessInfoPrinterPass(raw_ostream &OS, bool AllowPartial)
27+
: OS(OS), AllowPartial(AllowPartial) {}
2628
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
2729
static bool isRequired() { return true; }
2830
};

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -530,8 +530,10 @@ void RuntimePointerChecking::groupChecks(
530530
// equivalence class, the iteration order is deterministic.
531531
for (auto M : DepCands.members(Access)) {
532532
auto PointerI = PositionMap.find(M.getPointer());
533-
assert(PointerI != PositionMap.end() &&
534-
"pointer in equivalence class not found in PositionMap");
533+
// If we can't find the pointer in PositionMap that means we can't
534+
// generate a memcheck for it.
535+
if (PointerI == PositionMap.end())
536+
continue;
535537
for (unsigned Pointer : PointerI->second) {
536538
bool Merged = false;
537539
// Mark this pointer as seen.
@@ -693,10 +695,13 @@ class AccessAnalysis {
693695
/// non-intersection.
694696
///
695697
/// Returns true if we need no check or if we do and we can generate them
696-
/// (i.e. the pointers have computable bounds).
698+
/// (i.e. the pointers have computable bounds). A return value of false means
699+
/// we couldn't analyze and generate runtime checks for all pointers in the
700+
/// loop, but if \p AllowPartial is set then we will have checks for those
701+
/// pointers we could analyze.
697702
bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
698703
const DenseMap<Value *, const SCEV *> &Strides,
699-
Value *&UncomputablePtr);
704+
Value *&UncomputablePtr, bool AllowPartial);
700705

701706
/// Goes over all memory accesses, checks whether a RT check is needed
702707
/// and builds sets of dependent accesses.
@@ -1174,8 +1179,8 @@ bool AccessAnalysis::createCheckForAccess(
11741179

11751180
bool AccessAnalysis::canCheckPtrAtRT(
11761181
RuntimePointerChecking &RtCheck, Loop *TheLoop,
1177-
const DenseMap<Value *, const SCEV *> &StridesMap,
1178-
Value *&UncomputablePtr) {
1182+
const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr,
1183+
bool AllowPartial) {
11791184
// Find pointers with computable bounds. We are going to use this information
11801185
// to place a runtime bound check.
11811186
bool CanDoRT = true;
@@ -1268,7 +1273,8 @@ bool AccessAnalysis::canCheckPtrAtRT(
12681273
/*Assume=*/true)) {
12691274
CanDoAliasSetRT = false;
12701275
UncomputablePtr = Access.getPointer();
1271-
break;
1276+
if (!AllowPartial)
1277+
break;
12721278
}
12731279
}
12741280
}
@@ -1308,7 +1314,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
13081314
}
13091315
}
13101316

1311-
if (MayNeedRTCheck && CanDoRT)
1317+
if (MayNeedRTCheck && (CanDoRT || AllowPartial))
13121318
RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
13131319

13141320
LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
@@ -1322,7 +1328,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
13221328
bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
13231329
assert(CanDoRTIfNeeded == (CanDoRT || !MayNeedRTCheck) &&
13241330
"CanDoRTIfNeeded depends on RtCheck.Need");
1325-
if (!CanDoRTIfNeeded)
1331+
if (!CanDoRTIfNeeded && !AllowPartial)
13261332
RtCheck.reset();
13271333
return CanDoRTIfNeeded;
13281334
}
@@ -2592,9 +2598,9 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
25922598
// Find pointers with computable bounds. We are going to use this information
25932599
// to place a runtime bound check.
25942600
Value *UncomputablePtr = nullptr;
2595-
bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
2596-
*PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr);
2597-
if (!CanDoRTIfNeeded) {
2601+
HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
2602+
*PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial);
2603+
if (!HasCompletePtrRtChecking) {
25982604
const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
25992605
recordAnalysis("CantIdentifyArrayBounds", I)
26002606
<< "cannot identify array bounds";
@@ -2622,11 +2628,12 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
26222628
PtrRtChecking->Need = true;
26232629

26242630
UncomputablePtr = nullptr;
2625-
CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
2626-
*PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr);
2631+
HasCompletePtrRtChecking =
2632+
Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
2633+
UncomputablePtr, AllowPartial);
26272634

26282635
// Check that we found the bounds for the pointer.
2629-
if (!CanDoRTIfNeeded) {
2636+
if (!HasCompletePtrRtChecking) {
26302637
auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
26312638
recordAnalysis("CantCheckMemDepsAtRunTime", I)
26322639
<< "cannot check memory dependencies at runtime";
@@ -2901,9 +2908,10 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
29012908
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
29022909
const TargetTransformInfo *TTI,
29032910
const TargetLibraryInfo *TLI, AAResults *AA,
2904-
DominatorTree *DT, LoopInfo *LI)
2911+
DominatorTree *DT, LoopInfo *LI,
2912+
bool AllowPartial)
29052913
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2906-
PtrRtChecking(nullptr), TheLoop(L) {
2914+
PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) {
29072915
unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
29082916
if (TTI && !TTI->enableScalableVectorization())
29092917
// Scale the vector width by 2 as rough estimate to also consider
@@ -2952,6 +2960,8 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
29522960

29532961
// List the pairs of accesses that need run-time checks to prove independence.
29542962
PtrRtChecking->print(OS, Depth);
2963+
if (PtrRtChecking->Need && !HasCompletePtrRtChecking)
2964+
OS.indent(Depth) << "Generated run-time checks are incomplete\n";
29552965
OS << "\n";
29562966

29572967
OS.indent(Depth)
@@ -2971,12 +2981,15 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
29712981
PSE->print(OS, Depth);
29722982
}
29732983

2974-
const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
2984+
const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L,
2985+
bool AllowPartial) {
29752986
const auto &[It, Inserted] = LoopAccessInfoMap.try_emplace(&L);
29762987

2977-
if (Inserted)
2978-
It->second =
2979-
std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, &LI);
2988+
// We need to create the LoopAccessInfo if either we don't already have one,
2989+
// or if it was created with a different value of AllowPartial.
2990+
if (Inserted || It->second->hasAllowPartial() != AllowPartial)
2991+
It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT,
2992+
&LI, AllowPartial);
29802993

29812994
return *It->second;
29822995
}

llvm/lib/Passes/PassRegistry.def

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,6 @@ FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(errs()))
441441
FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(errs()))
442442
FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(errs()))
443443
FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(errs()))
444-
FUNCTION_PASS("print<access-info>", LoopAccessInfoPrinterPass(errs()))
445444
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(errs()))
446445
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(errs()))
447446
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(errs()))
@@ -583,6 +582,16 @@ FUNCTION_PASS_WITH_PARAMS(
583582
return MergedLoadStoreMotionPass(Opts);
584583
},
585584
parseMergedLoadStoreMotionOptions, "no-split-footer-bb;split-footer-bb")
585+
FUNCTION_PASS_WITH_PARAMS(
586+
"print<access-info>", "LoopAccessInfoPrinterPass",
587+
[](bool AllowPartial) {
588+
return LoopAccessInfoPrinterPass(errs(), AllowPartial);
589+
},
590+
[](StringRef Params) {
591+
return PassBuilder::parseSinglePassOption(Params, "allow-partial",
592+
"LoopAccessInfoPrinterPass");
593+
},
594+
"allow-partial")
586595
FUNCTION_PASS_WITH_PARAMS(
587596
"print<da>", "DependenceAnalysisPrinterPass",
588597
[](bool NormalizeResults) {

llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ PreservedAnalyses LoopAccessInfoPrinterPass::run(Function &F,
2828
while (!Worklist.empty()) {
2929
Loop *L = Worklist.pop_back_val();
3030
OS.indent(2) << L->getHeader()->getName() << ":\n";
31-
LAIs.getInfo(*L).print(OS, 4);
31+
LAIs.getInfo(*L, AllowPartial).print(OS, 4);
3232
}
3333
return PreservedAnalyses::all();
3434
}

llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ bool LoopVersioningLICM::legalLoopInstructions() {
368368
IsReadOnlyLoop = true;
369369
using namespace ore;
370370
// Get LoopAccessInfo from current loop via the proxy.
371-
LAI = &LAIs.getInfo(*CurLoop);
371+
LAI = &LAIs.getInfo(*CurLoop, /*AllowPartial=*/true);
372372
// Check LoopAccessInfo for need of runtime check.
373373
if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
374374
LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; RUN: opt -disable-output -passes='print<access-info><allow-partial>,print<access-info>' %s 2>&1 | FileCheck %s --check-prefixes=ALLOW-BEFORE
2+
; RUN: opt -disable-output -passes='print<access-info>,print<access-info><allow-partial>' %s 2>&1 | FileCheck %s --check-prefixes=ALLOW-AFTER
3+
4+
; Check that we get the right results when loop access analysis is run twice,
5+
; once without partial results and once with.
6+
7+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
8+
9+
define void @gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) {
10+
; ALLOW-BEFORE-LABEL: 'gep_loaded_offset'
11+
; ALLOW-BEFORE-NEXT: while.body:
12+
; ALLOW-BEFORE-NEXT: Report: cannot identify array bounds
13+
; ALLOW-BEFORE-NEXT: Dependences:
14+
; ALLOW-BEFORE-NEXT: Run-time memory checks:
15+
; ALLOW-BEFORE-NEXT: Check 0:
16+
; ALLOW-BEFORE-NEXT: Comparing group GRP0:
17+
; ALLOW-BEFORE-NEXT: %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
18+
; ALLOW-BEFORE-NEXT: Against group GRP1:
19+
; ALLOW-BEFORE-NEXT: ptr %r
20+
; ALLOW-BEFORE-NEXT: Grouped accesses:
21+
; ALLOW-BEFORE-NEXT: Group GRP0:
22+
; ALLOW-BEFORE-NEXT: (Low: %p High: (4 + (4 * (zext i32 (-1 + %n)<nsw> to i64))<nuw><nsw> + %p))
23+
; ALLOW-BEFORE-NEXT: Member: {%p,+,4}<nuw><%while.body>
24+
; ALLOW-BEFORE-NEXT: Group GRP1:
25+
; ALLOW-BEFORE-NEXT: (Low: %r High: (8 + %r))
26+
; ALLOW-BEFORE-NEXT: Member: %r
27+
; ALLOW-BEFORE-NEXT: Generated run-time checks are incomplete
28+
; ALLOW-BEFORE-EMPTY:
29+
; ALLOW-BEFORE-NEXT: Non vectorizable stores to invariant address were not found in loop.
30+
; ALLOW-BEFORE-NEXT: SCEV assumptions:
31+
; ALLOW-BEFORE-EMPTY:
32+
; ALLOW-BEFORE-NEXT: Expressions re-written:
33+
;
34+
; ALLOW-BEFORE-LABEL: 'gep_loaded_offset'
35+
; ALLOW-BEFORE-NEXT: while.body:
36+
; ALLOW-BEFORE-NEXT: Report: cannot identify array bounds
37+
; ALLOW-BEFORE-NEXT: Dependences:
38+
; ALLOW-BEFORE-NEXT: Run-time memory checks:
39+
; ALLOW-BEFORE-NEXT: Grouped accesses:
40+
; ALLOW-BEFORE-EMPTY:
41+
; ALLOW-BEFORE-NEXT: Non vectorizable stores to invariant address were not found in loop.
42+
; ALLOW-BEFORE-NEXT: SCEV assumptions:
43+
; ALLOW-BEFORE-EMPTY:
44+
; ALLOW-BEFORE-NEXT: Expressions re-written:
45+
;
46+
; ALLOW-AFTER-LABEL: 'gep_loaded_offset'
47+
; ALLOW-AFTER-NEXT: while.body:
48+
; ALLOW-AFTER-NEXT: Report: cannot identify array bounds
49+
; ALLOW-AFTER-NEXT: Dependences:
50+
; ALLOW-AFTER-NEXT: Run-time memory checks:
51+
; ALLOW-AFTER-NEXT: Grouped accesses:
52+
; ALLOW-AFTER-EMPTY:
53+
; ALLOW-AFTER-NEXT: Non vectorizable stores to invariant address were not found in loop.
54+
; ALLOW-AFTER-NEXT: SCEV assumptions:
55+
; ALLOW-AFTER-EMPTY:
56+
; ALLOW-AFTER-NEXT: Expressions re-written:
57+
;
58+
; ALLOW-AFTER-LABEL: 'gep_loaded_offset'
59+
; ALLOW-AFTER-NEXT: while.body:
60+
; ALLOW-AFTER-NEXT: Report: cannot identify array bounds
61+
; ALLOW-AFTER-NEXT: Dependences:
62+
; ALLOW-AFTER-NEXT: Run-time memory checks:
63+
; ALLOW-AFTER-NEXT: Check 0:
64+
; ALLOW-AFTER-NEXT: Comparing group GRP0:
65+
; ALLOW-AFTER-NEXT: %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
66+
; ALLOW-AFTER-NEXT: Against group GRP1:
67+
; ALLOW-AFTER-NEXT: ptr %r
68+
; ALLOW-AFTER-NEXT: Grouped accesses:
69+
; ALLOW-AFTER-NEXT: Group GRP0:
70+
; ALLOW-AFTER-NEXT: (Low: %p High: (4 + (4 * (zext i32 (-1 + %n)<nsw> to i64))<nuw><nsw> + %p))
71+
; ALLOW-AFTER-NEXT: Member: {%p,+,4}<nuw><%while.body>
72+
; ALLOW-AFTER-NEXT: Group GRP1:
73+
; ALLOW-AFTER-NEXT: (Low: %r High: (8 + %r))
74+
; ALLOW-AFTER-NEXT: Member: %r
75+
; ALLOW-AFTER-NEXT: Generated run-time checks are incomplete
76+
; ALLOW-AFTER-EMPTY:
77+
; ALLOW-AFTER-NEXT: Non vectorizable stores to invariant address were not found in loop.
78+
; ALLOW-AFTER-NEXT: SCEV assumptions:
79+
; ALLOW-AFTER-EMPTY:
80+
; ALLOW-AFTER-NEXT: Expressions re-written:
81+
;
82+
entry:
83+
br label %while.body
84+
85+
while.body:
86+
%n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ]
87+
%p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ]
88+
%dec = add nsw i32 %n.addr, -1
89+
%rval = load i64, ptr %r, align 4
90+
%arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval
91+
%val = load i32, ptr %arrayidx, align 4
92+
%incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4
93+
store i32 %val, ptr %p.addr, align 4
94+
%tobool.not = icmp eq i32 %dec, 0
95+
br i1 %tobool.not, label %while.end, label %while.body
96+
97+
while.end:
98+
ret void
99+
}

0 commit comments

Comments
 (0)