Skip to content

Commit 1a61c4f

Browse files
committed
[LAA] Perform checks for no-wrap separately from getPtrStride.
Reorganize the code in isNoWrap to perform the no-wrap checks without relying on getPtrStride directly; getPtrStride now uses isNoWrap. The new structure allows deriving no-wrap in more cases in LAA: in some cases getPtrStride bails out early because it cannot return a constant stride, but we can still prove no-wrap for the pointer. One example is AddRecs with non-ConstantInt strides with inbounds GEPs, as in the improved test cases. This enables vectorization with runtime checks in a few more cases.
1 parent 424fcc5 commit 1a61c4f

File tree

2 files changed

+134
-44
lines changed

2 files changed

+134
-44
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -855,16 +855,60 @@ getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
855855
return Stride;
856856
}
857857

858+
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
859+
PredicatedScalarEvolution &PSE, const Loop *L);
860+
858861
/// Check whether a pointer address cannot wrap.
859862
static bool isNoWrap(PredicatedScalarEvolution &PSE,
860863
const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr,
861-
Type *AccessTy, Loop *L, bool Assume) {
862-
const SCEV *PtrScev = PSE.getSCEV(Ptr);
864+
Type *AccessTy, const Loop *L, bool Assume,
865+
std::optional<int64_t> Stride = std::nullopt) {
866+
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
863867
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
864868
return true;
865869

866-
return getPtrStride(PSE, AccessTy, Ptr, L, Strides, Assume).has_value() ||
867-
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
870+
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
871+
if (Assume && !AR)
872+
AR = PSE.getAsAddRec(Ptr);
873+
if (!AR)
874+
return false;
875+
876+
// The address calculation must not wrap. Otherwise, a dependence could be
877+
// inverted.
878+
if (isNoWrapAddRec(Ptr, AR, PSE, L))
879+
return true;
880+
881+
// An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
882+
// the distance between the previously accessed location and the wrapped
883+
// location will be larger than half the pointer index type space. In that
884+
// case, the GEP would be poison and any memory access dependent on it would
885+
// be immediate UB when executed.
886+
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
887+
GEP && GEP->hasNoUnsignedSignedWrap())
888+
return true;
889+
890+
// If the null pointer is undefined, then a access sequence which would
891+
// otherwise access it can be assumed not to unsigned wrap. Note that this
892+
// assumes the object in memory is aligned to the natural alignment.
893+
if (!Stride)
894+
Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
895+
if (Stride) {
896+
unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
897+
if (!NullPointerIsDefined(L->getHeader()->getParent(), AddrSpace) &&
898+
(*Stride == 1 || *Stride == -1))
899+
return true;
900+
}
901+
902+
if (Assume) {
903+
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
904+
LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
905+
<< "LAA: Pointer: " << *Ptr << "\n"
906+
<< "LAA: SCEV: " << *AR << "\n"
907+
<< "LAA: Added an overflow assumption\n");
908+
return true;
909+
}
910+
911+
return PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
868912
}
869913

870914
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
@@ -1505,36 +1549,9 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
15051549
if (!ShouldCheckWrap || !Stride)
15061550
return Stride;
15071551

1508-
// The address calculation must not wrap. Otherwise, a dependence could be
1509-
// inverted.
1510-
if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
1552+
if (isNoWrap(PSE, StridesMap, Ptr, AccessTy, Lp, Assume, Stride))
15111553
return Stride;
15121554

1513-
// An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
1514-
// the distance between the previously accessed location and the wrapped
1515-
// location will be larger than half the pointer index type space. In that
1516-
// case, the GEP would be poison and any memory access dependent on it would
1517-
// be immediate UB when executed.
1518-
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1519-
GEP && GEP->hasNoUnsignedSignedWrap())
1520-
return Stride;
1521-
1522-
// If the null pointer is undefined, then a access sequence which would
1523-
// otherwise access it can be assumed not to unsigned wrap. Note that this
1524-
// assumes the object in memory is aligned to the natural alignment.
1525-
unsigned AddrSpace = Ty->getPointerAddressSpace();
1526-
if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
1527-
(Stride == 1 || Stride == -1))
1528-
return Stride;
1529-
1530-
if (Assume) {
1531-
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1532-
LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
1533-
<< "LAA: Pointer: " << *Ptr << "\n"
1534-
<< "LAA: SCEV: " << *AR << "\n"
1535-
<< "LAA: Added an overflow assumption\n");
1536-
return Stride;
1537-
}
15381555
LLVM_DEBUG(
15391556
dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
15401557
<< *Ptr << " SCEV: " << *AR << "\n");

llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll

Lines changed: 85 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,38 @@ exit:
6565
define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step5(ptr %a, ptr %b, i64 %offset, i64 %n) {
6666
; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step5'
6767
; CHECK-NEXT: loop:
68-
; CHECK-NEXT: Report: cannot check memory dependencies at runtime
68+
; CHECK-NEXT: Memory dependences are safe with run-time checks
6969
; CHECK-NEXT: Dependences:
7070
; CHECK-NEXT: Run-time memory checks:
71+
; CHECK-NEXT: Check 0:
72+
; CHECK-NEXT: Comparing group ([[GRP4:0x[0-9a-f]+]]):
73+
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
74+
; CHECK-NEXT: Against group ([[GRP5:0x[0-9a-f]+]]):
75+
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
76+
; CHECK-NEXT: Check 1:
77+
; CHECK-NEXT: Comparing group ([[GRP4]]):
78+
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
79+
; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
80+
; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2
81+
; CHECK-NEXT: Check 2:
82+
; CHECK-NEXT: Comparing group ([[GRP5]]):
83+
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
84+
; CHECK-NEXT: Against group ([[GRP6]]):
85+
; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2
7186
; CHECK-NEXT: Grouped accesses:
87+
; CHECK-NEXT: Group [[GRP4]]:
88+
; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
89+
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
90+
; CHECK-NEXT: Group [[GRP5]]:
91+
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
92+
; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
93+
; CHECK-NEXT: Group [[GRP6]]:
94+
; CHECK-NEXT: (Low: %b High: (-1 + (5 * %n) + %b))
95+
; CHECK-NEXT: Member: {%b,+,5}<%loop>
7296
; CHECK-EMPTY:
7397
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
7498
; CHECK-NEXT: SCEV assumptions:
99+
; CHECK-NEXT: {%b,+,5}<%loop> Added Flags: <nusw>
75100
; CHECK-EMPTY:
76101
; CHECK-NEXT: Expressions re-written:
77102
;
@@ -102,10 +127,34 @@ exit:
102127
define void @dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_step_not_constant(ptr %a, ptr %b, i64 %offset, i64 %n, i64 %s) {
103128
; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_step_not_constant'
104129
; CHECK-NEXT: loop:
105-
; CHECK-NEXT: Report: cannot check memory dependencies at runtime
130+
; CHECK-NEXT: Memory dependences are safe with run-time checks
106131
; CHECK-NEXT: Dependences:
107132
; CHECK-NEXT: Run-time memory checks:
133+
; CHECK-NEXT: Check 0:
134+
; CHECK-NEXT: Comparing group ([[GRP7:0x[0-9a-f]+]]):
135+
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
136+
; CHECK-NEXT: Against group ([[GRP8:0x[0-9a-f]+]]):
137+
; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
138+
; CHECK-NEXT: Check 1:
139+
; CHECK-NEXT: Comparing group ([[GRP7]]):
140+
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
141+
; CHECK-NEXT: Against group ([[GRP9:0x[0-9a-f]+]]):
142+
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
143+
; CHECK-NEXT: Check 2:
144+
; CHECK-NEXT: Comparing group ([[GRP8]]):
145+
; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
146+
; CHECK-NEXT: Against group ([[GRP9]]):
147+
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
108148
; CHECK-NEXT: Grouped accesses:
149+
; CHECK-NEXT: Group [[GRP7]]:
150+
; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
151+
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
152+
; CHECK-NEXT: Group [[GRP8]]:
153+
; CHECK-NEXT: (Low: %b High: (3 + %n + %b))
154+
; CHECK-NEXT: Member: {%b,+,1}<%loop>
155+
; CHECK-NEXT: Group [[GRP9]]:
156+
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
157+
; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
109158
; CHECK-EMPTY:
110159
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
111160
; CHECK-NEXT: SCEV assumptions:
@@ -144,10 +193,34 @@ exit:
144193
define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step_not_constant(ptr %a, ptr %b, i64 %offset, i64 %n, i64 %s) {
145194
; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step_not_constant'
146195
; CHECK-NEXT: loop:
147-
; CHECK-NEXT: Report: cannot check memory dependencies at runtime
196+
; CHECK-NEXT: Memory dependences are safe with run-time checks
148197
; CHECK-NEXT: Dependences:
149198
; CHECK-NEXT: Run-time memory checks:
199+
; CHECK-NEXT: Check 0:
200+
; CHECK-NEXT: Comparing group ([[GRP10:0x[0-9a-f]+]]):
201+
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
202+
; CHECK-NEXT: Against group ([[GRP11:0x[0-9a-f]+]]):
203+
; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
204+
; CHECK-NEXT: Check 1:
205+
; CHECK-NEXT: Comparing group ([[GRP10]]):
206+
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
207+
; CHECK-NEXT: Against group ([[GRP12:0x[0-9a-f]+]]):
208+
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
209+
; CHECK-NEXT: Check 2:
210+
; CHECK-NEXT: Comparing group ([[GRP11]]):
211+
; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
212+
; CHECK-NEXT: Against group ([[GRP12]]):
213+
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
150214
; CHECK-NEXT: Grouped accesses:
215+
; CHECK-NEXT: Group [[GRP10]]:
216+
; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
217+
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
218+
; CHECK-NEXT: Group [[GRP11]]:
219+
; CHECK-NEXT: (Low: %b High: (3 + %n + %b))
220+
; CHECK-NEXT: Member: {%b,+,1}<%loop>
221+
; CHECK-NEXT: Group [[GRP12]]:
222+
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
223+
; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
151224
; CHECK-EMPTY:
152225
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
153226
; CHECK-NEXT: SCEV assumptions:
@@ -189,28 +262,28 @@ define void @dependency_check_and_runtime_checks_needed_gepb_may_wrap(ptr %a, pt
189262
; CHECK-NEXT: Dependences:
190263
; CHECK-NEXT: Run-time memory checks:
191264
; CHECK-NEXT: Check 0:
192-
; CHECK-NEXT: Comparing group ([[GRP4:0x[0-9a-f]+]]):
265+
; CHECK-NEXT: Comparing group ([[GRP13:0x[0-9a-f]+]]):
193266
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
194-
; CHECK-NEXT: Against group ([[GRP5:0x[0-9a-f]+]]):
267+
; CHECK-NEXT: Against group ([[GRP14:0x[0-9a-f]+]]):
195268
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
196269
; CHECK-NEXT: Check 1:
197-
; CHECK-NEXT: Comparing group ([[GRP4]]):
270+
; CHECK-NEXT: Comparing group ([[GRP13]]):
198271
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
199-
; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
272+
; CHECK-NEXT: Against group ([[GRP15:0x[0-9a-f]+]]):
200273
; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2
201274
; CHECK-NEXT: Check 2:
202-
; CHECK-NEXT: Comparing group ([[GRP5]]):
275+
; CHECK-NEXT: Comparing group ([[GRP14]]):
203276
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
204-
; CHECK-NEXT: Against group ([[GRP6]]):
277+
; CHECK-NEXT: Against group ([[GRP15]]):
205278
; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2
206279
; CHECK-NEXT: Grouped accesses:
207-
; CHECK-NEXT: Group [[GRP4]]:
280+
; CHECK-NEXT: Group [[GRP13]]:
208281
; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
209282
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
210-
; CHECK-NEXT: Group [[GRP5]]:
283+
; CHECK-NEXT: Group [[GRP14]]:
211284
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
212285
; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
213-
; CHECK-NEXT: Group [[GRP6]]:
286+
; CHECK-NEXT: Group [[GRP15]]:
214287
; CHECK-NEXT: (Low: %b High: (-4 + (8 * %n) + %b))
215288
; CHECK-NEXT: Member: {%b,+,8}<%loop>
216289
; CHECK-EMPTY:

0 commit comments

Comments
 (0)