Skip to content

Commit f719cfa

Browse files
authored
LAA: be less conservative in isNoWrap (#112553)
isNoWrap has exactly one caller, which handles Assume = true separately but too conservatively. Instead, pass Assume to isNoWrap so that it is threaded through to getPtrStride, which already handles the Assume flag correctly. Also note that the Stride == 1 check in isNoWrap is incorrect: assuming ShouldCheckWrap is true, getPtrStride returns a stride of 1 or -1 unless isNoWrapAddRec or Assume is true, so isNoWrap can also accept a stride of -1, as well as any stride returned when isNoWrapAddRec is true. With this change, passing Assume = true to getPtrStride may return a non-unit stride, and that case is now handled correctly as well.
1 parent d15559d commit f719cfa

File tree

4 files changed

+13
-78
lines changed

4 files changed

+13
-78
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -825,14 +825,13 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
825825

826826
/// Check whether a pointer address cannot wrap.
827827
static bool isNoWrap(PredicatedScalarEvolution &PSE,
828-
const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr, Type *AccessTy,
829-
Loop *L) {
828+
const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr,
829+
Type *AccessTy, Loop *L, bool Assume) {
830830
const SCEV *PtrScev = PSE.getSCEV(Ptr);
831831
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
832832
return true;
833833

834-
int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides).value_or(0);
835-
return Stride == 1 ||
834+
return getPtrStride(PSE, AccessTy, Ptr, L, Strides, Assume).has_value() ||
836835
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
837836
}
838837

@@ -1079,12 +1078,8 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
10791078
if (TranslatedPtrs.size() > 1)
10801079
return false;
10811080

1082-
if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
1083-
const SCEV *Expr = PSE.getSCEV(Ptr);
1084-
if (!Assume || !isa<SCEVAddRecExpr>(Expr))
1085-
return false;
1086-
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1087-
}
1081+
if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop, Assume))
1082+
return false;
10881083
}
10891084
// If there's only one option for Ptr, look it up after bounds and wrap
10901085
// checking, because assumptions might have been added to PSE.

llvm/test/Analysis/LoopAccessAnalysis/offset-range-known-via-assume.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ define void @offset_i32_known_positive_via_assume_forward_dep_1(ptr %A, i64 %off
6969
; CHECK-EMPTY:
7070
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
7171
; CHECK-NEXT: SCEV assumptions:
72-
; CHECK-NEXT: {((4 * %offset)<nsw> + %A),+,4}<nw><%loop> Added Flags: <nusw>
7372
; CHECK-EMPTY:
7473
; CHECK-NEXT: Expressions re-written:
7574
;

llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -22,47 +22,9 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
2222
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1
2323
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
2424
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8
25-
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.umax.i64(i64 288, i64 [[TMP7]])
25+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.umax.i64(i64 128, i64 [[TMP7]])
2626
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP5]], [[TMP8]]
27-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
28-
; CHECK: [[VECTOR_SCEVCHECK]]:
29-
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[X_I64]], i64 99)
30-
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[SMAX]], [[X_I64]]
31-
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP9]], i64 1)
32-
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[SMAX]], [[UMIN]]
33-
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], [[X_I64]]
34-
; CHECK-NEXT: [[TMP12:%.*]] = udiv i64 [[TMP11]], 3
35-
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[UMIN]], [[TMP12]]
36-
; CHECK-NEXT: [[TMP14:%.*]] = shl nsw i64 [[X_I64]], 1
37-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP14]]
38-
; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 6, i64 [[TMP13]])
39-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
40-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
41-
; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT]]
42-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
43-
; CHECK-NEXT: [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP]]
44-
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]]
45-
; CHECK-NEXT: [[TMP19:%.*]] = shl nsw i64 [[X_I64]], 3
46-
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP19]]
47-
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 24, i64 [[TMP13]])
48-
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
49-
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
50-
; CHECK-NEXT: [[TMP20:%.*]] = sub i64 0, [[MUL_RESULT3]]
51-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
52-
; CHECK-NEXT: [[TMP22:%.*]] = icmp ult ptr [[TMP21]], [[SCEVGEP1]]
53-
; CHECK-NEXT: [[TMP23:%.*]] = or i1 [[TMP22]], [[MUL_OVERFLOW4]]
54-
; CHECK-NEXT: [[TMP24:%.*]] = add nsw i64 [[TMP19]], -8
55-
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP24]]
56-
; CHECK-NEXT: [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 24, i64 [[TMP13]])
57-
; CHECK-NEXT: [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
58-
; CHECK-NEXT: [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
59-
; CHECK-NEXT: [[TMP25:%.*]] = sub i64 0, [[MUL_RESULT7]]
60-
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
61-
; CHECK-NEXT: [[TMP27:%.*]] = icmp ult ptr [[TMP26]], [[SCEVGEP5]]
62-
; CHECK-NEXT: [[TMP28:%.*]] = or i1 [[TMP27]], [[MUL_OVERFLOW8]]
63-
; CHECK-NEXT: [[TMP29:%.*]] = or i1 [[TMP18]], [[TMP23]]
64-
; CHECK-NEXT: [[TMP30:%.*]] = or i1 [[TMP29]], [[TMP28]]
65-
; CHECK-NEXT: br i1 [[TMP30]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
27+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
6628
; CHECK: [[VECTOR_MEMCHECK]]:
6729
; CHECK-NEXT: [[TMP31:%.*]] = shl nsw i64 [[X_I64]], 1
6830
; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP31]]
@@ -130,12 +92,12 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
13092
; CHECK: [[MIDDLE_BLOCK]]:
13193
; CHECK-NEXT: br label %[[SCALAR_PH]]
13294
; CHECK: [[SCALAR_PH]]:
133-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X_I64]], %[[ENTRY]] ], [ [[X_I64]], %[[VECTOR_SCEVCHECK]] ], [ [[X_I64]], %[[VECTOR_MEMCHECK]] ]
134-
; CHECK-NEXT: [[BC_RESUME_VAL23:%.*]] = phi i32 [ [[IND_END22]], %[[MIDDLE_BLOCK]] ], [ [[X_I32]], %[[ENTRY]] ], [ [[X_I32]], %[[VECTOR_SCEVCHECK]] ], [ [[X_I32]], %[[VECTOR_MEMCHECK]] ]
95+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X_I64]], %[[ENTRY]] ], [ [[X_I64]], %[[VECTOR_MEMCHECK]] ]
96+
; CHECK-NEXT: [[BC_RESUME_VAL12:%.*]] = phi i32 [ [[IND_END22]], %[[MIDDLE_BLOCK]] ], [ [[X_I32]], %[[ENTRY]] ], [ [[X_I32]], %[[VECTOR_MEMCHECK]] ]
13597
; CHECK-NEXT: br label %[[LOOP:.*]]
13698
; CHECK: [[LOOP]]:
13799
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
138-
; CHECK-NEXT: [[IV_CONV:%.*]] = phi i32 [ [[BC_RESUME_VAL23]], %[[SCALAR_PH]] ], [ [[TMP64:%.*]], %[[LOOP]] ]
100+
; CHECK-NEXT: [[IV_CONV:%.*]] = phi i32 [ [[BC_RESUME_VAL12]], %[[SCALAR_PH]] ], [ [[TMP64:%.*]], %[[LOOP]] ]
139101
; CHECK-NEXT: [[GEP_I64:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]]
140102
; CHECK-NEXT: [[TMP61:%.*]] = load i64, ptr [[GEP_I64]], align 8
141103
; CHECK-NEXT: [[TMP62:%.*]] = sext i32 [[IV_CONV]] to i64

llvm/test/Transforms/LoopVersioning/wrapping-pointer-non-integral-addrspace.ll

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -37,28 +37,7 @@ define void @wrapping_ptr_nonint_addrspace(ptr %arg) {
3737
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr addrspace(13) [[SCEVGEP]], [[LOAD3]]
3838
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr addrspace(13) [[SCEVGEP2]], [[SCEVGEP1]]
3939
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
40-
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[CALL]]
41-
; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[SEXT]], 2
42-
; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP6]], -4
43-
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr addrspace(13) [[LOAD3]], i64 [[TMP7]]
44-
; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP5]])
45-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
46-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
47-
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 0, [[MUL_RESULT]]
48-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(13) [[SCEVGEP3]], i64 [[TMP8]]
49-
; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt ptr addrspace(13) [[TMP9]], [[SCEVGEP3]]
50-
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW]]
51-
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr addrspace(13) [[LOAD3]], i64 -4
52-
; CHECK-NEXT: [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP5]])
53-
; CHECK-NEXT: [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
54-
; CHECK-NEXT: [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
55-
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[MUL_RESULT6]]
56-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(13) [[SCEVGEP4]], i64 [[TMP12]]
57-
; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt ptr addrspace(13) [[TMP13]], [[SCEVGEP4]]
58-
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW7]]
59-
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP11]], [[TMP15]]
60-
; CHECK-NEXT: [[LVER_SAFE:%.*]] = or i1 [[FOUND_CONFLICT]], [[TMP16]]
61-
; CHECK-NEXT: br i1 [[LVER_SAFE]], label %[[LOOP_PH_LVER_ORIG:.*]], label %[[LOOP_PH:.*]]
40+
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[LOOP_PH_LVER_ORIG:.*]], label %[[LOOP_PH:.*]]
6241
; CHECK: [[LOOP_PH_LVER_ORIG]]:
6342
; CHECK-NEXT: br label %[[LOOP_LVER_ORIG:.*]]
6443
; CHECK: [[LOOP_LVER_ORIG]]:
@@ -82,10 +61,10 @@ define void @wrapping_ptr_nonint_addrspace(ptr %arg) {
8261
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(13) [[LOAD3]], i64 [[ADD1]]
8362
; CHECK-NEXT: store i32 [[LOAD4]], ptr addrspace(13) [[GEP1]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
8463
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[VALUE_PHI3]], [[CALL]]
85-
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT_LOOPEXIT8:.*]], label %[[LOOP]]
64+
; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT_LOOPEXIT3:.*]], label %[[LOOP]]
8665
; CHECK: [[EXIT_LOOPEXIT]]:
8766
; CHECK-NEXT: br label %[[EXIT:.*]]
88-
; CHECK: [[EXIT_LOOPEXIT8]]:
67+
; CHECK: [[EXIT_LOOPEXIT3]]:
8968
; CHECK-NEXT: br label %[[EXIT]]
9069
; CHECK: [[EXIT]]:
9170
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)