Skip to content

Commit 9345ab3

Browse files
committed
[SCEVExpander] Skip creating <u 0 check, which is always false.
Unsigned compares of the form <u 0 are always false. Do not create such a redundant check in generateOverflowCheck.

The patch introduces a new lambda to create the check, so we can exit early conveniently and skip creating some instructions feeding the check.

I am planning to sink a few additional instructions as follow-ups, but I would prefer to do this separately, to keep the changes and diff smaller.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D116811
1 parent 0e19186 commit 9345ab3

File tree

4 files changed

+44
-48
lines changed

4 files changed

+44
-48
lines changed

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2516,38 +2516,46 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
25162516
// And select either 1. or 2. depending on whether step is positive or
25172517
// negative. If Step is known to be positive or negative, only create
25182518
// either 1. or 2.
2519-
Value *Add = nullptr, *Sub = nullptr;
2520-
bool NeedPosCheck = !SE.isKnownNegative(Step);
2521-
bool NeedNegCheck = !SE.isKnownPositive(Step);
2522-
2523-
if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
2524-
StartValue = InsertNoopCastOfTo(
2525-
StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
2526-
Value *NegMulV = Builder.CreateNeg(MulV);
2527-
if (NeedPosCheck)
2528-
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
2529-
if (NeedNegCheck)
2530-
Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
2531-
} else {
2519+
auto ComputeEndCheck = [&]() -> Value * {
2520+
// Checking <u 0 is always false.
2521+
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2522+
return ConstantInt::getFalse(Loc->getContext());
2523+
2524+
Value *Add = nullptr, *Sub = nullptr;
2525+
bool NeedPosCheck = !SE.isKnownNegative(Step);
2526+
bool NeedNegCheck = !SE.isKnownPositive(Step);
2527+
2528+
if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
2529+
StartValue = InsertNoopCastOfTo(
2530+
StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
2531+
Value *NegMulV = Builder.CreateNeg(MulV);
2532+
if (NeedPosCheck)
2533+
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
2534+
if (NeedNegCheck)
2535+
Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
2536+
} else {
2537+
if (NeedPosCheck)
2538+
Add = Builder.CreateAdd(StartValue, MulV);
2539+
if (NeedNegCheck)
2540+
Sub = Builder.CreateSub(StartValue, MulV);
2541+
}
2542+
2543+
Value *EndCompareLT = nullptr;
2544+
Value *EndCompareGT = nullptr;
2545+
Value *EndCheck = nullptr;
25322546
if (NeedPosCheck)
2533-
Add = Builder.CreateAdd(StartValue, MulV);
2547+
EndCheck = EndCompareLT = Builder.CreateICmp(
2548+
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
25342549
if (NeedNegCheck)
2535-
Sub = Builder.CreateSub(StartValue, MulV);
2536-
}
2537-
2538-
Value *EndCompareLT = nullptr;
2539-
Value *EndCompareGT = nullptr;
2540-
Value *EndCheck = nullptr;
2541-
if (NeedPosCheck)
2542-
EndCheck = EndCompareLT = Builder.CreateICmp(
2543-
Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
2544-
if (NeedNegCheck)
2545-
EndCheck = EndCompareGT = Builder.CreateICmp(
2546-
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2547-
if (NeedPosCheck && NeedNegCheck) {
2548-
// Select the answer based on the sign of Step.
2549-
EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2550-
}
2550+
EndCheck = EndCompareGT = Builder.CreateICmp(
2551+
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2552+
if (NeedPosCheck && NeedNegCheck) {
2553+
// Select the answer based on the sign of Step.
2554+
EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2555+
}
2556+
return EndCheck;
2557+
};
2558+
Value *EndCheck = ComputeEndCheck();
25512559

25522560
// If the backedge taken count type is larger than the AR type,
25532561
// check that we don't drop any bits by truncating it. If we are

llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,8 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
1717
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
1818
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
1919
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
20-
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
21-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
2220
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
23-
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
21+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
2422
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
2523
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
2624
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
@@ -159,10 +157,8 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
159157
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
160158
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
161159
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
162-
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
163-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
164160
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
165-
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
161+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
166162
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
167163
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
168164
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0

llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,8 @@ define void @load_clamped_index(i32* %A, i32* %B, i32 %N) {
2020
; CHECK: vector.scevcheck:
2121
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
2222
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
23-
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
24-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
2523
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
26-
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
24+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
2725
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
2826
; CHECK: vector.memcheck:
2927
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
@@ -107,10 +105,8 @@ define void @store_clamped_index(i32* %A, i32* %B, i32 %N) {
107105
; CHECK: vector.scevcheck:
108106
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
109107
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
110-
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
111-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
112108
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
113-
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
109+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
114110
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
115111
; CHECK: vector.memcheck:
116112
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[N]], -1
@@ -273,10 +269,8 @@ define void @clamped_index_equal_dependence(i32* %A, i32* %B, i32 %N) {
273269
; CHECK: vector.scevcheck:
274270
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
275271
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i2
276-
; CHECK-NEXT: [[TMP2:%.*]] = add i2 0, [[TMP1]]
277-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i2 [[TMP2]], 0
278272
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP0]], 3
279-
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
273+
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
280274
; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
281275
; CHECK: vector.ph:
282276
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 2

llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,8 @@ define void @f1(i16* noalias %a,
3434
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
3535
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
3636
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
37-
; LV-NEXT: [[TMP2:%.*]] = add i32 0, [[MUL_RESULT]]
38-
; LV-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP2]], 0
3937
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
40-
; LV-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]]
38+
; LV-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
4139
; LV-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
4240
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
4341
; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0

0 commit comments

Comments
 (0)