Skip to content

Commit ad1b877

Browse files
committed
[SCEVExpander] Only create multiplication if needed.
Commit 9345ab3 updated generateOverflowCheck to skip creating checks that always evaluate to false. As a result, |Step| * trip count only needs to be computed when the result of the multiplication is actually used. This commit sinks the multiplication into ComputeEndCheck, so that it is only created when an actual check is emitted.
1 parent 3891619 commit ad1b877

File tree

3 files changed: +22 -30 lines changed

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2494,21 +2494,6 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
24942494
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
24952495

24962496
// Compute |Step| * Backedge
2497-
Value *MulV, *OfMul;
2498-
if (Step->isOne()) {
2499-
// Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2500-
// needed, there is never an overflow, so to avoid artificially inflating
2501-
// the cost of the check, directly emit the optimized IR.
2502-
MulV = TruncTripCount;
2503-
OfMul = ConstantInt::getFalse(MulV->getContext());
2504-
} else {
2505-
auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2506-
Intrinsic::umul_with_overflow, Ty);
2507-
CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2508-
MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2509-
OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2510-
}
2511-
25122497
// Compute:
25132498
// 1. Start + |Step| * Backedge < Start
25142499
// 2. Start - |Step| * Backedge > Start
@@ -2521,6 +2506,22 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
25212506
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
25222507
return ConstantInt::getFalse(Loc->getContext());
25232508

2509+
Value *MulV, *OfMul;
2510+
if (Step->isOne()) {
2511+
// Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2512+
// needed, there is never an overflow, so to avoid artificially inflating
2513+
// the cost of the check, directly emit the optimized IR.
2514+
MulV = TruncTripCount;
2515+
OfMul = ConstantInt::getFalse(MulV->getContext());
2516+
} else {
2517+
auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2518+
Intrinsic::umul_with_overflow, Ty);
2519+
CallInst *Mul =
2520+
Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2521+
MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2522+
OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2523+
}
2524+
25242525
Value *Add = nullptr, *Sub = nullptr;
25252526
bool NeedPosCheck = !SE.isKnownNegative(Step);
25262527
bool NeedNegCheck = !SE.isKnownPositive(Step);

llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
1414
; CHECK: for.body.lver.check:
1515
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
1616
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
17-
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
18-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
19-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
2017
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
2118
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
2219
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
@@ -88,10 +85,10 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
8885
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
8986
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
9087
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
91-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
88+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT3:%.*]], label [[FOR_BODY]]
9289
; CHECK: for.end.loopexit:
9390
; CHECK-NEXT: br label [[FOR_END:%.*]]
94-
; CHECK: for.end.loopexit6:
91+
; CHECK: for.end.loopexit3:
9592
; CHECK-NEXT: br label [[FOR_END]]
9693
; CHECK: for.end:
9794
; CHECK-NEXT: ret void
@@ -153,9 +150,6 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
153150
; CHECK: for.body.lver.check:
154151
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
155152
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
156-
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
157-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
158-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
159153
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
160154
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
161155
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
@@ -227,10 +221,10 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
227221
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
228222
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
229223
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
230-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
224+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
231225
; CHECK: for.end.loopexit:
232226
; CHECK-NEXT: br label [[FOR_END:%.*]]
233-
; CHECK: for.end.loopexit5:
227+
; CHECK: for.end.loopexit2:
234228
; CHECK-NEXT: br label [[FOR_END]]
235229
; CHECK: for.end:
236230
; CHECK-NEXT: ret void

llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ define void @f1(i16* noalias %a,
3131
; LV-NEXT: [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
3232
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
3333
; LV-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
34-
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
35-
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
36-
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
3734
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
3835
; LV-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
3936
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
@@ -78,10 +75,10 @@ define void @f1(i16* noalias %a,
7875
; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
7976
; LV-NEXT: [[INC1]] = add i32 [[IND1]], 1
8077
; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
81-
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
78+
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT3:%.*]], label [[FOR_BODY]]
8279
; LV: for.end.loopexit:
8380
; LV-NEXT: br label [[FOR_END:%.*]]
84-
; LV: for.end.loopexit6:
81+
; LV: for.end.loopexit3:
8582
; LV-NEXT: br label [[FOR_END]]
8683
; LV: for.end:
8784
; LV-NEXT: ret void

0 commit comments

Comments
 (0)