Skip to content

Commit dd0cf23

Browse files
committed
[LICM] Reassociate & hoist sub expressions
LICM can reassociate mixed variant/invariant comparison/arithmetic operations and hoist the invariant parts out of the loop if it can prove that they can be computed without overflow. Motivating example: ``` INV1 - VAR1 < INV2 ``` can be turned into ``` VAR1 > INV1 - INV2 ``` if we can prove that there is no signed overflow. Then `INV1 - INV2` can be computed outside the loop, which saves one arithmetic operation inside the loop. Reviewed By: skatkov Differential Revision: https://reviews.llvm.org/D148001
1 parent 0a6aec2 commit dd0cf23

File tree

2 files changed

+70
-7
lines changed

2 files changed

+70
-7
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2572,6 +2572,68 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
25722572
return true;
25732573
}
25742574

2575+
/// Try to reassociate and hoist the following two patterns:
2576+
/// LV - C1 < C2 --> LV < C1 + C2,
2577+
/// C1 - LV < C2 --> LV > C1 - C2.
2578+
static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
2579+
Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2580+
ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2581+
AssumptionCache *AC, DominatorTree *DT) {
2582+
assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
2583+
assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2584+
assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2585+
2586+
// Try to represent VariantLHS as sum of invariant and variant operands.
2587+
using namespace PatternMatch;
2588+
Value *VariantOp, *InvariantOp;
2589+
if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
2590+
return false;
2591+
2592+
bool VariantSubtracted = false;
2593+
// LHS itself is a loop-variant, try to represent it in the form:
2594+
// "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
2595+
// the variant operand goes with minus, we use a slightly different scheme.
2596+
if (L.isLoopInvariant(VariantOp)) {
2597+
std::swap(VariantOp, InvariantOp);
2598+
VariantSubtracted = true;
2599+
Pred = ICmpInst::getSwappedPredicate(Pred);
2600+
}
2601+
if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2602+
return false;
2603+
2604+
// In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
2605+
// freely move values from left side of inequality to right side (just as in
2606+
// normal linear arithmetics). Overflows make things much more complicated, so
2607+
// we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
2608+
// "C1 - C2" does not overflow.
2609+
auto &DL = L.getHeader()->getModule()->getDataLayout();
2610+
if (VariantSubtracted) {
2611+
// C1 - LV < C2 --> LV > C1 - C2
2612+
if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, &ICmp,
2613+
DT) != llvm::OverflowResult::NeverOverflows)
2614+
return false;
2615+
} else {
2616+
// LV - C1 < C2 --> LV < C1 + C2
2617+
if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, &ICmp,
2618+
DT) != llvm::OverflowResult::NeverOverflows)
2619+
return false;
2620+
}
2621+
auto *Preheader = L.getLoopPreheader();
2622+
assert(Preheader && "Loop is not in simplify form?");
2623+
IRBuilder<> Builder(Preheader->getTerminator());
2624+
Value *NewCmpOp =
2625+
VariantSubtracted
2626+
? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
2627+
/*HasNUW*/ false, /*HasNSW*/ true)
2628+
: Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
2629+
/*HasNUW*/ false, /*HasNSW*/ true);
2630+
ICmp.setPredicate(Pred);
2631+
ICmp.setOperand(0, VariantOp);
2632+
ICmp.setOperand(1, NewCmpOp);
2633+
eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
2634+
return true;
2635+
}
2636+
25752637
/// Reassociate and hoist add/sub expressions.
25762638
static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
25772639
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2601,7 +2663,8 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
26012663
if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
26022664
return true;
26032665

2604-
// TODO: Support Sub.
2666+
if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
2667+
return true;
26052668

26062669
return false;
26072670
}

llvm/test/Transforms/LICM/hoist-add-sub.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
22
; RUN: opt -S -passes=licm -verify-memoryssa < %s | FileCheck %s
33

4-
; TODO: x - iv < 4 ==> iv > x - 4
4+
; x - iv < 4 ==> iv > x - 4
55
define i32 @test_01(ptr %p, ptr %x_p, ptr %length_p) {
66
; CHECK-LABEL: define i32 @test_01
77
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
88
; CHECK-NEXT: entry:
99
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0:![0-9]+]]
1010
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
11+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 [[X]], 4
1112
; CHECK-NEXT: br label [[LOOP:%.*]]
1213
; CHECK: loop:
1314
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
14-
; CHECK-NEXT: [[ARITH:%.*]] = sub nsw i32 [[X]], [[IV]]
15-
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
15+
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_OP]]
1616
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
1717
; CHECK: backedge:
1818
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -278,18 +278,18 @@ failed:
278278
ret i32 -2
279279
}
280280

281-
; TODO: iv - x < 4 ==> iv < 4 + x
281+
; iv - x < 4 ==> iv < 4 + x
282282
define i32 @test_03(ptr %p, ptr %x_p, ptr %length_p) {
283283
; CHECK-LABEL: define i32 @test_03
284284
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
285285
; CHECK-NEXT: entry:
286286
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]]
287287
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
288+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4
288289
; CHECK-NEXT: br label [[LOOP:%.*]]
289290
; CHECK: loop:
290291
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
291-
; CHECK-NEXT: [[ARITH:%.*]] = sub nsw i32 [[IV]], [[X]]
292-
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
292+
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
293293
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
294294
; CHECK: backedge:
295295
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]

0 commit comments

Comments
 (0)