-
Notifications
You must be signed in to change notification settings - Fork 14.3k
LICM: extend hoistAddSub to unsigned case #106373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Trivially extend dd0cf23 ([LICM] Reassociate & hoist sub expressions) to handle unsigned predicates as well. Alive2 proofs: test_01_unsigned: https://alive2.llvm.org/ce/z/GXJ39G test_03_unsigned: https://alive2.llvm.org/ce/z/P9Bt4J
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) Changes: Trivially extend dd0cf23 ([LICM] Reassociate & hoist sub expressions) to handle unsigned predicates as well. Alive2 proofs: (links are listed in the pull request description above). Patch is 26.75 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/106373.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 526ae4e8834396..86c7dceffc5245 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2537,14 +2537,19 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
AssumptionCache *AC, DominatorTree *DT) {
- assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
+ bool IsSigned = ICmpInst::isSigned(Pred);
+
// Try to represent VariantLHS as sum of invariant and variant operands.
using namespace PatternMatch;
Value *VariantOp, *InvariantOp;
- if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
+ if (IsSigned &&
+ !match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
+ return false;
+ if (!IsSigned &&
+ !match(VariantLHS, m_NUWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
return false;
// LHS itself is a loop-variant, try to represent it in the form:
@@ -2559,17 +2564,20 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
// normal linear arithmetics). Overflows make things much more complicated, so
// we want to avoid this.
auto &DL = L.getHeader()->getDataLayout();
- bool ProvedNoOverflowAfterReassociate =
- computeOverflowForSignedSub(InvariantRHS, InvariantOp,
- SimplifyQuery(DL, DT, AC, &ICmp)) ==
- llvm::OverflowResult::NeverOverflows;
- if (!ProvedNoOverflowAfterReassociate)
+ SimplifyQuery SQ(DL, DT, AC, &ICmp);
+ if (IsSigned && computeOverflowForSignedSub(InvariantRHS, InvariantOp, SQ) !=
+ llvm::OverflowResult::NeverOverflows)
+ return false;
+ if (!IsSigned &&
+ computeOverflowForUnsignedSub(InvariantRHS, InvariantOp, SQ) !=
+ llvm::OverflowResult::NeverOverflows)
return false;
auto *Preheader = L.getLoopPreheader();
assert(Preheader && "Loop is not in simplify form?");
IRBuilder<> Builder(Preheader->getTerminator());
- Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
- /*HasNUW*/ false, /*HasNSW*/ true);
+ Value *NewCmpOp =
+ Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
+ /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned);
ICmp.setPredicate(Pred);
ICmp.setOperand(0, VariantOp);
ICmp.setOperand(1, NewCmpOp);
@@ -2584,14 +2592,19 @@ static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
AssumptionCache *AC, DominatorTree *DT) {
- assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
+ bool IsSigned = ICmpInst::isSigned(Pred);
+
// Try to represent VariantLHS as sum of invariant and variant operands.
using namespace PatternMatch;
Value *VariantOp, *InvariantOp;
- if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
+ if (IsSigned &&
+ !match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
+ return false;
+ if (!IsSigned &&
+ !match(VariantLHS, m_NUWSub(m_Value(VariantOp), m_Value(InvariantOp))))
return false;
bool VariantSubtracted = false;
@@ -2613,16 +2626,26 @@ static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
// "C1 - C2" does not overflow.
auto &DL = L.getHeader()->getDataLayout();
SimplifyQuery SQ(DL, DT, AC, &ICmp);
- if (VariantSubtracted) {
+ if (VariantSubtracted && IsSigned) {
// C1 - LV < C2 --> LV > C1 - C2
if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, SQ) !=
llvm::OverflowResult::NeverOverflows)
return false;
- } else {
+ } else if (VariantSubtracted && !IsSigned) {
+ // C1 - LV < C2 --> LV > C1 - C2
+ if (computeOverflowForUnsignedSub(InvariantOp, InvariantRHS, SQ) !=
+ llvm::OverflowResult::NeverOverflows)
+ return false;
+ } else if (!VariantSubtracted && IsSigned) {
// LV - C1 < C2 --> LV < C1 + C2
if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, SQ) !=
llvm::OverflowResult::NeverOverflows)
return false;
+ } else { // !VariantSubtracted && !IsSigned
+ // LV - C1 < C2 --> LV < C1 + C2
+ if (computeOverflowForUnsignedAdd(InvariantOp, InvariantRHS, SQ) !=
+ llvm::OverflowResult::NeverOverflows)
+ return false;
}
auto *Preheader = L.getLoopPreheader();
assert(Preheader && "Loop is not in simplify form?");
@@ -2630,9 +2653,9 @@ static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
Value *NewCmpOp =
VariantSubtracted
? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
- /*HasNUW*/ false, /*HasNSW*/ true)
+ /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned)
: Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
- /*HasNUW*/ false, /*HasNSW*/ true);
+ /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned);
ICmp.setPredicate(Pred);
ICmp.setOperand(0, VariantOp);
ICmp.setOperand(1, NewCmpOp);
@@ -2650,10 +2673,6 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
return false;
- // TODO: Support unsigned predicates?
- if (!ICmpInst::isSigned(Pred))
- return false;
-
// Put variant operand to LHS position.
if (L.isLoopInvariant(LHS)) {
std::swap(LHS, RHS);
diff --git a/llvm/test/Transforms/LICM/hoist-add-sub.ll b/llvm/test/Transforms/LICM/hoist-add-sub.ll
index 5393cdb1d29c43..06418fdd543626 100644
--- a/llvm/test/Transforms/LICM/hoist-add-sub.ll
+++ b/llvm/test/Transforms/LICM/hoist-add-sub.ll
@@ -51,6 +51,55 @@ out_of_bounds:
ret i32 -1
}
+define i32 @test_01_unsigned(ptr %p, ptr %x_p, ptr %length_p) {
+; CHECK-LABEL: define i32 @test_01_unsigned
+; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]]
+; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG2:![0-9]+]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nuw i32 [[X]], 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ugt i32 [[IV]], [[INVARIANT_OP]]
+; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
+; CHECK: backedge:
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]]
+; CHECK: out_of_bounds:
+; CHECK-NEXT: ret i32 -1
+;
+entry:
+ %x = load i32, ptr %x_p, !range !2
+ %length = load i32, ptr %length_p, !range !1
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ %arith = sub nuw i32 %x, %iv
+ %x_check = icmp ult i32 %arith, 4
+ br i1 %x_check, label %out_of_bounds, label %backedge
+
+backedge:
+ %el.ptr = getelementptr i32, ptr %p, i32 %iv
+ store i32 1, ptr %el.ptr
+ %iv.next = add nuw nsw i32 %iv, 4
+ %loop_cond = icmp ult i32 %iv.next, %length
+ br i1 %loop_cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv.next
+
+out_of_bounds:
+ ret i32 -1
+}
+
; TODO: x - iv < 4 ==> iv > x - 4
define i32 @test_01a(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_01a
@@ -114,6 +163,68 @@ failed:
ret i32 -2
}
+define i32 @test_01a_unsigned(ptr %p, ptr %x_p, ptr %length_p) {
+; CHECK-LABEL: define i32 @test_01a_unsigned
+; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4
+; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4
+; CHECK-NEXT: [[PRECOND_1:%.*]] = icmp uge i32 [[X]], 0
+; CHECK-NEXT: [[PRECOND_2:%.*]] = icmp uge i32 [[LENGTH]], 0
+; CHECK-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]]
+; CHECK-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]]
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[ARITH:%.*]] = sub nuw i32 [[X]], [[IV]]
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[ARITH]], 4
+; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
+; CHECK: backedge:
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]]
+; CHECK: out_of_bounds:
+; CHECK-NEXT: ret i32 -1
+; CHECK: failed:
+; CHECK-NEXT: ret i32 -2
+;
+entry:
+ %x = load i32, ptr %x_p
+ %length = load i32, ptr %length_p
+ %precond_1 = icmp uge i32 %x, 0
+ %precond_2 = icmp uge i32 %length, 0
+ %precond = and i1 %precond_1, %precond_2
+ br i1 %precond, label %loop, label %failed
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ %arith = sub nuw i32 %x, %iv
+ %x_check = icmp ult i32 %arith, 4
+ br i1 %x_check, label %out_of_bounds, label %backedge
+
+backedge:
+ %el.ptr = getelementptr i32, ptr %p, i32 %iv
+ store i32 1, ptr %el.ptr
+ %iv.next = add nuw nsw i32 %iv, 4
+ %loop_cond = icmp ult i32 %iv.next, %length
+ br i1 %loop_cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv.next
+
+out_of_bounds:
+ ret i32 -1
+
+failed:
+ ret i32 -2
+}
+
; Range info is missing for x, cannot prove no-overflow. Should not hoist.
define i32 @test_01_neg(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_01_neg
@@ -164,6 +275,54 @@ out_of_bounds:
ret i32 -1
}
+define i32 @test_01_neg_unsigned(ptr %p, ptr %x_p, ptr %length_p) {
+; CHECK-LABEL: define i32 @test_01_neg_unsigned
+; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4
+; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT: [[ARITH:%.*]] = sub nuw i32 [[X]], [[IV]]
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[ARITH]], 4
+; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
+; CHECK: backedge:
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]]
+; CHECK: out_of_bounds:
+; CHECK-NEXT: ret i32 -1
+;
+entry:
+ %x = load i32, ptr %x_p
+ %length = load i32, ptr %length_p, !range !0
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ %arith = sub nuw i32 %x, %iv
+ %x_check = icmp ult i32 %arith, 4
+ br i1 %x_check, label %out_of_bounds, label %backedge
+
+backedge:
+ %el.ptr = getelementptr i32, ptr %p, i32 %iv
+ store i32 1, ptr %el.ptr
+ %iv.next = add nuw nsw i32 %iv, 4
+ %loop_cond = icmp ult i32 %iv.next, %length
+ br i1 %loop_cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv.next
+
+out_of_bounds:
+ ret i32 -1
+}
; x + iv < 4 ==> iv < 4 - x
define i32 @test_02(ptr %p, ptr %x_p, ptr %length_p) {
@@ -215,6 +374,55 @@ out_of_bounds:
ret i32 -1
}
+define i32 @test_02_unsigned_neg(ptr %p, ptr %x_p, ptr %length_p) {
+; CHECK-LABEL: define i32 @test_02_unsigned_neg
+; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1]]
+; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG2]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT: [[ARITH:%.*]] = add nuw i32 [[X]], [[IV]]
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[ARITH]], 4
+; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
+; CHECK: backedge:
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]]
+; CHECK: out_of_bounds:
+; CHECK-NEXT: ret i32 -1
+;
+entry:
+ %x = load i32, ptr %x_p, !range !2
+ %length = load i32, ptr %length_p, !range !1
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ %arith = add nuw i32 %x, %iv
+ %x_check = icmp ult i32 %arith, 4
+ br i1 %x_check, label %out_of_bounds, label %backedge
+
+backedge:
+ %el.ptr = getelementptr i32, ptr %p, i32 %iv
+ store i32 1, ptr %el.ptr
+ %iv.next = add nuw nsw i32 %iv, 4
+ %loop_cond = icmp ult i32 %iv.next, %length
+ br i1 %loop_cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv.next
+
+out_of_bounds:
+ ret i32 -1
+}
+
; TODO: x + iv < 4 ==> iv < 4 - x
define i32 @test_02a(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_02a
@@ -278,12 +486,74 @@ failed:
ret i32 -2
}
+define i32 @test_02a_unsigned(ptr %p, ptr %x_p, ptr %length_p) {
+; CHECK-LABEL: define i32 @test_02a_unsigned
+; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4
+; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4
+; CHECK-NEXT: [[PRECOND_1:%.*]] = icmp uge i32 [[X]], 0
+; CHECK-NEXT: [[PRECOND_2:%.*]] = icmp uge i32 [[LENGTH]], 0
+; CHECK-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]]
+; CHECK-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]]
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[ARITH:%.*]] = add nuw i32 [[X]], [[IV]]
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[ARITH]], 4
+; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
+; CHECK: backedge:
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]]
+; CHECK: out_of_bounds:
+; CHECK-NEXT: ret i32 -1
+; CHECK: failed:
+; CHECK-NEXT: ret i32 -2
+;
+entry:
+ %x = load i32, ptr %x_p
+ %length = load i32, ptr %length_p
+ %precond_1 = icmp uge i32 %x, 0
+ %precond_2 = icmp uge i32 %length, 0
+ %precond = and i1 %precond_1, %precond_2
+ br i1 %precond, label %loop, label %failed
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ %arith = add nuw i32 %x, %iv
+ %x_check = icmp ult i32 %arith, 4
+ br i1 %x_check, label %out_of_bounds, label %backedge
+
+backedge:
+ %el.ptr = getelementptr i32, ptr %p, i32 %iv
+ store i32 1, ptr %el.ptr
+ %iv.next = add nuw nsw i32 %iv, 4
+ %loop_cond = icmp ult i32 %iv.next, %length
+ br i1 %loop_cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv.next
+
+out_of_bounds:
+ ret i32 -1
+
+failed:
+ ret i32 -2
+}
+
; iv - x < 4 ==> iv < 4 + x
define i32 @test_03(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_03
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]]
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG2]]
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -328,6 +598,55 @@ out_of_bounds:
ret i32 -1
}
+define i32 @test_03_unsigned(ptr %p, ptr %x_p, ptr %length_p) {
+; CHECK-LABEL: define i32 @test_03_unsigned
+; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG2]]
+; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG2]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nuw i32 [[X]], 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_OP]]
+; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
+; CHECK: backedge:
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]]
+; CHECK: out_of_bounds:
+; CHECK-NEXT: ret i32 -1
+;
+entry:
+ %x = load i32, ptr %x_p, !range !1
+ %length = load i32, ptr %length_p, !range !1
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ %arith = sub nuw i32 %iv, %x
+ %x_check = icmp ult i32 %arith, 4
+ br i1 %x_check, label %out_of_bounds, label %backedge
+
+backedge:
+ %el.ptr = getelementptr i32, ptr %p, i32 %iv
+ store i32 1, ptr %el.ptr
+ %iv.next = add nuw nsw i32 %iv, 4
+ %loop_cond = icmp ult i32 %iv.next, %length
+ br i1 %loop_cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv.next
+
+out_of_bounds:
+ ret i32 -1
+}
+
; TODO: iv - x < 4 ==> iv < 4 + x
define i32 @test_03a(ptr %p, ptr %x_p, ptr %length_p) {
; CHECK-LABEL: define i32 @test_03a
@@ -391,6 +710,68 @@ failed:
ret i32 -2
}
+define i32 @test_03a_unsigned(ptr %p, ptr %x_p, ptr %length_p) {
+; CHECK-LABEL: define i32 @test_03a_unsigned
+; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alive2 proofs: test_01_unsigned: https://alive2.llvm.org/ce/z/GXJ39G; test_02_unsigned: https://alive2.llvm.org/ce/z/xWmcnT; test_03_unsigned: https://alive2.llvm.org/ce/z/P9Bt4J; test_04_unsigned: https://alive2.llvm.org/ce/z/AxBZ9R
For proofs it would be more useful to only prove the performed reassociation, without the loop, which is not really relevant here. It's easy to get false positive proofs when loops are involved.
Also, the proofs should encode the preconditions generically (e.g. using with.overflow intrinsics), not hardcode specific values.
Right, @fhahn taught me that in a different PR. Apparently, `-src-unroll` and `-tgt-unroll` are necessary. I've removed the proofs now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Fixed proofs: https://alive2.llvm.org/ce/z/GdDBtT
Trivially extend dd0cf23 ([LICM] Reassociate & hoist sub expressions) to handle unsigned predicates as well.
Alive2 proofs: https://alive2.llvm.org/ce/z/GdDBtT.