Skip to content

Commit b86b468

Browse files
committed
[IRCE] Support non-strict range check's predicate
Patch by Aleksandr Popov!
Differential Revision: https://reviews.llvm.org/D148227
1 parent 22a408a commit b86b468

File tree

2 files changed: +61 −5 lines changed

llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -295,6 +295,18 @@ bool InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
295295
Index = SE.getSCEV(LHS);
296296
End = SE.getSCEV(RHS);
297297
return true;
298+
299+
case ICmpInst::ICMP_SLE:
300+
case ICmpInst::ICMP_ULE:
301+
const SCEV *One = SE.getOne(RHS->getType());
302+
const SCEV *RHSS = SE.getSCEV(RHS);
303+
bool Signed = Pred == ICmpInst::ICMP_SLE;
304+
if (SE.willNotOverflow(Instruction::BinaryOps::Add, Signed, RHSS, One)) {
305+
Index = SE.getSCEV(LHS);
306+
End = SE.getAddExpr(RHSS, One);
307+
return true;
308+
}
309+
return false;
298310
}
299311

300312
llvm_unreachable("default clause returns!");

llvm/test/Transforms/IRCE/stride_more_than_1.ll

Lines changed: 49 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -755,31 +755,75 @@ out_of_bounds:
755755
}
756756

757757
; Same as test_09 but range check comparison is non-strict:
758-
; TODO: IRCE is allowed.
758+
; IRCE is allowed.
759759
define i32 @test_11(ptr %p, ptr %capacity_p, ptr %num_elements_p) {
760760
; CHECK-LABEL: define i32 @test_11
761761
; CHECK-SAME: (ptr [[P:%.*]], ptr [[CAPACITY_P:%.*]], ptr [[NUM_ELEMENTS_P:%.*]]) {
762762
; CHECK-NEXT: entry:
763763
; CHECK-NEXT: [[CAPACITY:%.*]] = load i32, ptr [[CAPACITY_P]], align 4, !range [[RNG16]]
764764
; CHECK-NEXT: [[NUM_ELEMENTS:%.*]] = load i32, ptr [[NUM_ELEMENTS_P]], align 4, !range [[RNG16]]
765765
; CHECK-NEXT: [[LIMIT:%.*]] = sub i32 [[CAPACITY]], 4
766+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[CAPACITY]], -3
767+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[CAPACITY]], 2147483646
768+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0)
769+
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[SMAX]]
770+
; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[CAPACITY]], -3
771+
; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP3]], i32 0)
772+
; CHECK-NEXT: [[SMAX2:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 -1)
773+
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[SMAX2]], 1
774+
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP2]], [[TMP4]]
775+
; CHECK-NEXT: [[SMIN3:%.*]] = call i32 @llvm.smin.i32(i32 [[NUM_ELEMENTS]], i32 [[TMP5]])
776+
; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN3]], i32 0)
777+
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
778+
; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
779+
; CHECK: loop.preheader:
766780
; CHECK-NEXT: br label [[LOOP:%.*]]
767781
; CHECK: loop:
768-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
782+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
769783
; CHECK-NEXT: [[CAPACITY_CHECK:%.*]] = icmp sle i32 [[IV]], [[LIMIT]]
770-
; CHECK-NEXT: br i1 [[CAPACITY_CHECK]], label [[BACKEDGE]], label [[OUT_OF_BOUNDS:%.*]], !prof [[PROF17]]
784+
; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof [[PROF17]]
771785
; CHECK: backedge:
772786
; CHECK-NEXT: [[IV_WIDE:%.*]] = zext i32 [[IV]] to i64
773787
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i64 [[IV_WIDE]]
774788
; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
775789
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
776790
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[NUM_ELEMENTS]]
777-
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
791+
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[IV_NEXT]], [[EXIT_MAINLOOP_AT]]
792+
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
793+
; CHECK: main.exit.selector:
794+
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
795+
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ]
796+
; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i32 [[IV_NEXT_LCSSA]], [[NUM_ELEMENTS]]
797+
; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]]
798+
; CHECK: main.pseudo.exit:
799+
; CHECK-NEXT: [[IV_COPY:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
800+
; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
801+
; CHECK-NEXT: br label [[POSTLOOP:%.*]]
802+
; CHECK: exit.loopexit:
803+
; CHECK-NEXT: [[IV_LCSSA1_PH:%.*]] = phi i32 [ [[IV_POSTLOOP:%.*]], [[BACKEDGE_POSTLOOP:%.*]] ]
804+
; CHECK-NEXT: br label [[EXIT]]
778805
; CHECK: exit:
779-
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV]], [[BACKEDGE]] ]
806+
; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i32 [ [[IV_LCSSA]], [[MAIN_EXIT_SELECTOR]] ], [ [[IV_LCSSA1_PH]], [[EXIT_LOOPEXIT:%.*]] ]
780807
; CHECK-NEXT: ret i32 [[IV_LCSSA1]]
808+
; CHECK: out_of_bounds.loopexit:
809+
; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]]
810+
; CHECK: out_of_bounds.loopexit5:
811+
; CHECK-NEXT: br label [[OUT_OF_BOUNDS]]
781812
; CHECK: out_of_bounds:
782813
; CHECK-NEXT: ret i32 -1
814+
; CHECK: postloop:
815+
; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]]
816+
; CHECK: loop.postloop:
817+
; CHECK-NEXT: [[IV_POSTLOOP]] = phi i32 [ [[IV_COPY]], [[POSTLOOP]] ], [ [[IV_NEXT_POSTLOOP:%.*]], [[BACKEDGE_POSTLOOP]] ]
818+
; CHECK-NEXT: [[CAPACITY_CHECK_POSTLOOP:%.*]] = icmp sle i32 [[IV_POSTLOOP]], [[LIMIT]]
819+
; CHECK-NEXT: br i1 [[CAPACITY_CHECK_POSTLOOP]], label [[BACKEDGE_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof [[PROF17]]
820+
; CHECK: backedge.postloop:
821+
; CHECK-NEXT: [[IV_WIDE_POSTLOOP:%.*]] = zext i32 [[IV_POSTLOOP]] to i64
822+
; CHECK-NEXT: [[EL_PTR_POSTLOOP:%.*]] = getelementptr i32, ptr [[P]], i64 [[IV_WIDE_POSTLOOP]]
823+
; CHECK-NEXT: store i32 1, ptr [[EL_PTR_POSTLOOP]], align 4
824+
; CHECK-NEXT: [[IV_NEXT_POSTLOOP]] = add nuw nsw i32 [[IV_POSTLOOP]], 4
825+
; CHECK-NEXT: [[LOOP_COND_POSTLOOP:%.*]] = icmp slt i32 [[IV_NEXT_POSTLOOP]], [[NUM_ELEMENTS]]
826+
; CHECK-NEXT: br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP20:![0-9]+]], !irce.loop.clone !6
783827
;
784828
entry:
785829
%capacity = load i32, ptr %capacity_p, !range !4

0 commit comments

Comments
 (0)