-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[NFC][LoopVectorize] Add more simple early exit tests #112529
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
I realised we are missing tests to cover more loops with multiple early exits - some countable and some uncountable. I've also added a few SVE versions of the test in the AArch64 directory, since I expect there to be some divergence from the same tests in the top level directory once we start vectorising them.
@llvm/pr-subscribers-llvm-transforms Author: David Sherwood (david-arm) ChangesI realised we are missing tests to cover more loops with multiple early exits - some countable and some uncountable. I've also added a few SVE versions of the test in the AArch64 directory. Once we can vectorise such early exit loops it's a good sanity check to make sure they also vectorise for SVE. Also, for some of the tests I expect there to be some divergence from the same tests in the top level directory once we start vectorising them. Patch is 22.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112529.diff 2 Files Affected:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
new file mode 100644
index 00000000000000..82556bdd2a5ec1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -0,0 +1,333 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S < %s -p loop-vectorize | FileCheck %s --check-prefixes=CHECK
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @init_mem(ptr, i64);
+
+define i64 @same_exit_block_pre_inc_use1() #1 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX_NEXT1:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX2]]
+; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX2]]
+; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX2]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT1]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX2]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[START_0_LCSSA]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+; In this example the early exit block appears in the list of ExitNotTaken
+; SCEVs, but is not computable.
+define i64 @same_exit_block_pre_inc_use4() {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use4() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i64], align 8
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i64], align 8
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i64]
+ %p2 = alloca [1024 x i64]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i64, ptr %p1, i64 %index
+ %ld1 = load i64, ptr %arrayidx, align 1
+ %cmp3 = icmp ult i64 %index, %ld1
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @loop_contains_safe_call() #1 {
+; CHECK-LABEL: define i64 @loop_contains_safe_call(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX_NEXT1:%.*]], [[LOOP_INC1:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX2]]
+; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC1]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX2]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT1]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX2]], [[LOOP1]] ], [ 67, [[LOOP_INC1]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %p1, i64 %index
+ %ld1 = load float, ptr %arrayidx, align 1
+ %sqrt = tail call fast float @llvm.sqrt.f32(float %ld1)
+ %cmp = fcmp fast ult float %sqrt, 3.0e+00
+ br i1 %cmp, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @loop_contains_safe_div() #1 {
+; CHECK-LABEL: define i64 @loop_contains_safe_div(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
+ %ld1 = load i32, ptr %arrayidx, align 1
+ %div = udiv i32 %ld1, 20000
+ %cmp = icmp eq i32 %div, 1
+ br i1 %cmp, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(8) %p2) {
+; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
+; DEBUG: LV: Found an early exit loop with symbolic max backedge taken count: 63
+; DEBUG-NEXT: LV: We can vectorize this loop!
+; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported.
+; CHECK-LABEL: define i64 @loop_contains_load_after_early_exit(
+; CHECK-SAME: ptr align 8 dereferenceable(1024) [[P2:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ [[LD2]], [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
+ %ld1 = load i32, ptr %arrayidx, align 1
+ %cmp = icmp eq i32 %ld1, 1
+ br i1 %cmp, label %loop.inc, label %loop.end
+
+loop.inc:
+ %arrayidx2 = getelementptr inbounds i64, ptr %p2, i64 %index
+ %ld2 = load i64, ptr %arrayidx2, align 8
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ %ld2, %loop.inc ]
+ ret i64 %retval
+}
+
+
+; The form of the induction variables requires SCEV predicates.
+define i32 @diff_exit_block_needs_scev_check(i32 %end) {
+; DEBUG-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check'
+; DEBUG: Found an early exit loop with symbolic max backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32)))<nsw>
+; DEBUG-NEXT: LV: We can vectorize this loop!
+; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported.
+; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check(
+; CHECK-SAME: i32 [[END:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023
+; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
+; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY1]], label [[EXIT:%.*]]
+; CHECK: found:
+; CHECK-NEXT: ret i32 1
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %p1 = alloca [1024 x i32]
+ %p2 = alloca [1024 x i32]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ %end.clamped = and i32 %end, 1023
+ br label %for.body
+
+for.body:
+ %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
+ %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
+ %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
+ %0 = load i32, ptr %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
+ %1 = load i32, ptr %arrayidx2, align 4
+ %cmp.early = icmp eq i32 %0, %1
+ br i1 %cmp.early, label %found, label %for.inc
+
+for.inc:
+ %ind.next = add i8 %ind, 1
+ %conv = zext i8 %ind.next to i32
+ %gep.ind.next = add i64 %gep.ind, 1
+ %cmp = icmp ult i32 %conv, %end.clamped
+ br i1 %cmp, label %for.body, label %exit
+
+found:
+ ret i32 1
+
+exit:
+ ret i32 0
+}
+
+
+declare i32 @foo(i32) readonly
+declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
+
+attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
+attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
diff --git a/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll
index 49454ae18db79d..d5e4f4d016c6e2 100644
--- a/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll
@@ -311,7 +311,6 @@ loop.end:
}
-
define i64 @same_exit_block_post_inc_use() {
; CHECK-LABEL: define i64 @same_exit_block_post_inc_use() {
; CHECK-NEXT: entry:
@@ -860,8 +859,8 @@ loop.end:
; There are multiple exit blocks - two of them have an exact representation for the
; exit-not-taken counts and the other is unknown, i.e. the "early exit".
-define i64 @multiple_exits_one_early() {
-; CHECK-LABEL: define i64 @multiple_exits_one_early() {
+define i64 @multiple_exiting_one_early_same_exit() {
+; CHECK-LABEL: define i64 @multiple_exiting_one_early_same_exit() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
@@ -918,6 +917,189 @@ loop.end:
}
+define i64 @multiple_exiting_one_early_same_exit_phi_of_consts() {
+; CHECK-LABEL: define i64 @multiple_exiting_one_early_same_exit_phi_of_consts() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
+; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]]
+; CHECK: search:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[SEARCH]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %cmp1 = icmp ne i64 %index, 64
+ br i1 %cmp1, label %search, label %loop.end
+
+search:
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.end, label %loop.inc
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 128
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 0, %loop ], [ 1, %search ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @multiple_exiting_one_early_diff_exit() {
+; CHECK-LABEL: define i64 @multiple_exiting_one_early_diff_exit() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
+; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]]
+; CHECK: search:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END_EARLY:%.*]], label [[LOOP_INC]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end.early:
+; CHECK-NEXT: [[RET_EARLY:%.*]] = phi i64 [ [[INDEX]], [[SEARCH]] ]
+; CHECK-NEXT: ret i64 [[RET_EARLY]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ 128, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %cmp1 = icmp ne i64 %index, 64
+ br i1 %cmp1, label %search, label %loop.end
+
+search:
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.end.early, label %loop.inc
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 128
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end.early:
+ %ret.early = phi i64 [ %index, %search ]
+ ret i64 %...
[truncated]
|
Since this patch is just adding tests, if there are no objections I'll merge the patch tomorrow morning! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
@@ -918,6 +917,189 @@ loop.end: | |||
} | |||
|
|||
|
|||
define i64 @multiple_exiting_one_early_same_exit_phi_of_consts() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This file is getting a bit big , perhaps worth splitting it up into multiple files if possible?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, that makes sense. If it's ok with you, I'll land this patch first then follow up with a simple refactor that collects tests of the same class in their own file, i.e. early exits with no outside users of loop variables, ones that use loop variables, tests with multiple early exits (countable + uncountable), tests for different memory access patterns, etc.
I realised we are missing tests to cover more loops with multiple early exits - some countable and some uncountable.
I've also added a few SVE versions of the test in the AArch64 directory. Once we can vectorise such early exit loops it's a good sanity check to make sure they also vectorise for SVE. Also, for some of the tests I expect there to be some divergence from the same tests in the top level directory once we start vectorising them.