Skip to content

Commit 40b7034

Browse files
committed
[LV] Add tests for vector backedge elimination with early-exit loops.
1 parent ee29e16 commit 40b7034

File tree

1 file changed

+317
-0
lines changed

1 file changed

+317
-0
lines changed
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
2+
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=1 -enable-early-exit-vectorization -S %s | FileCheck --check-prefixes=VF8UF1 %s
3+
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=2 -enable-early-exit-vectorization -S %s | FileCheck --check-prefixes=VF8UF2 %s
4+
; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=16 -force-vector-interleave=1 -enable-early-exit-vectorization -S %s | FileCheck --check-prefixes=VF16UF1 %s
5+
6+
; Data layout: little-endian ("e"), Mach-O name mangling ("m:o"), 64-bit aligned
; i64, 128-bit aligned i128, native integer widths of 32 and 64 bits, and a
; 128-bit natural stack alignment ("S128").
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
7+
8+
; Check if the vector loop's exit condition can be simplified to true for a
9+
; given VF/IC combination, which would allow removing the vector loop backedge.
10+
; Early-exit loop over the bytes of %A: the function returns 0 as soon as a
; zero byte is loaded (exit taken from loop.header) and 1 if the latch exit is
; taken instead. The latch exits once %iv.next equals 16, so the loop runs at
; most 16 iterations. %N is not referenced by the body.
; NOTE(review): the name says "less_than_16" while the latch compares against
; 16 - confirm the naming is intended.
define i8 @test_early_exit_max_tc_less_than_16(ptr %A, i64 %N) nosync nofree {
11+
; VF8UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
12+
; VF8UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
13+
; VF8UF1-NEXT: [[ENTRY:.*]]:
14+
; VF8UF1-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
15+
; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16+
; VF8UF1: [[VECTOR_PH]]:
17+
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
18+
; VF8UF1: [[VECTOR_BODY]]:
19+
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
20+
; VF8UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
21+
; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
22+
; VF8UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
23+
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
24+
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
25+
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
26+
; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
27+
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
28+
; VF8UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
29+
; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
30+
; VF8UF1: [[MIDDLE_SPLIT]]:
31+
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
32+
; VF8UF1: [[MIDDLE_BLOCK]]:
33+
; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
34+
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
35+
; VF8UF1-NEXT: br label %[[EXIT]]
36+
; VF8UF1: [[SCALAR_PH]]:
37+
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
38+
; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]]
39+
; VF8UF1: [[LOOP_HEADER]]:
40+
; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
41+
; VF8UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
42+
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
43+
; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
44+
; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
45+
; VF8UF1: [[LOOP_LATCH]]:
46+
; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
47+
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
48+
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
49+
; VF8UF1: [[EXIT]]:
50+
; VF8UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
51+
; VF8UF1-NEXT: ret i8 [[RES]]
52+
;
53+
; VF8UF2-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
54+
; VF8UF2-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
55+
; VF8UF2-NEXT: [[ENTRY:.*]]:
56+
; VF8UF2-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
57+
; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
58+
; VF8UF2: [[VECTOR_PH]]:
59+
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
60+
; VF8UF2: [[VECTOR_BODY]]:
61+
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
62+
; VF8UF2-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
63+
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
64+
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
65+
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
66+
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
67+
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
68+
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
69+
; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
70+
; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
71+
; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
72+
; VF8UF2: [[MIDDLE_SPLIT]]:
73+
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
74+
; VF8UF2: [[MIDDLE_BLOCK]]:
75+
; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
76+
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
77+
; VF8UF2-NEXT: br label %[[EXIT]]
78+
; VF8UF2: [[SCALAR_PH]]:
79+
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
80+
; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]]
81+
; VF8UF2: [[LOOP_HEADER]]:
82+
; VF8UF2-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
83+
; VF8UF2-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
84+
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
85+
; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
86+
; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
87+
; VF8UF2: [[LOOP_LATCH]]:
88+
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
89+
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
90+
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
91+
; VF8UF2: [[EXIT]]:
92+
; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
93+
; VF8UF2-NEXT: ret i8 [[RES]]
94+
;
95+
; VF16UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
96+
; VF16UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
97+
; VF16UF1-NEXT: [[ENTRY:.*]]:
98+
; VF16UF1-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
99+
; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
100+
; VF16UF1: [[VECTOR_PH]]:
101+
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
102+
; VF16UF1: [[VECTOR_BODY]]:
103+
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
104+
; VF16UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
105+
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
106+
; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
107+
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
108+
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
109+
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
110+
; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
111+
; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
112+
; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
113+
; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
114+
; VF16UF1: [[MIDDLE_SPLIT]]:
115+
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
116+
; VF16UF1: [[MIDDLE_BLOCK]]:
117+
; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
118+
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
119+
; VF16UF1-NEXT: br label %[[EXIT]]
120+
; VF16UF1: [[SCALAR_PH]]:
121+
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
122+
; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]]
123+
; VF16UF1: [[LOOP_HEADER]]:
124+
; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
125+
; VF16UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
126+
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
127+
; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
128+
; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
129+
; VF16UF1: [[LOOP_LATCH]]:
130+
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
131+
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
132+
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
133+
; VF16UF1: [[EXIT]]:
134+
; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
135+
; VF16UF1-NEXT: ret i8 [[RES]]
136+
;
137+
entry:
138+
; Assume bundle: %A is dereferenceable for 32 bytes.
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 32) ]
139+
br label %loop.header
140+
141+
loop.header:
142+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
143+
%p.src = getelementptr inbounds i8, ptr %A, i64 %iv
144+
%l = load i8, ptr %p.src, align 1
145+
%c = icmp eq i8 %l, 0
146+
; Early exit: leave the loop as soon as the loaded byte is zero.
br i1 %c, label %exit, label %loop.latch
147+
148+
loop.latch:
149+
%iv.next = add nsw i64 %iv, 1
150+
%cmp = icmp eq i64 %iv.next, 16
151+
; Counted exit: leave the loop once %iv.next reaches 16.
br i1 %cmp, label %exit, label %loop.header
152+
153+
exit:
154+
; 0 when arriving from the early exit in loop.header, 1 when arriving from the latch.
%res = phi i8 [ 0, %loop.header ], [ 1, %loop.latch ]
155+
ret i8 %res
156+
}
157+
158+
; Same early-exit loop shape as the i8 variant, but the induction variable is
; live-out: the function returns %iv when the early exit in loop.header is
; taken (a zero byte was loaded) and the constant 1 when the latch exit is
; taken. The latch exits once %iv.next equals 16, so the loop runs at most 16
; iterations. %N is not referenced by the body.
; NOTE(review): the name says "less_than_16" while the latch compares against
; 16 - confirm the naming is intended.
define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64 %N) nosync nofree {
159+
; VF8UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
160+
; VF8UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
161+
; VF8UF1-NEXT: [[ENTRY:.*]]:
162+
; VF8UF1-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
163+
; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
164+
; VF8UF1: [[VECTOR_PH]]:
165+
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
166+
; VF8UF1: [[VECTOR_BODY]]:
167+
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
168+
; VF8UF1-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
169+
; VF8UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
170+
; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
171+
; VF8UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
172+
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
173+
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
174+
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
175+
; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
176+
; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
177+
; VF8UF1-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
178+
; VF8UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
179+
; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
180+
; VF8UF1: [[MIDDLE_SPLIT]]:
181+
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
182+
; VF8UF1: [[MIDDLE_BLOCK]]:
183+
; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
184+
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
185+
; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
186+
; VF8UF1-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <8 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
187+
; VF8UF1-NEXT: br label %[[EXIT]]
188+
; VF8UF1: [[SCALAR_PH]]:
189+
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
190+
; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]]
191+
; VF8UF1: [[LOOP_HEADER]]:
192+
; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
193+
; VF8UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
194+
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
195+
; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
196+
; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
197+
; VF8UF1: [[LOOP_LATCH]]:
198+
; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
199+
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
200+
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
201+
; VF8UF1: [[EXIT]]:
202+
; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
203+
; VF8UF1-NEXT: ret i64 [[RES]]
204+
;
205+
; VF8UF2-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
206+
; VF8UF2-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
207+
; VF8UF2-NEXT: [[ENTRY:.*]]:
208+
; VF8UF2-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
209+
; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
210+
; VF8UF2: [[VECTOR_PH]]:
211+
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
212+
; VF8UF2: [[VECTOR_BODY]]:
213+
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
214+
; VF8UF2-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
215+
; VF8UF2-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
216+
; VF8UF2-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
217+
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
218+
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
219+
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
220+
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
221+
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
222+
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
223+
; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
224+
; VF8UF2-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
225+
; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
226+
; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
227+
; VF8UF2: [[MIDDLE_SPLIT]]:
228+
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
229+
; VF8UF2: [[MIDDLE_BLOCK]]:
230+
; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
231+
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
232+
; VF8UF2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
233+
; VF8UF2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <8 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
234+
; VF8UF2-NEXT: br label %[[EXIT]]
235+
; VF8UF2: [[SCALAR_PH]]:
236+
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
237+
; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]]
238+
; VF8UF2: [[LOOP_HEADER]]:
239+
; VF8UF2-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
240+
; VF8UF2-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
241+
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
242+
; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
243+
; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
244+
; VF8UF2: [[LOOP_LATCH]]:
245+
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
246+
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
247+
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
248+
; VF8UF2: [[EXIT]]:
249+
; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
250+
; VF8UF2-NEXT: ret i64 [[RES]]
251+
;
252+
; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
253+
; VF16UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
254+
; VF16UF1-NEXT: [[ENTRY:.*]]:
255+
; VF16UF1-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
256+
; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
257+
; VF16UF1: [[VECTOR_PH]]:
258+
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
259+
; VF16UF1: [[VECTOR_BODY]]:
260+
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
261+
; VF16UF1-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
262+
; VF16UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
263+
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
264+
; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
265+
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
266+
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
267+
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
268+
; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
269+
; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
270+
; VF16UF1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
271+
; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
272+
; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
273+
; VF16UF1: [[MIDDLE_SPLIT]]:
274+
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
275+
; VF16UF1: [[MIDDLE_BLOCK]]:
276+
; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
277+
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
278+
; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true)
279+
; VF16UF1-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
280+
; VF16UF1-NEXT: br label %[[EXIT]]
281+
; VF16UF1: [[SCALAR_PH]]:
282+
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
283+
; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]]
284+
; VF16UF1: [[LOOP_HEADER]]:
285+
; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
286+
; VF16UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
287+
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
288+
; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
289+
; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
290+
; VF16UF1: [[LOOP_LATCH]]:
291+
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
292+
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
293+
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
294+
; VF16UF1: [[EXIT]]:
295+
; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
296+
; VF16UF1-NEXT: ret i64 [[RES]]
297+
;
298+
entry:
299+
; Assume bundle: %A is dereferenceable for 32 bytes.
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 32) ]
300+
br label %loop.header
301+
302+
loop.header:
303+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
304+
%p.src = getelementptr inbounds i8, ptr %A, i64 %iv
305+
%l = load i8, ptr %p.src, align 1
306+
%c = icmp eq i8 %l, 0
307+
; Early exit: leave the loop as soon as the loaded byte is zero.
br i1 %c, label %exit, label %loop.latch
308+
309+
loop.latch:
310+
%iv.next = add nsw i64 %iv, 1
311+
%cmp = icmp eq i64 %iv.next, 16
312+
; Counted exit: leave the loop once %iv.next reaches 16.
br i1 %cmp, label %exit, label %loop.header
313+
314+
exit:
315+
; %iv is used outside the loop: it is the result on the early-exit edge, while
; the latch edge yields the constant 1.
%res = phi i64 [ %iv, %loop.header ], [ 1, %loop.latch ]
316+
ret i64 %res
317+
}

0 commit comments

Comments
 (0)