Commit 526701f — [LV] Add additional tests for narrowing interleave groups.

Add additional test coverage for narrowing interleave groups with derived IVs & scalar steps.

Parent: e9c9adc. 1 file changed: +303 additions, -0 deletions (new file, lines 1-303).
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph\:" --version 5
2+
; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2 %s
3+
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF4 %s
4+
5+
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
6+
target triple = "arm64-apple-macosx15.0.0"
7+
8+
define void @derived_int_ivs(ptr noalias %a, ptr noalias %b, i64 %end) {
9+
; VF2-LABEL: define void @derived_int_ivs(
10+
; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[END:%.*]]) {
11+
; VF2-NEXT: [[ENTRY:.*:]]
12+
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[END]], -32
13+
; VF2-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
14+
; VF2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
15+
; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
16+
; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
17+
; VF2: [[VECTOR_PH]]:
18+
; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
19+
; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
20+
; VF2-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 16
21+
; VF2-NEXT: [[TMP4:%.*]] = add i64 16, [[TMP3]]
22+
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
23+
; VF2: [[VECTOR_BODY]]:
24+
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
25+
; VF2-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 16
26+
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 16, [[TMP5]]
27+
; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
28+
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP6]], align 8
29+
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
30+
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
31+
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]]
32+
; VF2-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[STRIDED_VEC]], <2 x double> [[STRIDED_VEC1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
33+
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
34+
; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
35+
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
36+
; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
37+
; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
38+
; VF2: [[MIDDLE_BLOCK]]:
39+
; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
40+
; VF2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
41+
; VF2: [[SCALAR_PH]]:
42+
;
43+
; VF4-LABEL: define void @derived_int_ivs(
44+
; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[END:%.*]]) {
45+
; VF4-NEXT: [[ENTRY:.*:]]
46+
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[END]], -32
47+
; VF4-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
48+
; VF4-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
49+
; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
50+
; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
51+
; VF4: [[VECTOR_PH]]:
52+
; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
53+
; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
54+
; VF4-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 16
55+
; VF4-NEXT: [[TMP4:%.*]] = add i64 16, [[TMP3]]
56+
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
57+
; VF4: [[VECTOR_BODY]]:
58+
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
59+
; VF4-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 16
60+
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 16, [[TMP5]]
61+
; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
62+
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[TMP6]], align 8
63+
; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
64+
; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
65+
; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]]
66+
; VF4-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[STRIDED_VEC]], <4 x double> [[STRIDED_VEC1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
67+
; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP8]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
68+
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
69+
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
70+
; VF4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
71+
; VF4-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
72+
; VF4: [[MIDDLE_BLOCK]]:
73+
; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
74+
; VF4-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
75+
; VF4: [[SCALAR_PH]]:
76+
;
77+
entry:
78+
br label %loop
79+
80+
loop:
81+
%iv = phi i64 [ 16, %entry ], [ %iv.next, %loop ]
82+
%gep.a.0 = getelementptr inbounds i8, ptr %a, i64 %iv
83+
%l.0 = load double, ptr %gep.a.0, align 8
84+
%gep.b.0 = getelementptr inbounds i8, ptr %b, i64 %iv
85+
store double %l.0, ptr %gep.b.0, align 8
86+
%gep.a.1 = getelementptr inbounds i8, ptr %gep.a.0, i64 8
87+
%l.1 = load double, ptr %gep.a.1, align 8
88+
%gep.b.1 = getelementptr inbounds i8, ptr %gep.b.0, i64 8
89+
store double %l.1, ptr %gep.b.1, align 8
90+
%iv.next = add nuw nsw i64 %iv, 16
91+
%ec = icmp eq i64 %iv.next, %end
92+
br i1 %ec, label %exit, label %loop
93+
94+
exit:
95+
ret void
96+
}
97+
98+
define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) {
99+
; VF2-LABEL: define void @derived_pointer_ivs(
100+
; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr [[END:%.*]]) {
101+
; VF2-NEXT: [[ENTRY:.*:]]
102+
; VF2-NEXT: [[A5:%.*]] = ptrtoint ptr [[A]] to i64
103+
; VF2-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64
104+
; VF2-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
105+
; VF2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
106+
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -16
107+
; VF2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A5]]
108+
; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4
109+
; VF2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
110+
; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
111+
; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
112+
; VF2: [[VECTOR_MEMCHECK]]:
113+
; VF2-NEXT: [[TMP4:%.*]] = add i64 [[END1]], -16
114+
; VF2-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[A2]]
115+
; VF2-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4
116+
; VF2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 4
117+
; VF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 16
118+
; VF2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
119+
; VF2-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
120+
; VF2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP3]]
121+
; VF2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
122+
; VF2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
123+
; VF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
124+
; VF2: [[VECTOR_PH]]:
125+
; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
126+
; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
127+
; VF2-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 16
128+
; VF2-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]]
129+
; VF2-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], 16
130+
; VF2-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
131+
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
132+
; VF2: [[VECTOR_BODY]]:
133+
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
134+
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
135+
; VF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
136+
; VF2-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 16
137+
; VF2-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX6]]
138+
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[NEXT_GEP]], align 8
139+
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
140+
; VF2-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
141+
; VF2-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[STRIDED_VEC]], <2 x double> [[STRIDED_VEC8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
142+
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
143+
; VF2-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP7]], align 8
144+
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
145+
; VF2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
146+
; VF2-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
147+
; VF2: [[MIDDLE_BLOCK]]:
148+
; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
149+
; VF2-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
150+
; VF2: [[SCALAR_PH]]:
151+
;
152+
; VF4-LABEL: define void @derived_pointer_ivs(
153+
; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr [[END:%.*]]) {
154+
; VF4-NEXT: [[ENTRY:.*:]]
155+
; VF4-NEXT: [[A5:%.*]] = ptrtoint ptr [[A]] to i64
156+
; VF4-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64
157+
; VF4-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
158+
; VF4-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
159+
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -16
160+
; VF4-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A5]]
161+
; VF4-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4
162+
; VF4-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
163+
; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
164+
; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
165+
; VF4: [[VECTOR_MEMCHECK]]:
166+
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[END1]], -16
167+
; VF4-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[A2]]
168+
; VF4-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4
169+
; VF4-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 4
170+
; VF4-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 16
171+
; VF4-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]]
172+
; VF4-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
173+
; VF4-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP3]]
174+
; VF4-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
175+
; VF4-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
176+
; VF4-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
177+
; VF4: [[VECTOR_PH]]:
178+
; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
179+
; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
180+
; VF4-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 16
181+
; VF4-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]]
182+
; VF4-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], 16
183+
; VF4-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
184+
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
185+
; VF4: [[VECTOR_BODY]]:
186+
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
187+
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
188+
; VF4-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
189+
; VF4-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 16
190+
; VF4-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX6]]
191+
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[NEXT_GEP]], align 8
192+
; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
193+
; VF4-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
194+
; VF4-NEXT: [[TMP13:%.*]] = shufflevector <4 x double> [[STRIDED_VEC]], <4 x double> [[STRIDED_VEC8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
195+
; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
196+
; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP7]], align 8
197+
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
198+
; VF4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
199+
; VF4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
200+
; VF4: [[MIDDLE_BLOCK]]:
201+
; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
202+
; VF4-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
203+
; VF4: [[SCALAR_PH]]:
204+
;
205+
entry:
206+
br label %loop
207+
208+
loop:
209+
%ptr.iv.1 = phi ptr [ %a, %entry ], [ %ptr.iv.1.next, %loop ]
210+
%ptr.iv.2 = phi ptr [ %b, %entry ], [ %ptr.iv.2.next, %loop ]
211+
%l.0 = load double, ptr %ptr.iv.1, align 8
212+
store double %l.0, ptr %ptr.iv.2, align 8
213+
%gep.a.1 = getelementptr inbounds i8, ptr %ptr.iv.1, i64 8
214+
%l.1 = load double, ptr %gep.a.1, align 8
215+
%gep.b.1 = getelementptr inbounds i8, ptr %ptr.iv.2, i64 8
216+
store double %l.1, ptr %gep.b.1, align 8
217+
%ptr.iv.1.next = getelementptr inbounds i8, ptr %ptr.iv.1, i64 16
218+
%ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 16
219+
%ec = icmp eq ptr %ptr.iv.1.next, %end
220+
br i1 %ec, label %exit, label %loop
221+
222+
exit:
223+
ret void
224+
}
225+
226+
define void @narrow_with_uniform_add_and_gep(ptr noalias %p) {
227+
; VF2-LABEL: define void @narrow_with_uniform_add_and_gep(
228+
; VF2-SAME: ptr noalias [[P:%.*]]) {
229+
; VF2-NEXT: [[ENTRY:.*:]]
230+
; VF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
231+
; VF2: [[VECTOR_PH]]:
232+
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
233+
; VF2: [[VECTOR_BODY]]:
234+
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
235+
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
236+
; VF2-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 0
237+
; VF2-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP0]]
238+
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
239+
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
240+
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
241+
; VF2-NEXT: [[TMP2:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
242+
; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 1)
243+
; VF2-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
244+
; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
245+
; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
246+
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
247+
; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
248+
; VF2-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
249+
; VF2: [[MIDDLE_BLOCK]]:
250+
; VF2-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
251+
; VF2: [[SCALAR_PH]]:
252+
;
253+
; VF4-LABEL: define void @narrow_with_uniform_add_and_gep(
254+
; VF4-SAME: ptr noalias [[P:%.*]]) {
255+
; VF4-NEXT: [[ENTRY:.*:]]
256+
; VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
257+
; VF4: [[VECTOR_PH]]:
258+
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
259+
; VF4: [[VECTOR_BODY]]:
260+
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
261+
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
262+
; VF4-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 0
263+
; VF4-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP0]]
264+
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP1]], align 8
265+
; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
266+
; VF4-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
267+
; VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
268+
; VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 1)
269+
; VF4-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
270+
; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
271+
; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
272+
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
273+
; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
274+
; VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
275+
; VF4: [[MIDDLE_BLOCK]]:
276+
; VF4-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
277+
; VF4: [[SCALAR_PH]]:
278+
;
279+
entry:
280+
br label %loop
281+
282+
loop:
283+
%iv = phi i64 [0, %entry], [%iv.next, %loop]
284+
285+
%iv0 = add nuw nsw i64 %iv, 0
286+
%gep0 = getelementptr i64, ptr %p, i64 %iv0
287+
%load0 = load i64, ptr %gep0
288+
%add0 = add i64 %load0, 1
289+
store i64 %add0, ptr %gep0
290+
291+
%iv1 = add nuw nsw i64 %iv, 1
292+
%gep1 = getelementptr i64, ptr %p, i64 %iv1
293+
%load1 = load i64, ptr %gep1
294+
%add1 = add i64 %load1, 1
295+
store i64 %add1, ptr %gep1
296+
297+
%iv.next = add nuw nsw i64 %iv, 2
298+
%done = icmp eq i64 %iv.next, 1024
299+
br i1 %done, label %exit, label %loop
300+
301+
exit:
302+
ret void
303+
}

Comments on commit 526701f: 0