You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[SelectionDAG][AArch64] Legalize power of 2 vector.[de]interleaveN (#141513)
After #139893, we now have
[de]interleave intrinsics for factors 2-8 inclusive, with the plan to
eventually get the loop vectorizer to emit a single intrinsic for these
factors instead of recursively deinterleaving (to support scalable
non-power-of-2 factors and to remove the complexity in the interleaved
access pass).
AArch64 currently supports scalable interleaved groups of factors 2 and
4 from the loop vectorizer. For factor 4 this is currently emitted as a
series of recursive [de]interleaves, and normally converted to a target
intrinsic in the interleaved access pass.
However, if for some reason the interleaved access pass doesn't catch it,
the [de]interleave4 intrinsic will need to be lowered by the backend.
This patch legalizes the node and any other power-of-2 factor to smaller
factors, so if a target can lower [de]interleave2 it should be able to
handle this without crashing.
Factor 3 will probably be more complicated to lower so I've left it out
for now. We can disable it in the AArch64 cost model when implementing
the loop vectorizer changes.
Copy file name to clipboard — Expand all lines: llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
+96 −1 — Lines changed: 96 additions & 1 deletion
Original file line number
Diff line number
Diff line change
@@ -151,6 +151,102 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
     ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
 }
 
+
+define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv64i8(<vscale x 64 x i8> %vec) {
+  %retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> %vec)
+  ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %retval
+}
+
+define {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv32i16(<vscale x 32 x i16> %vec) {
+  %retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave4.nxv32i16(<vscale x 32 x i16> %vec)
+  ret {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} %retval
+}
+
+define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv16i32(<vscale x 16 x i32> %vec) {
+  %retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %vec)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %retval
+}
+
+define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv8i64(<vscale x 8 x i64> %vec) {
+  %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave4.nxv8i64(<vscale x 8 x i64> %vec)
+  ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
+}
+
+define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv16i64(<vscale x 16 x i64> %vec) {
+  %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave8.nxv16i64(<vscale x 16 x i64> %vec)
+  ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
+}
+
 ; Predicated
 define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
Copy file name to clipboard — Expand all lines: llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
+96 −0 — Lines changed: 96 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -146,6 +146,102 @@ define <vscale x 4 x i64> @interleave2_nxv4i64(<vscale x 2 x i64> %vec0, <vscale
     ret <vscale x 4 x i64> %retval
 }
 
+define <vscale x 64 x i8> @interleave4_nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3) {
+; CHECK-LABEL: interleave4_nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z4.b, z1.b, z3.b
+; CHECK-NEXT:    zip1 z5.b, z0.b, z2.b
+; CHECK-NEXT:    zip2 z3.b, z1.b, z3.b
+; CHECK-NEXT:    zip2 z6.b, z0.b, z2.b
+; CHECK-NEXT:    zip1 z0.b, z5.b, z4.b
+; CHECK-NEXT:    zip2 z1.b, z5.b, z4.b
+; CHECK-NEXT:    zip1 z2.b, z6.b, z3.b
+; CHECK-NEXT:    zip2 z3.b, z6.b, z3.b
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3)
+  ret <vscale x 64 x i8> %retval
+}
+
+define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3) {
+; CHECK-LABEL: interleave4_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z4.h, z1.h, z3.h
+; CHECK-NEXT:    zip1 z5.h, z0.h, z2.h
+; CHECK-NEXT:    zip2 z3.h, z1.h, z3.h
+; CHECK-NEXT:    zip2 z6.h, z0.h, z2.h
+; CHECK-NEXT:    zip1 z0.h, z5.h, z4.h
+; CHECK-NEXT:    zip2 z1.h, z5.h, z4.h
+; CHECK-NEXT:    zip1 z2.h, z6.h, z3.h
+; CHECK-NEXT:    zip2 z3.h, z6.h, z3.h
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
+  ret <vscale x 32 x i16> %retval
+}
+
+define <vscale x 16 x i32> @interleave4_nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3) {
+; CHECK-LABEL: interleave4_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z4.s, z1.s, z3.s
+; CHECK-NEXT:    zip1 z5.s, z0.s, z2.s
+; CHECK-NEXT:    zip2 z3.s, z1.s, z3.s
+; CHECK-NEXT:    zip2 z6.s, z0.s, z2.s
+; CHECK-NEXT:    zip1 z0.s, z5.s, z4.s
+; CHECK-NEXT:    zip2 z1.s, z5.s, z4.s
+; CHECK-NEXT:    zip1 z2.s, z6.s, z3.s
+; CHECK-NEXT:    zip2 z3.s, z6.s, z3.s
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3)
+  ret <vscale x 16 x i32> %retval
+}
+
+define <vscale x 8 x i64> @interleave4_nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3) {
+; CHECK-LABEL: interleave4_nxv8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z4.d, z1.d, z3.d
+; CHECK-NEXT:    zip1 z5.d, z0.d, z2.d
+; CHECK-NEXT:    zip2 z3.d, z1.d, z3.d
+; CHECK-NEXT:    zip2 z6.d, z0.d, z2.d
+; CHECK-NEXT:    zip1 z0.d, z5.d, z4.d
+; CHECK-NEXT:    zip2 z1.d, z5.d, z4.d
+; CHECK-NEXT:    zip1 z2.d, z6.d, z3.d
+; CHECK-NEXT:    zip2 z3.d, z6.d, z3.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x i64> @llvm.vector.interleave4.nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3)
+  ret <vscale x 8 x i64> %retval
+}
+
+define <vscale x 16 x i64> @interleave8_nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7) {
+; CHECK-LABEL: interleave8_nxv16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z24.d, z3.d, z7.d
+; CHECK-NEXT:    zip1 z25.d, z1.d, z5.d
+; CHECK-NEXT:    zip1 z26.d, z2.d, z6.d
+; CHECK-NEXT:    zip1 z27.d, z0.d, z4.d
+; CHECK-NEXT:    zip2 z3.d, z3.d, z7.d
+; CHECK-NEXT:    zip2 z1.d, z1.d, z5.d
+; CHECK-NEXT:    zip2 z2.d, z2.d, z6.d
+; CHECK-NEXT:    zip2 z0.d, z0.d, z4.d
+; CHECK-NEXT:    zip1 z4.d, z25.d, z24.d
+; CHECK-NEXT:    zip2 z6.d, z25.d, z24.d
+; CHECK-NEXT:    zip1 z5.d, z27.d, z26.d
+; CHECK-NEXT:    zip2 z7.d, z27.d, z26.d
+; CHECK-NEXT:    zip1 z24.d, z1.d, z3.d
+; CHECK-NEXT:    zip1 z25.d, z0.d, z2.d
+; CHECK-NEXT:    zip2 z26.d, z1.d, z3.d
+; CHECK-NEXT:    zip2 z27.d, z0.d, z2.d
+; CHECK-NEXT:    zip1 z0.d, z5.d, z4.d
+; CHECK-NEXT:    zip2 z1.d, z5.d, z4.d
+; CHECK-NEXT:    zip1 z2.d, z7.d, z6.d
+; CHECK-NEXT:    zip2 z3.d, z7.d, z6.d
+; CHECK-NEXT:    zip1 z4.d, z25.d, z24.d
+; CHECK-NEXT:    zip2 z5.d, z25.d, z24.d
+; CHECK-NEXT:    zip1 z6.d, z27.d, z26.d
+; CHECK-NEXT:    zip2 z7.d, z27.d, z26.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 16 x i64> @llvm.vector.interleave8.nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7)
+  ret <vscale x 16 x i64> %retval
+}
+
 ; Predicated
 
 define <vscale x 32 x i1> @interleave2_nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1) {
0 commit comments