Skip to content

Commit 9a2d4d1

Browse files
authored
[SelectionDAG][AArch64] Legalize power of 2 vector.[de]interleaveN (#141513)
After #139893, we now have [de]interleave intrinsics for factors 2-8 inclusive, with the plan to eventually get the loop vectorizer to emit a single intrinsic for these factors instead of recursively deinterleaving (to support scalable non-power-of-2 factors and to remove the complexity in the interleaved access pass). AArch64 currently supports scalable interleaved groups of factors 2 and 4 from the loop vectorizer. For factor 4, this is currently emitted as a series of recursive [de]interleaves and is normally converted to a target intrinsic in the interleaved access pass. However, if for some reason the interleaved access pass doesn't catch it, the [de]interleave4 intrinsic will need to be lowered by the backend. This patch legalizes the node for any power-of-2 factor greater than 2 by expanding it into [de]interleaves of smaller factors, so if a target can lower [de]interleave2, it should be able to handle this without crashing. Factor 3 will probably be more complicated to lower, so I've left it out for now. We can disable it in the AArch64 cost model when implementing the loop vectorizer changes.
1 parent ddfeecf commit 9a2d4d1

File tree

4 files changed

+252
-1
lines changed

4 files changed

+252
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 53 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3472,6 +3472,59 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
34723472
Results.push_back(TLI.expandVectorSplice(Node, DAG));
34733473
break;
34743474
}
3475+
case ISD::VECTOR_DEINTERLEAVE: {
3476+
unsigned Factor = Node->getNumOperands();
3477+
if (Factor <= 2 || !isPowerOf2_32(Factor))
3478+
break;
3479+
SmallVector<SDValue, 8> Ops;
3480+
for (SDValue Op : Node->ops())
3481+
Ops.push_back(Op);
3482+
EVT VecVT = Node->getValueType(0);
3483+
SmallVector<EVT> HalfVTs(Factor / 2, VecVT);
3484+
// Deinterleave at Factor/2 so each result contains two factors interleaved:
3485+
// a0b0 c0d0 a1b1 c1d1 -> [a0c0 b0d0] [a1c1 b1d1]
3486+
SDValue L = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfVTs,
3487+
ArrayRef(Ops).take_front(Factor / 2));
3488+
SDValue R = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfVTs,
3489+
ArrayRef(Ops).take_back(Factor / 2));
3490+
Results.resize(Factor);
3491+
// Deinterleave the 2 factors out:
3492+
// [a0c0 a1c1] [b0d0 b1d1] -> a0a1 b0b1 c0c1 d0d1
3493+
for (unsigned I = 0; I < Factor / 2; I++) {
3494+
SDValue Deinterleave =
3495+
DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, {VecVT, VecVT},
3496+
{L.getValue(I), R.getValue(I)});
3497+
Results[I] = Deinterleave.getValue(0);
3498+
Results[I + Factor / 2] = Deinterleave.getValue(1);
3499+
}
3500+
break;
3501+
}
3502+
case ISD::VECTOR_INTERLEAVE: {
3503+
unsigned Factor = Node->getNumOperands();
3504+
if (Factor <= 2 || !isPowerOf2_32(Factor))
3505+
break;
3506+
EVT VecVT = Node->getValueType(0);
3507+
SmallVector<EVT> HalfVTs(Factor / 2, VecVT);
3508+
SmallVector<SDValue, 8> LOps, ROps;
3509+
// Interleave so we have 2 factors per result:
3510+
// a0a1 b0b1 c0c1 d0d1 -> [a0c0 b0d0] [a1c1 b1d1]
3511+
for (unsigned I = 0; I < Factor / 2; I++) {
3512+
SDValue Interleave =
3513+
DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, {VecVT, VecVT},
3514+
{Node->getOperand(I), Node->getOperand(I + Factor / 2)});
3515+
LOps.push_back(Interleave.getValue(0));
3516+
ROps.push_back(Interleave.getValue(1));
3517+
}
3518+
// Interleave at Factor/2:
3519+
// [a0c0 b0d0] [a1c1 b1d1] -> a0b0 c0d0 a1b1 c1d1
3520+
SDValue L = DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfVTs, LOps);
3521+
SDValue R = DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfVTs, ROps);
3522+
for (unsigned I = 0; I < Factor / 2; I++)
3523+
Results.push_back(L.getValue(I));
3524+
for (unsigned I = 0; I < Factor / 2; I++)
3525+
Results.push_back(R.getValue(I));
3526+
break;
3527+
}
34753528
case ISD::EXTRACT_ELEMENT: {
34763529
EVT OpTy = Node->getOperand(0).getValueType();
34773530
if (Node->getConstantOperandVal(1)) {

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29208,6 +29208,10 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
2920829208
EVT OpVT = Op.getValueType();
2920929209
assert(OpVT.isScalableVector() &&
2921029210
"Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
29211+
29212+
if (Op->getNumOperands() != 2)
29213+
return SDValue();
29214+
2921129215
SDValue Even = DAG.getNode(AArch64ISD::UZP1, DL, OpVT, Op.getOperand(0),
2921229216
Op.getOperand(1));
2921329217
SDValue Odd = DAG.getNode(AArch64ISD::UZP2, DL, OpVT, Op.getOperand(0),
@@ -29222,6 +29226,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
2922229226
assert(OpVT.isScalableVector() &&
2922329227
"Expected scalable vector in LowerVECTOR_INTERLEAVE.");
2922429228

29229+
if (Op->getNumOperands() != 2)
29230+
return SDValue();
29231+
2922529232
SDValue Lo = DAG.getNode(AArch64ISD::ZIP1, DL, OpVT, Op.getOperand(0),
2922629233
Op.getOperand(1));
2922729234
SDValue Hi = DAG.getNode(AArch64ISD::ZIP2, DL, OpVT, Op.getOperand(0),

llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll

Lines changed: 96 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,102 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
151151
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
152152
}
153153

154+
define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv64i8(<vscale x 64 x i8> %vec) {
155+
; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv64i8:
156+
; CHECK: // %bb.0:
157+
; CHECK-NEXT: uzp1 z4.b, z2.b, z3.b
158+
; CHECK-NEXT: uzp1 z5.b, z0.b, z1.b
159+
; CHECK-NEXT: uzp2 z3.b, z2.b, z3.b
160+
; CHECK-NEXT: uzp2 z6.b, z0.b, z1.b
161+
; CHECK-NEXT: uzp1 z0.b, z5.b, z4.b
162+
; CHECK-NEXT: uzp2 z2.b, z5.b, z4.b
163+
; CHECK-NEXT: uzp1 z1.b, z6.b, z3.b
164+
; CHECK-NEXT: uzp2 z3.b, z6.b, z3.b
165+
; CHECK-NEXT: ret
166+
%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> %vec)
167+
ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %retval
168+
}
169+
170+
define {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv32i16(<vscale x 32 x i16> %vec) {
171+
; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv32i16:
172+
; CHECK: // %bb.0:
173+
; CHECK-NEXT: uzp1 z4.h, z2.h, z3.h
174+
; CHECK-NEXT: uzp1 z5.h, z0.h, z1.h
175+
; CHECK-NEXT: uzp2 z3.h, z2.h, z3.h
176+
; CHECK-NEXT: uzp2 z6.h, z0.h, z1.h
177+
; CHECK-NEXT: uzp1 z0.h, z5.h, z4.h
178+
; CHECK-NEXT: uzp2 z2.h, z5.h, z4.h
179+
; CHECK-NEXT: uzp1 z1.h, z6.h, z3.h
180+
; CHECK-NEXT: uzp2 z3.h, z6.h, z3.h
181+
; CHECK-NEXT: ret
182+
%retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave4.nxv32i16(<vscale x 32 x i16> %vec)
183+
ret {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} %retval
184+
}
185+
186+
define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv16i32(<vscale x 16 x i32> %vec) {
187+
; CHECK-LABEL: vector_deinterleave_nxv4i32_nxv16i32:
188+
; CHECK: // %bb.0:
189+
; CHECK-NEXT: uzp1 z4.s, z2.s, z3.s
190+
; CHECK-NEXT: uzp1 z5.s, z0.s, z1.s
191+
; CHECK-NEXT: uzp2 z3.s, z2.s, z3.s
192+
; CHECK-NEXT: uzp2 z6.s, z0.s, z1.s
193+
; CHECK-NEXT: uzp1 z0.s, z5.s, z4.s
194+
; CHECK-NEXT: uzp2 z2.s, z5.s, z4.s
195+
; CHECK-NEXT: uzp1 z1.s, z6.s, z3.s
196+
; CHECK-NEXT: uzp2 z3.s, z6.s, z3.s
197+
; CHECK-NEXT: ret
198+
%retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %vec)
199+
ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %retval
200+
}
201+
202+
define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv8i64(<vscale x 8 x i64> %vec) {
203+
; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv8i64:
204+
; CHECK: // %bb.0:
205+
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
206+
; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
207+
; CHECK-NEXT: uzp2 z3.d, z2.d, z3.d
208+
; CHECK-NEXT: uzp2 z6.d, z0.d, z1.d
209+
; CHECK-NEXT: uzp1 z0.d, z5.d, z4.d
210+
; CHECK-NEXT: uzp2 z2.d, z5.d, z4.d
211+
; CHECK-NEXT: uzp1 z1.d, z6.d, z3.d
212+
; CHECK-NEXT: uzp2 z3.d, z6.d, z3.d
213+
; CHECK-NEXT: ret
214+
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave4.nxv8i64(<vscale x 8 x i64> %vec)
215+
ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
216+
}
217+
218+
define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv16i64(<vscale x 16 x i64> %vec) {
219+
; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv16i64:
220+
; CHECK: // %bb.0:
221+
; CHECK-NEXT: uzp1 z24.d, z6.d, z7.d
222+
; CHECK-NEXT: uzp1 z25.d, z4.d, z5.d
223+
; CHECK-NEXT: uzp1 z26.d, z2.d, z3.d
224+
; CHECK-NEXT: uzp1 z27.d, z0.d, z1.d
225+
; CHECK-NEXT: uzp2 z6.d, z6.d, z7.d
226+
; CHECK-NEXT: uzp2 z4.d, z4.d, z5.d
227+
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
228+
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
229+
; CHECK-NEXT: uzp1 z5.d, z25.d, z24.d
230+
; CHECK-NEXT: uzp2 z24.d, z25.d, z24.d
231+
; CHECK-NEXT: uzp1 z7.d, z27.d, z26.d
232+
; CHECK-NEXT: uzp1 z28.d, z4.d, z6.d
233+
; CHECK-NEXT: uzp2 z25.d, z27.d, z26.d
234+
; CHECK-NEXT: uzp1 z29.d, z0.d, z2.d
235+
; CHECK-NEXT: uzp2 z26.d, z4.d, z6.d
236+
; CHECK-NEXT: uzp2 z27.d, z0.d, z2.d
237+
; CHECK-NEXT: uzp1 z0.d, z7.d, z5.d
238+
; CHECK-NEXT: uzp1 z2.d, z25.d, z24.d
239+
; CHECK-NEXT: uzp2 z4.d, z7.d, z5.d
240+
; CHECK-NEXT: uzp1 z1.d, z29.d, z28.d
241+
; CHECK-NEXT: uzp1 z3.d, z27.d, z26.d
242+
; CHECK-NEXT: uzp2 z5.d, z29.d, z28.d
243+
; CHECK-NEXT: uzp2 z6.d, z25.d, z24.d
244+
; CHECK-NEXT: uzp2 z7.d, z27.d, z26.d
245+
; CHECK-NEXT: ret
246+
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave8.nxv16i64(<vscale x 16 x i64> %vec)
247+
ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
248+
}
249+
154250
; Predicated
155251
define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
156252
; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
@@ -279,7 +375,6 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_nxv2i32_nxv
279375
ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %retval
280376
}
281377

282-
283378
; Floating declarations
284379
declare {<vscale x 2 x half>,<vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
285380
declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)

llvm/test/CodeGen/AArch64/sve-vector-interleave.ll

Lines changed: 96 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,102 @@ define <vscale x 4 x i64> @interleave2_nxv4i64(<vscale x 2 x i64> %vec0, <vscale
146146
ret <vscale x 4 x i64> %retval
147147
}
148148

149+
define <vscale x 64 x i8> @interleave4_nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3) {
150+
; CHECK-LABEL: interleave4_nxv16i8:
151+
; CHECK: // %bb.0:
152+
; CHECK-NEXT: zip1 z4.b, z1.b, z3.b
153+
; CHECK-NEXT: zip1 z5.b, z0.b, z2.b
154+
; CHECK-NEXT: zip2 z3.b, z1.b, z3.b
155+
; CHECK-NEXT: zip2 z6.b, z0.b, z2.b
156+
; CHECK-NEXT: zip1 z0.b, z5.b, z4.b
157+
; CHECK-NEXT: zip2 z1.b, z5.b, z4.b
158+
; CHECK-NEXT: zip1 z2.b, z6.b, z3.b
159+
; CHECK-NEXT: zip2 z3.b, z6.b, z3.b
160+
; CHECK-NEXT: ret
161+
%retval = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3)
162+
ret <vscale x 64 x i8> %retval
163+
}
164+
165+
define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3) {
166+
; CHECK-LABEL: interleave4_nxv8i16:
167+
; CHECK: // %bb.0:
168+
; CHECK-NEXT: zip1 z4.h, z1.h, z3.h
169+
; CHECK-NEXT: zip1 z5.h, z0.h, z2.h
170+
; CHECK-NEXT: zip2 z3.h, z1.h, z3.h
171+
; CHECK-NEXT: zip2 z6.h, z0.h, z2.h
172+
; CHECK-NEXT: zip1 z0.h, z5.h, z4.h
173+
; CHECK-NEXT: zip2 z1.h, z5.h, z4.h
174+
; CHECK-NEXT: zip1 z2.h, z6.h, z3.h
175+
; CHECK-NEXT: zip2 z3.h, z6.h, z3.h
176+
; CHECK-NEXT: ret
177+
%retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
178+
ret <vscale x 32 x i16> %retval
179+
}
180+
181+
define <vscale x 16 x i32> @interleave4_nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3) {
182+
; CHECK-LABEL: interleave4_nxv4i32:
183+
; CHECK: // %bb.0:
184+
; CHECK-NEXT: zip1 z4.s, z1.s, z3.s
185+
; CHECK-NEXT: zip1 z5.s, z0.s, z2.s
186+
; CHECK-NEXT: zip2 z3.s, z1.s, z3.s
187+
; CHECK-NEXT: zip2 z6.s, z0.s, z2.s
188+
; CHECK-NEXT: zip1 z0.s, z5.s, z4.s
189+
; CHECK-NEXT: zip2 z1.s, z5.s, z4.s
190+
; CHECK-NEXT: zip1 z2.s, z6.s, z3.s
191+
; CHECK-NEXT: zip2 z3.s, z6.s, z3.s
192+
; CHECK-NEXT: ret
193+
%retval = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3)
194+
ret <vscale x 16 x i32> %retval
195+
}
196+
197+
define <vscale x 8 x i64> @interleave4_nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3) {
198+
; CHECK-LABEL: interleave4_nxv8i64:
199+
; CHECK: // %bb.0:
200+
; CHECK-NEXT: zip1 z4.d, z1.d, z3.d
201+
; CHECK-NEXT: zip1 z5.d, z0.d, z2.d
202+
; CHECK-NEXT: zip2 z3.d, z1.d, z3.d
203+
; CHECK-NEXT: zip2 z6.d, z0.d, z2.d
204+
; CHECK-NEXT: zip1 z0.d, z5.d, z4.d
205+
; CHECK-NEXT: zip2 z1.d, z5.d, z4.d
206+
; CHECK-NEXT: zip1 z2.d, z6.d, z3.d
207+
; CHECK-NEXT: zip2 z3.d, z6.d, z3.d
208+
; CHECK-NEXT: ret
209+
%retval = call <vscale x 8 x i64> @llvm.vector.interleave4.nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3)
210+
ret <vscale x 8 x i64> %retval
211+
}
212+
213+
define <vscale x 16 x i64> @interleave8_nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7) {
214+
; CHECK-LABEL: interleave8_nxv16i64:
215+
; CHECK: // %bb.0:
216+
; CHECK-NEXT: zip1 z24.d, z3.d, z7.d
217+
; CHECK-NEXT: zip1 z25.d, z1.d, z5.d
218+
; CHECK-NEXT: zip1 z26.d, z2.d, z6.d
219+
; CHECK-NEXT: zip1 z27.d, z0.d, z4.d
220+
; CHECK-NEXT: zip2 z3.d, z3.d, z7.d
221+
; CHECK-NEXT: zip2 z1.d, z1.d, z5.d
222+
; CHECK-NEXT: zip2 z2.d, z2.d, z6.d
223+
; CHECK-NEXT: zip2 z0.d, z0.d, z4.d
224+
; CHECK-NEXT: zip1 z4.d, z25.d, z24.d
225+
; CHECK-NEXT: zip2 z6.d, z25.d, z24.d
226+
; CHECK-NEXT: zip1 z5.d, z27.d, z26.d
227+
; CHECK-NEXT: zip2 z7.d, z27.d, z26.d
228+
; CHECK-NEXT: zip1 z24.d, z1.d, z3.d
229+
; CHECK-NEXT: zip1 z25.d, z0.d, z2.d
230+
; CHECK-NEXT: zip2 z26.d, z1.d, z3.d
231+
; CHECK-NEXT: zip2 z27.d, z0.d, z2.d
232+
; CHECK-NEXT: zip1 z0.d, z5.d, z4.d
233+
; CHECK-NEXT: zip2 z1.d, z5.d, z4.d
234+
; CHECK-NEXT: zip1 z2.d, z7.d, z6.d
235+
; CHECK-NEXT: zip2 z3.d, z7.d, z6.d
236+
; CHECK-NEXT: zip1 z4.d, z25.d, z24.d
237+
; CHECK-NEXT: zip2 z5.d, z25.d, z24.d
238+
; CHECK-NEXT: zip1 z6.d, z27.d, z26.d
239+
; CHECK-NEXT: zip2 z7.d, z27.d, z26.d
240+
; CHECK-NEXT: ret
241+
%retval = call <vscale x 16 x i64> @llvm.vector.interleave8.nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7)
242+
ret <vscale x 16 x i64> %retval
243+
}
244+
149245
; Predicated
150246

151247
define <vscale x 32 x i1> @interleave2_nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1) {

0 commit comments

Comments
 (0)