Skip to content

Commit 248bff6

Browse files
committed
[AArch64] Lower vector.[de]interleave4
1 parent 62fd4d1 commit 248bff6

File tree

3 files changed

+185
-1
lines changed

3 files changed

+185
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29441,6 +29441,35 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
2944129441
EVT OpVT = Op.getValueType();
2944229442
assert(OpVT.isScalableVector() &&
2944329443
"Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
29444+
assert(Op->getNumOperands() == 2 ||
29445+
Op->getNumOperands() == 4 && "Expected factor to be 2 or 4.");
29446+
29447+
// Deinterleave 'ab cd ab cd' as a series of factor 2 deinterleaves.
29448+
if (Op.getNumOperands() == 4) {
29449+
SDVTList VTList = DAG.getVTList({OpVT, OpVT});
29450+
// ac bd (even/odd elements of operands 0 and 1)
29451+
SDNode *LHS0 = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTList,
29452+
Op.getOperand(0), Op.getOperand(1))
29453+
.getNode();
29454+
// ac bd (even/odd elements of operands 2 and 3)
29455+
SDNode *RHS0 = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTList,
29456+
Op.getOperand(2), Op.getOperand(3))
29457+
.getNode();
29458+
// aa cc
29459+
SDNode *LHS1 = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTList,
29460+
SDValue(LHS0, 0), SDValue(RHS0, 0))
29461+
.getNode();
29462+
// bb dd
29463+
SDNode *RHS1 = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTList,
29464+
SDValue(LHS0, 1), SDValue(RHS0, 1))
29465+
.getNode();
29466+
29467+
// aa bb cc dd
29468+
return DAG.getMergeValues({SDValue(LHS1, 0), SDValue(RHS1, 0),
29469+
SDValue(LHS1, 1), SDValue(RHS1, 1)},
29470+
DL);
29471+
}
29472+
2944429473
SDValue Even = DAG.getNode(AArch64ISD::UZP1, DL, OpVT, Op.getOperand(0),
2944529474
Op.getOperand(1));
2944629475
SDValue Odd = DAG.getNode(AArch64ISD::UZP2, DL, OpVT, Op.getOperand(0),
@@ -29454,6 +29483,34 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
2945429483
EVT OpVT = Op.getValueType();
2945529484
assert(OpVT.isScalableVector() &&
2945629485
"Expected scalable vector in LowerVECTOR_INTERLEAVE.");
29486+
assert(Op->getNumOperands() == 2 ||
29487+
Op->getNumOperands() == 4 && "Expected factor to be 2 or 4.");
29488+
29489+
// Interleave 'aa bb cc dd' as a series of factor 2 interleaves.
29490+
if (Op.getNumOperands() == 4) {
29491+
SDVTList VTList = DAG.getVTList({OpVT, OpVT});
29492+
// ac ac
29493+
SDNode *LHS0 = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTList,
29494+
Op.getOperand(0), Op.getOperand(2))
29495+
.getNode();
29496+
// bd bd
29497+
SDNode *RHS0 = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTList,
29498+
Op.getOperand(1), Op.getOperand(3))
29499+
.getNode();
29500+
// ab cd
29501+
SDNode *LHS1 = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTList,
29502+
SDValue(LHS0, 0), SDValue(RHS0, 0))
29503+
.getNode();
29504+
// ab cd
29505+
SDNode *RHS1 = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTList,
29506+
SDValue(LHS0, 1), SDValue(RHS0, 1))
29507+
.getNode();
29508+
29509+
// ab cd ab cd
29510+
return DAG.getMergeValues({SDValue(LHS1, 0), SDValue(LHS1, 1),
29511+
SDValue(RHS1, 0), SDValue(RHS1, 1)},
29512+
DL);
29513+
}
2945729514

2945829515
SDValue Lo = DAG.getNode(AArch64ISD::ZIP1, DL, OpVT, Op.getOperand(0),
2945929516
Op.getOperand(1));

llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,70 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv
151151
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
152152
}
153153

154+
define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv64i8(<vscale x 64 x i8> %vec) {
155+
; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv64i8:
156+
; CHECK: // %bb.0:
157+
; CHECK-NEXT: uzp1 z4.b, z2.b, z3.b
158+
; CHECK-NEXT: uzp1 z5.b, z0.b, z1.b
159+
; CHECK-NEXT: uzp2 z3.b, z2.b, z3.b
160+
; CHECK-NEXT: uzp2 z6.b, z0.b, z1.b
161+
; CHECK-NEXT: uzp1 z0.b, z5.b, z4.b
162+
; CHECK-NEXT: uzp2 z2.b, z5.b, z4.b
163+
; CHECK-NEXT: uzp1 z1.b, z6.b, z3.b
164+
; CHECK-NEXT: uzp2 z3.b, z6.b, z3.b
165+
; CHECK-NEXT: ret
166+
%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> %vec)
167+
ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %retval
168+
}
169+
170+
define {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv32i16(<vscale x 32 x i16> %vec) {
171+
; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv32i16:
172+
; CHECK: // %bb.0:
173+
; CHECK-NEXT: uzp1 z4.h, z2.h, z3.h
174+
; CHECK-NEXT: uzp1 z5.h, z0.h, z1.h
175+
; CHECK-NEXT: uzp2 z3.h, z2.h, z3.h
176+
; CHECK-NEXT: uzp2 z6.h, z0.h, z1.h
177+
; CHECK-NEXT: uzp1 z0.h, z5.h, z4.h
178+
; CHECK-NEXT: uzp2 z2.h, z5.h, z4.h
179+
; CHECK-NEXT: uzp1 z1.h, z6.h, z3.h
180+
; CHECK-NEXT: uzp2 z3.h, z6.h, z3.h
181+
; CHECK-NEXT: ret
182+
%retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave4.nxv32i16(<vscale x 32 x i16> %vec)
183+
ret {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} %retval
184+
}
185+
186+
define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv16i32(<vscale x 16 x i32> %vec) {
187+
; CHECK-LABEL: vector_deinterleave_nxv4i32_nxv16i32:
188+
; CHECK: // %bb.0:
189+
; CHECK-NEXT: uzp1 z4.s, z2.s, z3.s
190+
; CHECK-NEXT: uzp1 z5.s, z0.s, z1.s
191+
; CHECK-NEXT: uzp2 z3.s, z2.s, z3.s
192+
; CHECK-NEXT: uzp2 z6.s, z0.s, z1.s
193+
; CHECK-NEXT: uzp1 z0.s, z5.s, z4.s
194+
; CHECK-NEXT: uzp2 z2.s, z5.s, z4.s
195+
; CHECK-NEXT: uzp1 z1.s, z6.s, z3.s
196+
; CHECK-NEXT: uzp2 z3.s, z6.s, z3.s
197+
; CHECK-NEXT: ret
198+
%retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %vec)
199+
ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %retval
200+
}
201+
202+
define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv8i64(<vscale x 8 x i64> %vec) {
203+
; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv8i64:
204+
; CHECK: // %bb.0:
205+
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
206+
; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
207+
; CHECK-NEXT: uzp2 z3.d, z2.d, z3.d
208+
; CHECK-NEXT: uzp2 z6.d, z0.d, z1.d
209+
; CHECK-NEXT: uzp1 z0.d, z5.d, z4.d
210+
; CHECK-NEXT: uzp2 z2.d, z5.d, z4.d
211+
; CHECK-NEXT: uzp1 z1.d, z6.d, z3.d
212+
; CHECK-NEXT: uzp2 z3.d, z6.d, z3.d
213+
; CHECK-NEXT: ret
214+
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave4.nxv8i64(<vscale x 8 x i64> %vec)
215+
ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
216+
}
217+
154218
; Predicated
155219
define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
156220
; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
@@ -279,7 +343,6 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_nxv2i32_nxv
279343
ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %retval
280344
}
281345

282-
283346
; Floating declarations
284347
declare {<vscale x 2 x half>,<vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
285348
declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)

llvm/test/CodeGen/AArch64/sve-vector-interleave.ll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,70 @@ define <vscale x 4 x i64> @interleave2_nxv4i64(<vscale x 2 x i64> %vec0, <vscale
146146
ret <vscale x 4 x i64> %retval
147147
}
148148

149+
define <vscale x 64 x i8> @interleave4_nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3) {
150+
; CHECK-LABEL: interleave4_nxv16i8:
151+
; CHECK: // %bb.0:
152+
; CHECK-NEXT: zip1 z4.b, z1.b, z3.b
153+
; CHECK-NEXT: zip1 z5.b, z0.b, z2.b
154+
; CHECK-NEXT: zip2 z3.b, z1.b, z3.b
155+
; CHECK-NEXT: zip2 z6.b, z0.b, z2.b
156+
; CHECK-NEXT: zip1 z0.b, z5.b, z4.b
157+
; CHECK-NEXT: zip2 z1.b, z5.b, z4.b
158+
; CHECK-NEXT: zip1 z2.b, z6.b, z3.b
159+
; CHECK-NEXT: zip2 z3.b, z6.b, z3.b
160+
; CHECK-NEXT: ret
161+
%retval = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3)
162+
ret <vscale x 64 x i8> %retval
163+
}
164+
165+
define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3) {
166+
; CHECK-LABEL: interleave4_nxv8i16:
167+
; CHECK: // %bb.0:
168+
; CHECK-NEXT: zip1 z4.h, z1.h, z3.h
169+
; CHECK-NEXT: zip1 z5.h, z0.h, z2.h
170+
; CHECK-NEXT: zip2 z3.h, z1.h, z3.h
171+
; CHECK-NEXT: zip2 z6.h, z0.h, z2.h
172+
; CHECK-NEXT: zip1 z0.h, z5.h, z4.h
173+
; CHECK-NEXT: zip2 z1.h, z5.h, z4.h
174+
; CHECK-NEXT: zip1 z2.h, z6.h, z3.h
175+
; CHECK-NEXT: zip2 z3.h, z6.h, z3.h
176+
; CHECK-NEXT: ret
177+
%retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
178+
ret <vscale x 32 x i16> %retval
179+
}
180+
181+
define <vscale x 16 x i32> @interleave4_nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3) {
182+
; CHECK-LABEL: interleave4_nxv4i32:
183+
; CHECK: // %bb.0:
184+
; CHECK-NEXT: zip1 z4.s, z1.s, z3.s
185+
; CHECK-NEXT: zip1 z5.s, z0.s, z2.s
186+
; CHECK-NEXT: zip2 z3.s, z1.s, z3.s
187+
; CHECK-NEXT: zip2 z6.s, z0.s, z2.s
188+
; CHECK-NEXT: zip1 z0.s, z5.s, z4.s
189+
; CHECK-NEXT: zip2 z1.s, z5.s, z4.s
190+
; CHECK-NEXT: zip1 z2.s, z6.s, z3.s
191+
; CHECK-NEXT: zip2 z3.s, z6.s, z3.s
192+
; CHECK-NEXT: ret
193+
%retval = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3)
194+
ret <vscale x 16 x i32> %retval
195+
}
196+
197+
define <vscale x 8 x i64> @interleave4_nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3) {
198+
; CHECK-LABEL: interleave4_nxv8i64:
199+
; CHECK: // %bb.0:
200+
; CHECK-NEXT: zip1 z4.d, z1.d, z3.d
201+
; CHECK-NEXT: zip1 z5.d, z0.d, z2.d
202+
; CHECK-NEXT: zip2 z3.d, z1.d, z3.d
203+
; CHECK-NEXT: zip2 z6.d, z0.d, z2.d
204+
; CHECK-NEXT: zip1 z0.d, z5.d, z4.d
205+
; CHECK-NEXT: zip2 z1.d, z5.d, z4.d
206+
; CHECK-NEXT: zip1 z2.d, z6.d, z3.d
207+
; CHECK-NEXT: zip2 z3.d, z6.d, z3.d
208+
; CHECK-NEXT: ret
209+
%retval = call <vscale x 8 x i64> @llvm.vector.interleave4.nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3)
210+
ret <vscale x 8 x i64> %retval
211+
}
212+
149213
; Predicated
150214

151215
define <vscale x 32 x i1> @interleave2_nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1) {

0 commit comments

Comments
 (0)