Skip to content

Commit c2e329d

Browse files
committed
Move to LegalizeDAG and handle arbitrary pow-of-2 factors
1 parent 248bff6 commit c2e329d

File tree

4 files changed

+126
-56
lines changed

4 files changed

+126
-56
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3558,6 +3558,62 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
35583558
Results.push_back(TLI.expandVectorSplice(Node, DAG));
35593559
break;
35603560
}
3561+
case ISD::VECTOR_DEINTERLEAVE: {
3562+
unsigned Factor = Node->getNumOperands();
3563+
if (Factor <= 2 || !isPowerOf2_32(Factor))
3564+
break;
3565+
SmallVector<SDValue, 8> Ops;
3566+
for (SDValue Op : Node->ops())
3567+
Ops.push_back(Op);
3568+
EVT VecVT = Node->getValueType(0);
3569+
SmallVector<EVT> HalfVTs(Factor / 2, VecVT);
3570+
// Deinterleave at Factor/2 so each result contains two factors interleaved:
3571+
// ab cd ab cd -> [ac bd] [ac bd]
3572+
SDValue L = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfVTs,
3573+
ArrayRef(Ops).take_front(Factor / 2));
3574+
SDValue R = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, HalfVTs,
3575+
ArrayRef(Ops).take_back(Factor / 2));
3576+
Results.resize(Factor);
3577+
// Deinterleave the 2 factors out:
3578+
// [ac ac] [bd bd] -> aa bb cc dd
3579+
for (unsigned I = 0; I < Factor / 2; I++) {
3580+
SDValue Deinterleave =
3581+
DAG.getNode(ISD::VECTOR_DEINTERLEAVE, dl, {VecVT, VecVT},
3582+
{L.getValue(I), R.getValue(I)});
3583+
Results[I] = Deinterleave.getValue(0);
3584+
Results[I + Factor / 2] = Deinterleave.getValue(1);
3585+
}
3586+
break;
3587+
}
3588+
case ISD::VECTOR_INTERLEAVE: {
3589+
unsigned Factor = Node->getNumOperands();
3590+
if (Factor <= 2 || !isPowerOf2_32(Factor))
3591+
break;
3592+
SmallVector<SDValue, 8> Ops;
3593+
for (SDValue Op : Node->ops())
3594+
Ops.push_back(Op);
3595+
EVT VecVT = Node->getValueType(0);
3596+
SmallVector<EVT> HalfVTs(Factor / 2, VecVT);
3597+
SmallVector<SDValue, 8> LOps, ROps;
3598+
// Interleave so we have 2 factors per result:
3599+
// aa bb cc dd -> [ac bd] [ac bd]
3600+
for (unsigned I = 0; I < Factor / 2; I++) {
3601+
SDValue Interleave =
3602+
DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, {VecVT, VecVT},
3603+
{Ops[I], Ops[I + Factor / 2]});
3604+
LOps.push_back(Interleave.getValue(0));
3605+
ROps.push_back(Interleave.getValue(1));
3606+
}
3607+
// Interleave at Factor/2:
3608+
// [ac bd] [ac bd] -> ab cd ab cd
3609+
SDValue L = DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfVTs, LOps);
3610+
SDValue R = DAG.getNode(ISD::VECTOR_INTERLEAVE, dl, HalfVTs, ROps);
3611+
for (unsigned I = 0; I < Factor / 2; I++)
3612+
Results.push_back(L.getValue(I));
3613+
for (unsigned I = 0; I < Factor / 2; I++)
3614+
Results.push_back(R.getValue(I));
3615+
break;
3616+
}
35613617
case ISD::EXTRACT_ELEMENT: {
35623618
EVT OpTy = Node->getOperand(0).getValueType();
35633619
if (Node->getConstantOperandVal(1)) {

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -29441,34 +29441,9 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
2944129441
EVT OpVT = Op.getValueType();
2944229442
assert(OpVT.isScalableVector() &&
2944329443
"Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
29444-
assert(Op->getNumOperands() == 2 ||
29445-
Op->getNumOperands() == 4 && "Expected factor to be 2 or 4.");
29446-
29447-
// Deinterleave 'ab cd ac bd' as a series of factor 2 deinterleaves.
29448-
if (Op.getNumOperands() == 4) {
29449-
SDVTList VTList = DAG.getVTList({OpVT, OpVT});
29450-
// ac ac
29451-
SDNode *LHS0 = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTList,
29452-
Op.getOperand(0), Op.getOperand(1))
29453-
.getNode();
29454-
// bd bd
29455-
SDNode *RHS0 = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTList,
29456-
Op.getOperand(2), Op.getOperand(3))
29457-
.getNode();
29458-
// aa cc
29459-
SDNode *LHS1 = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTList,
29460-
SDValue(LHS0, 0), SDValue(RHS0, 0))
29461-
.getNode();
29462-
// bb dd
29463-
SDNode *RHS1 = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTList,
29464-
SDValue(LHS0, 1), SDValue(RHS0, 1))
29465-
.getNode();
29466-
29467-
// aa bb cc dd
29468-
return DAG.getMergeValues({SDValue(LHS1, 0), SDValue(RHS1, 0),
29469-
SDValue(LHS1, 1), SDValue(RHS1, 1)},
29470-
DL);
29471-
}
29444+
29445+
if (Op->getNumOperands() != 2)
29446+
return SDValue();
2947229447

2947329448
SDValue Even = DAG.getNode(AArch64ISD::UZP1, DL, OpVT, Op.getOperand(0),
2947429449
Op.getOperand(1));
@@ -29483,34 +29458,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
2948329458
EVT OpVT = Op.getValueType();
2948429459
assert(OpVT.isScalableVector() &&
2948529460
"Expected scalable vector in LowerVECTOR_INTERLEAVE.");
29486-
assert(Op->getNumOperands() == 2 ||
29487-
Op->getNumOperands() == 4 && "Expected factor to be 2 or 4.");
29488-
29489-
// Interleave 'aa bb cc dd' as a series of factor 2 interleaves.
29490-
if (Op.getNumOperands() == 4) {
29491-
SDVTList VTList = DAG.getVTList({OpVT, OpVT});
29492-
// ac ac
29493-
SDNode *LHS0 = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTList,
29494-
Op.getOperand(0), Op.getOperand(2))
29495-
.getNode();
29496-
// bd bd
29497-
SDNode *RHS0 = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTList,
29498-
Op.getOperand(1), Op.getOperand(3))
29499-
.getNode();
29500-
// ab cd
29501-
SDNode *LHS1 = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTList,
29502-
SDValue(LHS0, 0), SDValue(RHS0, 0))
29503-
.getNode();
29504-
// ab cd
29505-
SDNode *RHS1 = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTList,
29506-
SDValue(LHS0, 1), SDValue(RHS0, 1))
29507-
.getNode();
29508-
29509-
// ab cd ab cd
29510-
return DAG.getMergeValues({SDValue(LHS1, 0), SDValue(LHS1, 1),
29511-
SDValue(RHS1, 0), SDValue(RHS1, 1)},
29512-
DL);
29513-
}
29461+
29462+
if (Op->getNumOperands() != 2)
29463+
return SDValue();
2951429464

2951529465
SDValue Lo = DAG.getNode(AArch64ISD::ZIP1, DL, OpVT, Op.getOperand(0),
2951629466
Op.getOperand(1));

llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,38 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
215215
ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
216216
}
217217

218+
define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv16i64(<vscale x 16 x i64> %vec) {
219+
; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv16i64:
220+
; CHECK: // %bb.0:
221+
; CHECK-NEXT: uzp1 z24.d, z6.d, z7.d
222+
; CHECK-NEXT: uzp1 z25.d, z4.d, z5.d
223+
; CHECK-NEXT: uzp1 z26.d, z2.d, z3.d
224+
; CHECK-NEXT: uzp1 z27.d, z0.d, z1.d
225+
; CHECK-NEXT: uzp2 z6.d, z6.d, z7.d
226+
; CHECK-NEXT: uzp2 z4.d, z4.d, z5.d
227+
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
228+
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
229+
; CHECK-NEXT: uzp1 z5.d, z25.d, z24.d
230+
; CHECK-NEXT: uzp2 z24.d, z25.d, z24.d
231+
; CHECK-NEXT: uzp1 z7.d, z27.d, z26.d
232+
; CHECK-NEXT: uzp1 z28.d, z4.d, z6.d
233+
; CHECK-NEXT: uzp2 z25.d, z27.d, z26.d
234+
; CHECK-NEXT: uzp1 z29.d, z0.d, z2.d
235+
; CHECK-NEXT: uzp2 z26.d, z4.d, z6.d
236+
; CHECK-NEXT: uzp2 z27.d, z0.d, z2.d
237+
; CHECK-NEXT: uzp1 z0.d, z7.d, z5.d
238+
; CHECK-NEXT: uzp1 z2.d, z25.d, z24.d
239+
; CHECK-NEXT: uzp2 z4.d, z7.d, z5.d
240+
; CHECK-NEXT: uzp1 z1.d, z29.d, z28.d
241+
; CHECK-NEXT: uzp1 z3.d, z27.d, z26.d
242+
; CHECK-NEXT: uzp2 z5.d, z29.d, z28.d
243+
; CHECK-NEXT: uzp2 z6.d, z25.d, z24.d
244+
; CHECK-NEXT: uzp2 z7.d, z27.d, z26.d
245+
; CHECK-NEXT: ret
246+
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave8.nxv16i64(<vscale x 16 x i64> %vec)
247+
ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
248+
}
249+
218250
; Predicated
219251
define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
220252
; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:

llvm/test/CodeGen/AArch64/sve-vector-interleave.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,38 @@ define <vscale x 8 x i64> @interleave4_nxv8i64(<vscale x 2 x i64> %vec0, <vscale
210210
ret <vscale x 8 x i64> %retval
211211
}
212212

213+
define <vscale x 16 x i64> @interleave8_nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7) {
214+
; CHECK-LABEL: interleave8_nxv16i64:
215+
; CHECK: // %bb.0:
216+
; CHECK-NEXT: zip1 z24.d, z3.d, z7.d
217+
; CHECK-NEXT: zip1 z25.d, z1.d, z5.d
218+
; CHECK-NEXT: zip1 z26.d, z2.d, z6.d
219+
; CHECK-NEXT: zip1 z27.d, z0.d, z4.d
220+
; CHECK-NEXT: zip2 z3.d, z3.d, z7.d
221+
; CHECK-NEXT: zip2 z1.d, z1.d, z5.d
222+
; CHECK-NEXT: zip2 z2.d, z2.d, z6.d
223+
; CHECK-NEXT: zip2 z0.d, z0.d, z4.d
224+
; CHECK-NEXT: zip1 z4.d, z25.d, z24.d
225+
; CHECK-NEXT: zip2 z6.d, z25.d, z24.d
226+
; CHECK-NEXT: zip1 z5.d, z27.d, z26.d
227+
; CHECK-NEXT: zip2 z7.d, z27.d, z26.d
228+
; CHECK-NEXT: zip1 z24.d, z1.d, z3.d
229+
; CHECK-NEXT: zip1 z25.d, z0.d, z2.d
230+
; CHECK-NEXT: zip2 z26.d, z1.d, z3.d
231+
; CHECK-NEXT: zip2 z27.d, z0.d, z2.d
232+
; CHECK-NEXT: zip1 z0.d, z5.d, z4.d
233+
; CHECK-NEXT: zip2 z1.d, z5.d, z4.d
234+
; CHECK-NEXT: zip1 z2.d, z7.d, z6.d
235+
; CHECK-NEXT: zip2 z3.d, z7.d, z6.d
236+
; CHECK-NEXT: zip1 z4.d, z25.d, z24.d
237+
; CHECK-NEXT: zip2 z5.d, z25.d, z24.d
238+
; CHECK-NEXT: zip1 z6.d, z27.d, z26.d
239+
; CHECK-NEXT: zip2 z7.d, z27.d, z26.d
240+
; CHECK-NEXT: ret
241+
%retval = call <vscale x 16 x i64> @llvm.vector.interleave8.nxv16i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3, <vscale x 2 x i64> %vec4, <vscale x 2 x i64> %vec5, <vscale x 2 x i64> %vec6, <vscale x 2 x i64> %vec7)
242+
ret <vscale x 16 x i64> %retval
243+
}
244+
213245
; Predicated
214246

215247
define <vscale x 32 x i1> @interleave2_nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1) {

0 commit comments

Comments
 (0)