Skip to content

Commit 4dbf67d

Browse files
authored
[RISCV] Lower SEW<=32 vector_deinterleave(2) via vunzip2{a,b} (#136463)
This is a continuation from 22d5890c and adds the necessary logic to handle SEW!=64 profitably. The interesting case is handling e.g. a single m1 which is split via extract_subvector into two operands, and forming that back into a single m1 operation, instead of letting the vslidedown by vlenb/Constant sequence be generated. This is analogous to getSingleShuffleSrc for vnsrl, and we can share a bunch of code.
1 parent 928c333 commit 4dbf67d

File tree

2 files changed

+429
-202
lines changed

2 files changed

+429
-202
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4578,36 +4578,48 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
45784578
VL);
45794579
}
45804580

4581-
// Can this shuffle be performed on exactly one (possibly larger) input?
4582-
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4583-
4584-
if (V2.isUndef())
4585-
return V1;
4586-
4581+
/// If concat_vector(V1,V2) could be folded away to some existing
4582+
/// vector source, return it. Note that the source may be larger
4583+
/// than the requested concat_vector (i.e. an extract_subvector
4584+
/// might be required.)
4585+
static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4586+
EVT VT = V1.getValueType();
4587+
assert(VT == V2.getValueType() && "argument types must match");
45874588
// Both inputs must be extracts.
45884589
if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
45894590
V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
45904591
return SDValue();
45914592

45924593
// Extracting from the same source.
45934594
SDValue Src = V1.getOperand(0);
4594-
if (Src != V2.getOperand(0))
4595-
return SDValue();
4596-
4597-
// Src needs to have twice the number of elements.
4598-
unsigned NumElts = VT.getVectorNumElements();
4599-
if (!Src.getValueType().isFixedLengthVector() ||
4600-
Src.getValueType().getVectorNumElements() != (NumElts * 2))
4595+
if (Src != V2.getOperand(0) ||
4596+
VT.isScalableVector() != Src.getValueType().isScalableVector())
46014597
return SDValue();
46024598

46034599
// The extracts must extract the two halves of the source.
46044600
if (V1.getConstantOperandVal(1) != 0 ||
4605-
V2.getConstantOperandVal(1) != NumElts)
4601+
V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
46064602
return SDValue();
46074603

46084604
return Src;
46094605
}
46104606

4607+
// Can this shuffle be performed on exactly one (possibly larger) input?
4608+
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4609+
4610+
if (V2.isUndef())
4611+
return V1;
4612+
4613+
unsigned NumElts = VT.getVectorNumElements();
4614+
// Src needs to have twice the number of elements.
4615+
// TODO: Update shuffle lowering to add the extract subvector
4616+
if (SDValue Src = foldConcatVector(V1, V2);
4617+
Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4618+
return Src;
4619+
4620+
return SDValue();
4621+
}
4622+
46114623
/// Is this shuffle interleaving contiguous elements from one vector into the
46124624
/// even elements and contiguous elements from another vector into the odd
46134625
/// elements. \p EvenSrc will contain the element that should be in the first
@@ -11519,12 +11531,27 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
1151911531
return DAG.getMergeValues(Res, DL);
1152011532
}
1152111533

11522-
// TODO: Remove the e64 restriction once the fractional LMUL lowering
11523-
// is improved to always beat the vnsrl lowering below.
11524-
if (Subtarget.hasVendorXRivosVizip() && Factor == 2 &&
11525-
VecVT.getVectorElementType().getSizeInBits() == 64) {
11534+
if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
11535+
MVT VT = Op->getSimpleValueType(0);
1152611536
SDValue V1 = Op->getOperand(0);
1152711537
SDValue V2 = Op->getOperand(1);
11538+
11539+
// For fractional LMUL, check if we can use a higher LMUL
11540+
// instruction to avoid a vslidedown.
11541+
if (SDValue Src = foldConcatVector(V1, V2);
11542+
Src && getLMUL1VT(VT).bitsGT(VT)) {
11543+
EVT NewVT = VT.getDoubleNumVectorElementsVT();
11544+
SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
11545+
Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewVT, Src, ZeroIdx);
11546+
SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
11547+
DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
11548+
SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
11549+
DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
11550+
Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Even, ZeroIdx);
11551+
Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Odd, ZeroIdx);
11552+
return DAG.getMergeValues({Even, Odd}, DL);
11553+
}
11554+
1152811555
SDValue Even =
1152911556
lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
1153011557
SDValue Odd =

0 commit comments

Comments
 (0)