
Commit e9311f9

[RISCV] Separate single source and dual source lowering code [nfc]
The two single source cases aren't affected by the swap or select matching, as those are dual operand specific. Similarly, a two source shuffle can't be a rotate. We could extend this idea to some of the shuffle types above, but some of them are validly either single or dual source. We don't want to lose that, and the code complexity of versioning early and having to repeat some shuffle kinds doesn't (currently) seem worth it.
1 parent b801b60 commit e9311f9
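
To pin down the terminology in the message, here is a small standalone sketch (assumed helper name, not code from this patch): mask entries below NumElts read the first source, entries at or above NumElts read the second, and -1 marks an undef lane. "Single source" means every defined entry reads the same operand, which is why the operand swap and vselect matching (both dual operand specific) can be skipped for such shuffles.

#include <cstddef>
#include <vector>

// Illustrative only (not from RISCVISelLowering.cpp): classify a shuffle
// mask over two NumElts-wide sources as single source or dual source.
static bool isSingleSource(const std::vector<int> &Mask, size_t NumElts) {
  bool UsesV1 = false, UsesV2 = false;
  for (int M : Mask) {
    if (M < 0)
      continue; // undef lane; compatible with either operand
    if (M < (int)NumElts)
      UsesV1 = true;
    else
      UsesV2 = true;
  }
  // Dual source only when both operands are actually read.
  return !(UsesV1 && UsesV2);
}

For NumElts == 4, {3, 1, 2, 0} and {5, 4, 7, 6} are single source (the latter reads only the second operand), while {0, 5, 2, 7} reads both operands and stays on the dual source path.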

1 file changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 54 additions & 53 deletions
@@ -4926,22 +4926,69 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
   }
 
-  // Detect shuffles which can be re-expressed as vector selects; these are
-  // shuffles in which each element in the destination is taken from an element
-  // at the corresponding index in either source vectors.
-  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
-    int MaskIndex = MaskIdx.value();
-    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
-  });
 
+  // Handle any remaining single source shuffles
   assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
+  if (V2.isUndef()) {
+    // We might be able to express the shuffle as a bitrotate. But even if we
+    // don't have Zvkb and have to expand, the expanded sequence of approx. 2
+    // shifts and a vor will have a higher throughput than a vrgather.
+    if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
+      return V;
+
+    // Base case for the two operand recursion below - handle the worst case
+    // single source shuffle.
+    unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
+    MVT IndexVT = VT.changeTypeToInteger();
+    // Since we can't introduce illegal index types at this stage, use i16 and
+    // vrgatherei16 if the corresponding index type for plain vrgather is greater
+    // than XLenVT.
+    if (IndexVT.getScalarType().bitsGT(XLenVT)) {
+      GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
+      IndexVT = IndexVT.changeVectorElementType(MVT::i16);
+    }
+
+    // If the mask allows, we can do all the index computation in 16 bits. This
+    // requires less work and less register pressure at high LMUL, and creates
+    // smaller constants which may be cheaper to materialize.
+    if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
+        (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
+      GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
+      IndexVT = IndexVT.changeVectorElementType(MVT::i16);
+    }
+
+    MVT IndexContainerVT =
+        ContainerVT.changeVectorElementType(IndexVT.getScalarType());
+
+    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
+    SmallVector<SDValue> GatherIndicesLHS;
+    for (int MaskIndex : Mask) {
+      bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
+      GatherIndicesLHS.push_back(IsLHSIndex
+                                     ? DAG.getConstant(MaskIndex, DL, XLenVT)
+                                     : DAG.getUNDEF(XLenVT));
+    }
+    SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
+    LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
+                                         Subtarget);
+    SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
+                                 DAG.getUNDEF(ContainerVT), TrueMask, VL);
+    return convertFromScalableVector(VT, Gather, DAG, Subtarget);
+  }
 
   // By default we preserve the original operand order, and use a mask to
   // select LHS as true and RHS as false. However, since RVV vector selects may
   // feature splats but only on the LHS, we may choose to invert our mask and
   // instead select between RHS and LHS.
   bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
 
+  // Detect shuffles which can be re-expressed as vector selects; these are
+  // shuffles in which each element in the destination is taken from an element
+  // at the corresponding index in either source vectors.
+  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
+    int MaskIndex = MaskIdx.value();
+    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
+  });
   if (IsSelect) {
     // Now construct the mask that will be used by the vselect operation.
     SmallVector<SDValue> MaskVals;
@@ -4959,12 +5006,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
   }
 
-  // We might be able to express the shuffle as a bitrotate. But even if we
-  // don't have Zvkb and have to expand, the expanded sequence of approx. 2
-  // shifts and a vor will have a higher throughput than a vrgather.
-  if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
-    return V;
-
   if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
     // On such a large vector we're unable to use i8 as the index type.
     // FIXME: We could promote the index to i16 and use vrgatherei16, but that
@@ -4998,46 +5039,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
   SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
 
-  // Base case for the recursion just below - handle the worst case
-  // single source permutation. Note that all the splat variants
-  // are handled above.
-  if (V2.isUndef()) {
-    unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
-    MVT IndexVT = VT.changeTypeToInteger();
-    // Since we can't introduce illegal index types at this stage, use i16 and
-    // vrgatherei16 if the corresponding index type for plain vrgather is greater
-    // than XLenVT.
-    if (IndexVT.getScalarType().bitsGT(XLenVT)) {
-      GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
-      IndexVT = IndexVT.changeVectorElementType(MVT::i16);
-    }
-
-    // If the mask allows, we can do all the index computation in 16 bits. This
-    // requires less work and less register pressure at high LMUL, and creates
-    // smaller constants which may be cheaper to materialize.
-    if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
-        (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
-      GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
-      IndexVT = IndexVT.changeVectorElementType(MVT::i16);
-    }
-
-    MVT IndexContainerVT =
-        ContainerVT.changeVectorElementType(IndexVT.getScalarType());
-
-    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
-    SmallVector<SDValue> GatherIndicesLHS;
-    for (int ShuffleIdx : ShuffleMaskLHS)
-      GatherIndicesLHS.push_back(ShuffleIdx != -1
-                                     ? DAG.getConstant(ShuffleIdx, DL, XLenVT)
-                                     : DAG.getUNDEF(XLenVT));
-    SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
-    LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
-                                         Subtarget);
-    SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
-                                 DAG.getUNDEF(ContainerVT), TrueMask, VL);
-    return convertFromScalableVector(VT, Gather, DAG, Subtarget);
-  }
-
   // Recursively invoke lowering for each operand if we had two
   // independent single source shuffles, and then combine the result via a
   // vselect. Note that the vselect will likely be folded back into the
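
To make the relocated IsSelect predicate concrete, here is a standalone sketch of the same check (assumed helper name, mirroring the lambda in the diff without LLVM's all_of/enumerate plumbing):

#include <cstddef>
#include <vector>

// A shuffle is expressible as a vector select when every defined mask
// entry i picks lane i of one of the two sources, i.e.
// Mask[i] % NumElts == i; such shuffles lower to a vselect rather than
// a (more expensive) gather.
static bool isSelectMask(const std::vector<int> &Mask) {
  const size_t NumElts = Mask.size();
  for (size_t I = 0; I != NumElts; ++I)
    if (Mask[I] >= 0 && (size_t)Mask[I] % NumElts != I)
      return false;
  return true;
}

For NumElts == 4, {0, 5, 2, 7} passes (lanes 0 and 2 from V1, lanes 1 and 3 from V2), while {1, 5, 2, 7} fails because destination lane 0 would read source lane 1.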

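As for the moved rotate comment's "approx. 2 shifts and a vor", the shape of that expansion is easiest to see on a scalar. A sketch of the same decomposition on a 64-bit value (an analogy, not the backend code; the RVV fallback applies it per element with vector shifts and a vor):

#include <cstdint>

// A rotate-left decomposes into two shifts plus an OR, which is the
// three-instruction shape the expansion takes when Zvkb's vror/vrol are
// unavailable.
static uint64_t rotl64(uint64_t X, unsigned K) {
  K &= 63; // keep both shift amounts in [0, 63] and avoid UB when K == 0
  return K == 0 ? X : (X << K) | (X >> (64 - K));
}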