@@ -4852,41 +4852,56 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4852
4852
4853
4853
assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
4854
4854
4855
- SmallVector<SDValue> MaskVals;
4856
- // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4857
- // merged with a second vrgather.
4858
- SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4859
-
4860
4855
// By default we preserve the original operand order, and use a mask to
4861
4856
// select LHS as true and RHS as false. However, since RVV vector selects may
4862
4857
// feature splats but only on the LHS, we may choose to invert our mask and
4863
4858
// instead select between RHS and LHS.
4864
4859
bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
4865
- bool InvertMask = IsSelect == SwapOps;
4860
+
4861
+ if (IsSelect) {
4862
+ // Now construct the mask that will be used by the vselect operation.
4863
+ SmallVector<SDValue> MaskVals;
4864
+ for (int MaskIndex : Mask) {
4865
+ bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
4866
+ MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4867
+ }
4868
+
4869
+ if (SwapOps)
4870
+ std::swap(V1, V2);
4871
+
4872
+ assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4873
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4874
+ SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4875
+ return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
4876
+ }
4877
+
4878
+
4879
+ // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4880
+ // merged with a second vrgather.
4881
+ SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4866
4882
4867
4883
// Keep track of which non-undef indices are used by each LHS/RHS shuffle
4868
4884
// half.
4869
4885
DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4870
4886
4871
- // Now construct the mask that will be used by the vselect or blended
4872
- // vrgather operation. For vrgathers, construct the appropriate indices into
4873
- // each vector.
4887
+ SmallVector<SDValue> MaskVals;
4888
+
4889
+ // Now construct the mask that will be used by the blended vrgather operation.
4890
+ // Construct the appropriate indices into each vector.
4874
4891
for (int MaskIndex : Mask) {
4875
- bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask ;
4892
+ bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps ;
4876
4893
MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4877
- if (!IsSelect) {
4878
- bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4879
- GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4880
- ? DAG.getConstant(MaskIndex, DL, XLenVT)
4881
- : DAG.getUNDEF(XLenVT));
4882
- GatherIndicesRHS.push_back(
4883
- IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4884
- : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4885
- if (IsLHSOrUndefIndex && MaskIndex >= 0)
4886
- ++LHSIndexCounts[MaskIndex];
4887
- if (!IsLHSOrUndefIndex)
4888
- ++RHSIndexCounts[MaskIndex - NumElts];
4889
- }
4894
+ bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4895
+ GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4896
+ ? DAG.getConstant(MaskIndex, DL, XLenVT)
4897
+ : DAG.getUNDEF(XLenVT));
4898
+ GatherIndicesRHS.push_back(
4899
+ IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4900
+ : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4901
+ if (IsLHSOrUndefIndex && MaskIndex >= 0)
4902
+ ++LHSIndexCounts[MaskIndex];
4903
+ if (!IsLHSOrUndefIndex)
4904
+ ++RHSIndexCounts[MaskIndex - NumElts];
4890
4905
}
4891
4906
4892
4907
if (SwapOps) {
@@ -4898,9 +4913,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4898
4913
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4899
4914
SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4900
4915
4901
- if (IsSelect)
4902
- return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
4903
-
4904
4916
// We might be able to express the shuffle as a bitrotate. But even if we
4905
4917
// don't have Zvkb and have to expand, the expanded sequence of approx. 2
4906
4918
// shifts and a vor will have a higher throughput than a vrgather.
0 commit comments