@@ -15638,16 +15638,18 @@ static SDValue isScalarToVec(SDValue Op) {
15638
15638
// On little endian, that's just the corresponding element in the other
15639
15639
// half of the vector. On big endian, it is in the same half but right
15640
15640
// justified rather than left justified in that half.
15641
- static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15642
- int LHSMaxIdx, int RHSMinIdx,
15643
- int RHSMaxIdx, int HalfVec,
15644
- unsigned ValidLaneWidth,
15645
- const PPCSubtarget &Subtarget) {
15641
+ static void fixupShuffleMaskForPermutedSToV(
15642
+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15643
+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15644
+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15646
15645
for (int i = 0, e = ShuffV.size(); i < e; i++) {
15647
15646
int Idx = ShuffV[i];
15648
- if (( Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx) )
15647
+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt )
15649
15648
ShuffV[i] +=
15650
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15649
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15650
+ if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15651
+ ShuffV[i] +=
15652
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15651
15653
}
15652
15654
}
15653
15655
@@ -15686,6 +15688,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15686
15688
OrigSToV.getOperand(0));
15687
15689
}
15688
15690
15691
+ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15692
+ int HalfVec, int LHSLastElementDefined,
15693
+ int RHSLastElementDefined) {
15694
+ for (int i : seq<int>(0, ShuffV.size())) {
15695
+ int Index = ShuffV[i];
15696
+ if (Index < 0) // A negative mask entry is explicitly undefined; nothing to check.
15697
+ continue;
15698
+ // First input of the vector_shuffle: reject an index below HalfVec that lies past LHSLastElementDefined (check skipped when that bound is negative).
15699
+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15700
+ (Index > LHSLastElementDefined))
15701
+ return false;
15702
+ // Second input of the vector_shuffle: reject an index above HalfVec + RHSLastElementDefined (check skipped when that bound is negative).
15703
+ if ((RHSLastElementDefined >= 0) &&
15704
+ (Index > HalfVec + RHSLastElementDefined))
15705
+ return false;
15706
+ }
15707
+ return true; // No defined mask index failed either range check.
15708
+ }
15709
+
15689
15710
// On little endian subtargets, combine shuffles such as:
15690
15711
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15691
15712
// into:
@@ -15733,36 +15754,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15733
15754
SDValue SToVLHS = isScalarToVec(LHS);
15734
15755
SDValue SToVRHS = isScalarToVec(RHS);
15735
15756
if (SToVLHS || SToVRHS) {
15736
- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15737
- // same type and have differing element sizes, then do not perform
15738
- // the following transformation. The current transformation for
15739
- // SCALAR_TO_VECTOR assumes that both input vectors have the same
15740
- // element size. This will be updated in the future to account for
15741
- // differing sizes of the LHS and RHS.
15742
- if (SToVLHS && SToVRHS &&
15743
- (SToVLHS.getValueType().getScalarSizeInBits() !=
15744
- SToVRHS.getValueType().getScalarSizeInBits()))
15745
- return Res;
15746
-
15747
- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15748
- : SToVRHS.getValueType().getVectorNumElements();
15749
- int NumEltsOut = ShuffV.size();
15757
+ EVT VT = SVN->getValueType(0);
15758
+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15759
+ int ShuffleNumElts = ShuffV.size();
15760
+ int HalfVec = ShuffleNumElts / 2;
15750
15761
// The width of the "valid lane" (i.e. the lane that contains the value that
15751
15762
// is vectorized) needs to be expressed in terms of the number of elements
15752
15763
// of the shuffle. It is thereby the ratio of the scalar sizes before and after
15753
- // any bitcast.
15754
- unsigned ValidLaneWidth =
15755
- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15756
- LHS.getValueType().getScalarSizeInBits()
15757
- : SToVRHS.getValueType().getScalarSizeInBits() /
15758
- RHS.getValueType().getScalarSizeInBits();
15764
+ // any bitcast, which will be set later on if the LHS or RHS are
15765
+ // SCALAR_TO_VECTOR nodes.
15766
+ unsigned LHSNumValidElts = HalfVec;
15767
+ unsigned RHSNumValidElts = HalfVec;
15759
15768
15760
15769
// Initially assume that neither input is permuted. These will be adjusted
15761
- // accordingly if either input is.
15762
- int LHSMaxIdx = -1;
15763
- int RHSMinIdx = -1;
15764
- int RHSMaxIdx = -1;
15765
- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15770
+ // accordingly if either input is. Note that -1 means that all elements
15771
+ // are undefined.
15772
+ int LHSFirstElt = 0;
15773
+ int RHSFirstElt = ShuffleNumElts;
15774
+ int LHSLastElt = -1;
15775
+ int RHSLastElt = -1;
15766
15776
15767
15777
// Get the permuted scalar to vector nodes for the source(s) that come from
15768
15778
// ISD::SCALAR_TO_VECTOR.
@@ -15771,33 +15781,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15771
15781
// the value into element zero. Since scalar size of LHS and RHS may differ
15772
15782
// after isScalarToVec, this should be checked using their own sizes.
15773
15783
if (SToVLHS) {
15774
- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15784
+ int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15785
+ if (!IsLittleEndian && LHSScalarSize >= 64)
15775
15786
return Res;
15776
15787
// Set up the values for the shuffle vector fixup.
15777
- LHSMaxIdx = NumEltsOut / NumEltsIn;
15788
+ LHSNumValidElts =
15789
+ LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15790
+ // The last element that comes from the LHS. For example:
15791
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15792
+ // The last element that comes from the LHS is actually 0, not 3
15793
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15794
+ LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15778
15795
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15779
15796
if (SToVLHS.getValueType() != LHS.getValueType())
15780
15797
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15781
15798
LHS = SToVLHS;
15782
15799
}
15783
15800
if (SToVRHS) {
15784
- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15801
+ int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15802
+ if (!IsLittleEndian && RHSScalarSize >= 64)
15785
15803
return Res;
15786
- RHSMinIdx = NumEltsOut;
15787
- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15804
+ RHSNumValidElts =
15805
+ RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15806
+ // The last element that comes from the RHS. For example:
15807
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15808
+ // The last element that comes from the RHS is actually 5, not 7
15809
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15810
+ // It is also not 4 because the original scalar_to_vector is wider and
15811
+ // actually contains two i32 elements.
15812
+ RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15788
15813
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15789
15814
if (SToVRHS.getValueType() != RHS.getValueType())
15790
15815
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15791
15816
RHS = SToVRHS;
15792
15817
}
15793
15818
15819
+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15820
+ return Res;
15821
+
15794
15822
// Fix up the shuffle mask to reflect where the desired element actually is.
15795
15823
// The minimum and maximum indices that correspond to element zero for both
15796
15824
// the LHS and RHS are computed and will control which shuffle mask entries
15797
15825
// are to be changed. For example, if the RHS is permuted, any shuffle mask
15798
- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15799
- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15800
- HalfVec, ValidLaneWidth, Subtarget);
15826
+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15827
+ fixupShuffleMaskForPermutedSToV(
15828
+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15829
+ LHSNumValidElts, RHSNumValidElts, Subtarget);
15801
15830
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15802
15831
15803
15832
// We may have simplified away the shuffle. We won't be able to do anything
0 commit comments