@@ -15687,16 +15687,18 @@ static SDValue isScalarToVec(SDValue Op) {
15687
15687
// On little endian, that's just the corresponding element in the other
15688
15688
// half of the vector. On big endian, it is in the same half but right
15689
15689
// justified rather than left justified in that half.
15690
- static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15691
- int LHSMaxIdx, int RHSMinIdx,
15692
- int RHSMaxIdx, int HalfVec,
15693
- unsigned ValidLaneWidth,
15694
- const PPCSubtarget &Subtarget) {
15690
+ static void fixupShuffleMaskForPermutedSToV(
15691
+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15692
+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15693
+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15695
15694
for (int i = 0, e = ShuffV.size(); i < e; i++) {
15696
15695
int Idx = ShuffV[i];
15697
- if (( Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx) )
15696
+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt )
15698
15697
ShuffV[i] +=
15699
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15698
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15699
+ if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15700
+ ShuffV[i] +=
15701
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15700
15702
}
15701
15703
}
15702
15704
@@ -15735,6 +15737,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15735
15737
OrigSToV.getOperand(0));
15736
15738
}
15737
15739
15740
+ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15741
+ int HalfVec, int LHSLastElementDefined,
15742
+ int RHSLastElementDefined) { // A negative *LastElementDefined disables that input's check.
15743
+ for (int i : seq<int>(0, ShuffV.size())) {
15744
+ int Index = ShuffV[i];
15745
+ if (Index < 0) // Skip explicitly undefined mask indices.
15746
+ continue;
15747
+ // First input of the vector_shuffle: an index below HalfVec must not exceed LHSLastElementDefined.
15748
+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15749
+ (Index > LHSLastElementDefined))
15750
+ return false;
15751
+ // Second input of the vector_shuffle: no index may exceed HalfVec + RHSLastElementDefined.
15752
+ if ((RHSLastElementDefined >= 0) &&
15753
+ (Index > HalfVec + RHSLastElementDefined)) // NOTE(review): the caller's RHSLastElt already includes RHSFirstElt; confirm the extra HalfVec is intended.
15754
+ return false;
15755
+ }
15756
+ return true; // No defined mask index exceeded either bound.
15757
+ }
15758
+
15738
15759
// On little endian subtargets, combine shuffles such as:
15739
15760
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15740
15761
// into:
@@ -15782,36 +15803,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15782
15803
SDValue SToVLHS = isScalarToVec(LHS);
15783
15804
SDValue SToVRHS = isScalarToVec(RHS);
15784
15805
if (SToVLHS || SToVRHS) {
15785
- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15786
- // same type and have differing element sizes, then do not perform
15787
- // the following transformation. The current transformation for
15788
- // SCALAR_TO_VECTOR assumes that both input vectors have the same
15789
- // element size. This will be updated in the future to account for
15790
- // differing sizes of the LHS and RHS.
15791
- if (SToVLHS && SToVRHS &&
15792
- (SToVLHS.getValueType().getScalarSizeInBits() !=
15793
- SToVRHS.getValueType().getScalarSizeInBits()))
15794
- return Res;
15795
-
15796
- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15797
- : SToVRHS.getValueType().getVectorNumElements();
15798
- int NumEltsOut = ShuffV.size();
15806
+ EVT VT = SVN->getValueType(0);
15807
+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15808
+ int ShuffleNumElts = ShuffV.size();
15809
+ int HalfVec = ShuffleNumElts / 2;
15799
15810
// The width of the "valid lane" (i.e. the lane that contains the value that
15800
15811
// is vectorized) needs to be expressed in terms of the number of elements
15801
15812
// of the shuffle. It is thereby the ratio of the values before and after
15802
- // any bitcast.
15803
- unsigned ValidLaneWidth =
15804
- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15805
- LHS.getValueType().getScalarSizeInBits()
15806
- : SToVRHS.getValueType().getScalarSizeInBits() /
15807
- RHS.getValueType().getScalarSizeInBits();
15813
+ // any bitcast. It defaults to HalfVec and is recomputed below if the LHS or
15814
+ // RHS is a SCALAR_TO_VECTOR node.
15815
+ unsigned LHSNumValidElts = HalfVec;
15816
+ unsigned RHSNumValidElts = HalfVec;
15808
15817
15809
15818
// Initially assume that neither input is permuted. These will be adjusted
15810
- // accordingly if either input is.
15811
- int LHSMaxIdx = -1;
15812
- int RHSMinIdx = -1;
15813
- int RHSMaxIdx = -1;
15814
- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15819
+ // accordingly if either input is. Note that a last element of -1 means
15820
+ // that input is not permuted, so none of its mask entries are adjusted.
15821
+ int LHSFirstElt = 0;
15822
+ int RHSFirstElt = ShuffleNumElts;
15823
+ int LHSLastElt = -1;
15824
+ int RHSLastElt = -1;
15815
15825
15816
15826
// Get the permuted scalar to vector nodes for the source(s) that come from
15817
15827
// ISD::SCALAR_TO_VECTOR.
@@ -15820,33 +15830,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15820
15830
// the value into element zero. Since scalar size of LHS and RHS may differ
15821
15831
// after isScalarToVec, this should be checked using their own sizes.
15822
15832
if (SToVLHS) {
15823
- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15833
+ int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15834
+ if (!IsLittleEndian && LHSScalarSize >= 64)
15824
15835
return Res;
15825
15836
// Set up the values for the shuffle vector fixup.
15826
- LHSMaxIdx = NumEltsOut / NumEltsIn;
15837
+ LHSNumValidElts =
15838
+ LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15839
+ // The last element that comes from the LHS. For example:
15840
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15841
+ // The last element that comes from the LHS is actually 0, not 3
15842
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15843
+ LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15827
15844
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15828
15845
if (SToVLHS.getValueType() != LHS.getValueType())
15829
15846
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15830
15847
LHS = SToVLHS;
15831
15848
}
15832
15849
if (SToVRHS) {
15833
- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15850
+ int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15851
+ if (!IsLittleEndian && RHSScalarSize >= 64)
15834
15852
return Res;
15835
- RHSMinIdx = NumEltsOut;
15836
- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15853
+ RHSNumValidElts =
15854
+ RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15855
+ // The last element that comes from the RHS. For example:
15856
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15857
+ // The last element that comes from the RHS is actually 5, not 7
15858
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15859
+ // It is also not 4 because the original scalar_to_vector is wider and
15860
+ // actually contains two i32 elements.
15861
+ RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15837
15862
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15838
15863
if (SToVRHS.getValueType() != RHS.getValueType())
15839
15864
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15840
15865
RHS = SToVRHS;
15841
15866
}
15842
15867
15868
+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15869
+ return Res;
15870
+
15843
15871
// Fix up the shuffle mask to reflect where the desired element actually is.
15844
15872
// The minimum and maximum indices that correspond to element zero for both
15845
15873
// the LHS and RHS are computed and will control which shuffle mask entries
15846
15874
// are to be changed. For example, if the RHS is permuted, any shuffle mask
15847
- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15848
- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15849
- HalfVec, ValidLaneWidth, Subtarget);
15875
+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15876
+ fixupShuffleMaskForPermutedSToV(
15877
+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15878
+ LHSNumValidElts, RHSNumValidElts, Subtarget);
15850
15879
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15851
15880
15852
15881
// We may have simplified away the shuffle. We won't be able to do anything
0 commit comments