@@ -15508,16 +15508,18 @@ static SDValue isScalarToVec(SDValue Op) {
15508
15508
// On little endian, that's just the corresponding element in the other
15509
15509
// half of the vector. On big endian, it is in the same half but right
15510
15510
// justified rather than left justified in that half.
15511
// Review notes for this hunk (rewrites every mask entry of ShuffV in place):
//  * Removed ('-') version: one shared ValidLaneWidth and two half-open index
//    ranges, [0, LHSMaxIdx) and [RHSMinIdx, RHSMaxIdx), tested in a single
//    condition.
//  * Added ('+') version: two inclusive index ranges,
//    [LHSFirstElt, LHSLastElt] and [RHSFirstElt, RHSLastElt], each tested
//    separately and each paired with its own valid-element count
//    (LHSNumValidElts / RHSNumValidElts).
// In both versions a matching entry is increased by HalfVec when
// Subtarget.isLittleEndian() is true, and by HalfVec minus the valid-element
// count otherwise. A range whose last index is below its first index matches
// no entry.
- static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15512
- int LHSMaxIdx, int RHSMinIdx,
15513
- int RHSMaxIdx, int HalfVec,
15514
- unsigned ValidLaneWidth,
15515
- const PPCSubtarget &Subtarget) {
15511
+ static void fixupShuffleMaskForPermutedSToV(
15512
+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15513
+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15514
+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15516
15515
for (int i = 0, e = ShuffV.size(); i < e; i++) {
// Idx is read once, before any adjustment, and every range test below uses
// this saved value rather than the possibly-updated ShuffV[i].
15517
15516
int Idx = ShuffV[i];
// First-input (LHS) range test: combined with the RHS test in the removed
// line, stand-alone and inclusive of LHSLastElt in the added line.
15518
- if (( Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx) )
15517
+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt )
15519
15518
ShuffV[i] +=
15520
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15519
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
// Second-input (RHS) range test, added by this change; it applies the same
// adjustment but uses RHSNumValidElts.
15520
+ if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15521
+ ShuffV[i] +=
15522
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15521
15523
}
15522
15524
}
15523
15525
@@ -15556,6 +15558,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15556
15558
OrigSToV.getOperand(0));
15557
15559
}
15558
15560
15561
+ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15562
+ int HalfVec, int LHSLastElementDefined,
15563
+ int RHSLastElementDefined) {
15564
+ for (int i : seq<int>(0, ShuffV.size())) {
15565
+ int Index = ShuffV[i];
15566
+ if (Index < 0) // Skip explicitly undefined mask indices.
15567
+ continue;
15568
+ // Handle first input vector of the vector_shuffle.
15569
+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15570
+ (Index > LHSLastElementDefined))
15571
+ return false;
15572
+ // Handle second input vector of the vector_shuffle.
15573
+ if ((RHSLastElementDefined >= 0) &&
15574
+ (Index > HalfVec + RHSLastElementDefined))
15575
+ return false;
15576
+ }
15577
+ return true;
15578
+ }
15579
+
15559
15580
// On little endian subtargets, combine shuffles such as:
15560
15581
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15561
15582
// into:
@@ -15603,36 +15624,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15603
15624
SDValue SToVLHS = isScalarToVec(LHS);
15604
15625
SDValue SToVRHS = isScalarToVec(RHS);
15605
15626
if (SToVLHS || SToVRHS) {
15606
- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15607
- // same type and have differing element sizes, then do not perform
15608
- // the following transformation. The current transformation for
15609
- // SCALAR_TO_VECTOR assumes that both input vectors have the same
15610
- // element size. This will be updated in the future to account for
15611
- // differing sizes of the LHS and RHS.
15612
- if (SToVLHS && SToVRHS &&
15613
- (SToVLHS.getValueType().getScalarSizeInBits() !=
15614
- SToVRHS.getValueType().getScalarSizeInBits()))
15615
- return Res;
15616
-
15617
- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15618
- : SToVRHS.getValueType().getVectorNumElements();
15619
- int NumEltsOut = ShuffV.size();
15627
+ EVT VT = SVN->getValueType(0);
15628
+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15629
+ int ShuffleNumElts = ShuffV.size();
15630
+ int HalfVec = ShuffleNumElts / 2;
15620
15631
// The width of the "valid lane" (i.e. the lane that contains the value that
15621
15632
// is vectorized) needs to be expressed in terms of the number of elements
15622
15633
// of the shuffle. It is thereby the ratio of the values before and after
15623
- // any bitcast.
15624
- unsigned ValidLaneWidth =
15625
- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15626
- LHS.getValueType().getScalarSizeInBits()
15627
- : SToVRHS.getValueType().getScalarSizeInBits() /
15628
- RHS.getValueType().getScalarSizeInBits();
15634
+ // any bitcast, which will be set later on if the LHS or RHS are
15635
+ // SCALAR_TO_VECTOR nodes.
15636
+ unsigned LHSNumValidElts = HalfVec;
15637
+ unsigned RHSNumValidElts = HalfVec;
15629
15638
15630
15639
// Initially assume that neither input is permuted. These will be adjusted
15631
- // accordingly if either input is.
15632
- int LHSMaxIdx = -1;
15633
- int RHSMinIdx = -1;
15634
- int RHSMaxIdx = -1;
15635
- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15640
+ // accordingly if either input is. Note that -1 means that all elements
15641
+ // are undefined.
15642
+ int LHSFirstElt = 0;
15643
+ int RHSFirstElt = ShuffleNumElts;
15644
+ int LHSLastElt = -1;
15645
+ int RHSLastElt = -1;
15636
15646
15637
15647
// Get the permuted scalar to vector nodes for the source(s) that come from
15638
15648
// ISD::SCALAR_TO_VECTOR.
@@ -15641,33 +15651,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15641
15651
// the value into element zero. Since scalar size of LHS and RHS may differ
15642
15652
// after isScalarToVec, this should be checked using their own sizes.
15643
15653
if (SToVLHS) {
15644
- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15654
+ int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15655
+ if (!IsLittleEndian && LHSScalarSize >= 64)
15645
15656
return Res;
15646
15657
// Set up the values for the shuffle vector fixup.
15647
- LHSMaxIdx = NumEltsOut / NumEltsIn;
15658
+ LHSNumValidElts =
15659
+ LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15660
+ // The last element that comes from the LHS. For example:
15661
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15662
+ // The last element that comes from the LHS is actually 0, not 3
15663
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15664
+ LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15648
15665
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15649
15666
if (SToVLHS.getValueType() != LHS.getValueType())
15650
15667
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15651
15668
LHS = SToVLHS;
15652
15669
}
15653
15670
if (SToVRHS) {
15654
- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15671
+ int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15672
+ if (!IsLittleEndian && RHSScalarSize >= 64)
15655
15673
return Res;
15656
- RHSMinIdx = NumEltsOut;
15657
- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15674
+ RHSNumValidElts =
15675
+ RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15676
+ // The last element that comes from the RHS. For example:
15677
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15678
+ // The last element that comes from the RHS is actually 5, not 7
15679
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15680
+ // It is also not 4 because the original scalar_to_vector is wider and
15681
+ // actually contains two i32 elements.
15682
+ RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15658
15683
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15659
15684
if (SToVRHS.getValueType() != RHS.getValueType())
15660
15685
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15661
15686
RHS = SToVRHS;
15662
15687
}
15663
15688
15689
+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15690
+ return Res;
15691
+
15664
15692
// Fix up the shuffle mask to reflect where the desired element actually is.
15665
15693
// The minimum and maximum indices that correspond to element zero for both
15666
15694
// the LHS and RHS are computed and will control which shuffle mask entries
15667
15695
// are to be changed. For example, if the RHS is permuted, any shuffle mask
15668
- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15669
- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15670
- HalfVec, ValidLaneWidth, Subtarget);
15696
+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15697
+ fixupShuffleMaskForPermutedSToV(
15698
+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15699
+ LHSNumValidElts, RHSNumValidElts, Subtarget);
15671
15700
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15672
15701
15673
15702
// We may have simplified away the shuffle. We won't be able to do anything
0 commit comments