@@ -15386,16 +15386,18 @@ static SDValue isScalarToVec(SDValue Op) {
15386
15386
// On little endian, that's just the corresponding element in the other
15387
15387
// half of the vector. On big endian, it is in the same half but right
15388
15388
// justified rather than left justified in that half.
15389
- static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15390
- int LHSMaxIdx, int RHSMinIdx,
15391
- int RHSMaxIdx, int HalfVec,
15392
- unsigned ValidLaneWidth,
15393
- const PPCSubtarget &Subtarget) {
15389
+ static void fixupShuffleMaskForPermutedSToV(
15390
+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15391
+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15392
+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
// Rewrites ShuffV in place. Each entry is compared, using the value it had
// before any adjustment (Idx), against the inclusive ranges
// [LHSFirstElt, LHSLastElt] and [RHSFirstElt, RHSLastElt]. Entries outside
// both ranges are left untouched.
15394
15393
for (int i = 0, e = ShuffV.size(); i < e; i++) {
15395
15394
int Idx = ShuffV[i];
// A matching entry is advanced by HalfVec on little endian, and otherwise by
// HalfVec minus the valid-element count of the side it matched
// (LHSNumValidElts for the LHS range, RHSNumValidElts for the RHS range).
15396
- if (( Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx) )
15395
+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt )
15397
15396
ShuffV[i] +=
15398
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15397
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15398
+ if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15399
+ ShuffV[i] +=
15400
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15399
15401
}
15400
15402
}
15401
15403
@@ -15434,6 +15436,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15434
15436
OrigSToV.getOperand(0));
15435
15437
}
15436
15438
15439
+ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15440
+ int HalfVec, int LHSLastElementDefined,
15441
+ int RHSLastElementDefined) {
// Returns false as soon as a non-negative mask entry fails one of the two
// bounds checks below, and true if no entry does. A negative
// LHSLastElementDefined or RHSLastElementDefined disables the corresponding
// check.
15442
+ for (int i : seq<int>(0, ShuffV.size())) {
15443
+ int Index = ShuffV[i];
15444
+ if (Index < 0) // Skip explicitly undefined mask indices.
15445
+ continue;
// Only entries below HalfVec are tested against LHSLastElementDefined.
15446
+ // Handle first input vector of the vector_shuffle.
15447
+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15448
+ (Index > LHSLastElementDefined))
15449
+ return false;
// NOTE(review): the visible caller passes an RHS value that already includes
// the shuffle's element count, so the bound used here lies HalfVec above
// that index. Confirm the extra HalfVec offset is intended.
15450
+ // Handle second input vector of the vector_shuffle.
15451
+ if ((RHSLastElementDefined >= 0) &&
15452
+ (Index > HalfVec + RHSLastElementDefined))
15453
+ return false;
15454
+ }
15455
+ return true;
15456
+ }
15457
+
15437
15458
// On little endian subtargets, combine shuffles such as:
15438
15459
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15439
15460
// into:
@@ -15481,36 +15502,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15481
15502
SDValue SToVLHS = isScalarToVec(LHS);
15482
15503
SDValue SToVRHS = isScalarToVec(RHS);
15483
15504
if (SToVLHS || SToVRHS) {
15484
- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15485
- // same type and have differing element sizes, then do not perform
15486
- // the following transformation. The current transformation for
15487
- // SCALAR_TO_VECTOR assumes that both input vectors have the same
15488
- // element size. This will be updated in the future to account for
15489
- // differing sizes of the LHS and RHS.
15490
- if (SToVLHS && SToVRHS &&
15491
- (SToVLHS.getValueType().getScalarSizeInBits() !=
15492
- SToVRHS.getValueType().getScalarSizeInBits()))
15493
- return Res;
15494
-
15495
- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15496
- : SToVRHS.getValueType().getVectorNumElements();
15497
- int NumEltsOut = ShuffV.size();
15505
+ EVT VT = SVN->getValueType(0);
15506
+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15507
+ int ShuffleNumElts = ShuffV.size();
15508
+ int HalfVec = ShuffleNumElts / 2;
15498
15509
// The width of the "valid lane" (i.e. the lane that contains the value that
15499
15510
// is vectorized) needs to be expressed in terms of the number of elements
15500
15511
// of the shuffle. It is thereby the ratio of the values before and after
15501
- // any bitcast.
15502
- unsigned ValidLaneWidth =
15503
- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15504
- LHS.getValueType().getScalarSizeInBits()
15505
- : SToVRHS.getValueType().getScalarSizeInBits() /
15506
- RHS.getValueType().getScalarSizeInBits();
15512
+ // any bitcast, which will be set later on if the LHS or RHS are
15513
+ // SCALAR_TO_VECTOR nodes.
15514
+ unsigned LHSNumValidElts = HalfVec;
15515
+ unsigned RHSNumValidElts = HalfVec;
15507
15516
15508
15517
// Initially assume that neither input is permuted. These will be adjusted
15509
- // accordingly if either input is.
15510
- int LHSMaxIdx = -1;
15511
- int RHSMinIdx = -1;
15512
- int RHSMaxIdx = -1;
15513
- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15518
+ // accordingly if either input is. Note that -1 means that all elements
15519
+ // are undefined.
15520
+ int LHSFirstElt = 0;
15521
+ int RHSFirstElt = ShuffleNumElts;
15522
+ int LHSLastElt = -1;
15523
+ int RHSLastElt = -1;
15514
15524
15515
15525
// Get the permuted scalar to vector nodes for the source(s) that come from
15516
15526
// ISD::SCALAR_TO_VECTOR.
@@ -15519,33 +15529,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15519
15529
// the value into element zero. Since scalar size of LHS and RHS may differ
15520
15530
// after isScalarToVec, this should be checked using their own sizes.
15521
15531
if (SToVLHS) {
15522
- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15532
+ int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15533
+ if (!IsLittleEndian && LHSScalarSize >= 64)
15523
15534
return Res;
15524
15535
// Set up the values for the shuffle vector fixup.
15525
- LHSMaxIdx = NumEltsOut / NumEltsIn;
15536
+ LHSNumValidElts =
15537
+ LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15538
+ // The last element that comes from the LHS. For example:
15539
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15540
+ // The last element that comes from the LHS is actually 0, not 3
15541
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15542
+ LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15526
15543
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15527
15544
if (SToVLHS.getValueType() != LHS.getValueType())
15528
15545
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15529
15546
LHS = SToVLHS;
15530
15547
}
15531
15548
if (SToVRHS) {
15532
- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15549
+ int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15550
+ if (!IsLittleEndian && RHSScalarSize >= 64)
15533
15551
return Res;
15534
- RHSMinIdx = NumEltsOut;
15535
- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15552
+ RHSNumValidElts =
15553
+ RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15554
+ // The last element that comes from the RHS. For example:
15555
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15556
+ // The last element that comes from the RHS is actually 5, not 7
15557
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15558
+ // It is also not 4 because the original scalar_to_vector is wider and
15559
+ // actually contains two i32 elements.
15560
+ RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15536
15561
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15537
15562
if (SToVRHS.getValueType() != RHS.getValueType())
15538
15563
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15539
15564
RHS = SToVRHS;
15540
15565
}
15541
15566
15567
+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15568
+ return Res;
15569
+
15542
15570
// Fix up the shuffle mask to reflect where the desired element actually is.
15543
15571
// The minimum and maximum indices that correspond to element zero for both
15544
15572
// the LHS and RHS are computed and will control which shuffle mask entries
15545
15573
// are to be changed. For example, if the RHS is permuted, any shuffle mask
15546
- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15547
- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15548
- HalfVec, ValidLaneWidth, Subtarget);
15574
+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15575
+ fixupShuffleMaskForPermutedSToV(
15576
+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15577
+ LHSNumValidElts, RHSNumValidElts, Subtarget);
15549
15578
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15550
15579
15551
15580
// We may have simplified away the shuffle. We won't be able to do anything
0 commit comments