@@ -15390,14 +15390,16 @@ static void fixupShuffleMaskForPermutedSToV(
15390
15390
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15391
15391
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15392
15392
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15393
+ int LHSEltFixup =
15394
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15395
+ int RHSEltFixup =
15396
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15393
15397
for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15394
15398
int Idx = ShuffV[I];
15395
15399
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15396
- ShuffV[I] +=
15397
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15400
+ ShuffV[I] += LHSEltFixup;
15398
15401
if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15399
- ShuffV[I] +=
15400
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15402
+ ShuffV[I] += RHSEltFixup;
15401
15403
}
15402
15404
}
15403
15405
@@ -15455,6 +15457,31 @@ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15455
15457
return true;
15456
15458
}
15457
15459
15460
+ static SDValue generateSToVPermutedForVecShuffle(
15461
+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15462
+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15463
+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15464
+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15465
+ // Set up the values for the shuffle vector fixup.
15466
+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15467
+ // The last element depends on whether the input comes from the LHS or RHS.
15468
+ //
15469
+ // For example:
15470
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15471
+ //
15472
+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
15473
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15474
+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
15475
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15476
+ // It is also not 4 because the original scalar_to_vector is wider and
15477
+ // actually contains two i32 elements.
15478
+ LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
15479
+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15480
+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15481
+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15482
+ return SToVPermuted;
15483
+ }
15484
+
15458
15485
// On little endian subtargets, combine shuffles such as:
15459
15486
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15460
15487
// into:
@@ -15532,36 +15559,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15532
15559
int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15533
15560
if (!IsLittleEndian && LHSScalarSize >= 64)
15534
15561
return Res;
15535
- // Set up the values for the shuffle vector fixup.
15536
- LHSNumValidElts =
15537
- LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15538
- // The last element that comes from the LHS. For example:
15539
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15540
- // The last element that comes from the LHS is actually 0, not 3
15541
- // because elements 1 and higher of a scalar_to_vector are undefined.
15542
- LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15543
- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15544
- if (SToVLHS.getValueType() != LHS.getValueType())
15545
- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15546
- LHS = SToVLHS;
15562
+ LHS = generateSToVPermutedForVecShuffle(
15563
+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15564
+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15547
15565
}
15548
15566
if (SToVRHS) {
15549
15567
int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15550
15568
if (!IsLittleEndian && RHSScalarSize >= 64)
15551
15569
return Res;
15552
- RHSNumValidElts =
15553
- RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15554
- // The last element that comes from the RHS. For example:
15555
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15556
- // The last element that comes from the RHS is actually 5, not 7
15557
- // because elements 1 and higher of a scalar_to_vector are undefined.
15558
- // It is also not 4 because the original scalar_to_vector is wider and
15559
- // actually contains two i32 elements.
15560
- RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15561
- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15562
- if (SToVRHS.getValueType() != RHS.getValueType())
15563
- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15564
- RHS = SToVRHS;
15570
+ RHS = generateSToVPermutedForVecShuffle(
15571
+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15572
+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15565
15573
}
15566
15574
15567
15575
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
0 commit comments