@@ -15512,14 +15512,16 @@ static void fixupShuffleMaskForPermutedSToV(
15512
15512
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15513
15513
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15514
15514
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15515
+ int LHSEltFixup =
15516
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15517
+ int RHSEltFixup =
15518
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15515
15519
for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15516
15520
int Idx = ShuffV[I];
15517
15521
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15518
- ShuffV[I] +=
15519
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15522
+ ShuffV[I] += LHSEltFixup;
15520
15523
if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15521
- ShuffV[I] +=
15522
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15524
+ ShuffV[I] += RHSEltFixup;
15523
15525
}
15524
15526
}
15525
15527
@@ -15577,6 +15579,31 @@ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15577
15579
return true;
15578
15580
}
15579
15581
15582
+ static SDValue generateSToVPermutedForVecShuffle(
15583
+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15584
+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15585
+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15586
+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15587
+ // Set up the values for the shuffle vector fixup.
15588
+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15589
+ // The last element depends on if the input comes from the LHS or RHS.
15590
+ //
15591
+ // For example:
15592
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15593
+ //
15594
+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
15595
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15596
+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
15597
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15598
+ // It is also not 4 because the original scalar_to_vector is wider and
15599
+ // actually contains two i32 elements.
15600
+ LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
15601
+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15602
+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15603
+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15604
+ return SToVPermuted;
15605
+ }
15606
+
15580
15607
// On little endian subtargets, combine shuffles such as:
15581
15608
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15582
15609
// into:
@@ -15654,36 +15681,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15654
15681
int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15655
15682
if (!IsLittleEndian && LHSScalarSize >= 64)
15656
15683
return Res;
15657
- // Set up the values for the shuffle vector fixup.
15658
- LHSNumValidElts =
15659
- LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15660
- // The last element that comes from the LHS. For example:
15661
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15662
- // The last element that comes from the LHS is actually 0, not 3
15663
- // because elements 1 and higher of a scalar_to_vector are undefined.
15664
- LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15665
- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15666
- if (SToVLHS.getValueType() != LHS.getValueType())
15667
- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15668
- LHS = SToVLHS;
15684
+ LHS = generateSToVPermutedForVecShuffle(
15685
+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15686
+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15669
15687
}
15670
15688
if (SToVRHS) {
15671
15689
int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15672
15690
if (!IsLittleEndian && RHSScalarSize >= 64)
15673
15691
return Res;
15674
- RHSNumValidElts =
15675
- RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15676
- // The last element that comes from the RHS. For example:
15677
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15678
- // The last element that comes from the RHS is actually 5, not 7
15679
- // because elements 1 and higher of a scalar_to_vector are undefined.
15680
- // It is also not 4 because the original scalar_to_vector is wider and
15681
- // actually contains two i32 elements.
15682
- RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15683
- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15684
- if (SToVRHS.getValueType() != RHS.getValueType())
15685
- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15686
- RHS = SToVRHS;
15692
+ RHS = generateSToVPermutedForVecShuffle(
15693
+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15694
+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15687
15695
}
15688
15696
15689
15697
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
0 commit comments