@@ -15642,14 +15642,16 @@ static void fixupShuffleMaskForPermutedSToV(
15642
15642
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15643
15643
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15644
15644
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15645
+ int LHSEltFixup =
15646
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15647
+ int RHSEltFixup =
15648
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15645
15649
for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15646
15650
int Idx = ShuffV[I];
15647
15651
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15648
- ShuffV[I] +=
15649
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15652
+ ShuffV[I] += LHSEltFixup;
15650
15653
if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15651
- ShuffV[I] +=
15652
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15654
+ ShuffV[I] += RHSEltFixup;
15653
15655
}
15654
15656
}
15655
15657
@@ -15707,6 +15709,31 @@ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15707
15709
return true;
15708
15710
}
15709
15711
15712
+ static SDValue generateSToVPermutedForVecShuffle(
15713
+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15714
+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15715
+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15716
+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15717
+ // Set up the values for the shuffle vector fixup. NumValidElts and LastElt are out-parameters written here for the caller.
15718
+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15719
+ // The last element depends on whether the input comes from the LHS or RHS, which is why FirstElt is added below.
15720
+ //
15721
+ // For example:
15722
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15723
+ //
15724
+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
15725
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15726
+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
15727
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15728
+ // It is also not 4 because the original scalar_to_vector is wider and
15729
+ // actually contains two i32 elements (with ShuffleEltWidth == 32 and FirstElt == 4: 64 / (32 + 1) + 4 == 5).
15730
+ LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
15731
+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15732
+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15733
+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15734
+ return SToVPermuted;
15735
+ }
15736
+
15710
15737
// On little endian subtargets, combine shuffles such as:
15711
15738
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15712
15739
// into:
@@ -15784,36 +15811,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15784
15811
int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15785
15812
if (!IsLittleEndian && LHSScalarSize >= 64)
15786
15813
return Res;
15787
- // Set up the values for the shuffle vector fixup.
15788
- LHSNumValidElts =
15789
- LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15790
- // The last element that comes from the LHS. For example:
15791
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15792
- // The last element that comes from the LHS is actually 0, not 3
15793
- // because elements 1 and higher of a scalar_to_vector are undefined.
15794
- LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15795
- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15796
- if (SToVLHS.getValueType() != LHS.getValueType())
15797
- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15798
- LHS = SToVLHS;
15814
+ LHS = generateSToVPermutedForVecShuffle(
15815
+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15816
+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15799
15817
}
15800
15818
if (SToVRHS) {
15801
15819
int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15802
15820
if (!IsLittleEndian && RHSScalarSize >= 64)
15803
15821
return Res;
15804
- RHSNumValidElts =
15805
- RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15806
- // The last element that comes from the RHS. For example:
15807
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15808
- // The last element that comes from the RHS is actually 5, not 7
15809
- // because elements 1 and higher of a scalar_to_vector are undefined.
15810
- // It is also not 4 because the original scalar_to_vector is wider and
15811
- // actually contains two i32 elements.
15812
- RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15813
- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15814
- if (SToVRHS.getValueType() != RHS.getValueType())
15815
- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15816
- RHS = SToVRHS;
15822
+ RHS = generateSToVPermutedForVecShuffle(
15823
+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15824
+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15817
15825
}
15818
15826
15819
15827
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
0 commit comments