@@ -15691,14 +15691,16 @@ static void fixupShuffleMaskForPermutedSToV(
15691
15691
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15692
15692
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15693
15693
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15694
+ int LHSEltFixup =
15695
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15696
+ int RHSEltFixup =
15697
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15694
15698
for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15695
15699
int Idx = ShuffV[I];
15696
15700
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15697
- ShuffV[I] +=
15698
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15701
+ ShuffV[I] += LHSEltFixup;
15699
15702
if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15700
- ShuffV[I] +=
15701
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15703
+ ShuffV[I] += RHSEltFixup;
15702
15704
}
15703
15705
}
15704
15706
@@ -15756,6 +15758,31 @@ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15756
15758
return true;
15757
15759
}
15758
15760
15761
+ static SDValue generateSToVPermutedForVecShuffle(
15762
+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15763
+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15764
+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15765
+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15766
+ // Set up the values for the shuffle vector fixup.
15767
+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15768
+ // The last element depends on if the input comes from the LHS or RHS.
15769
+ //
15770
+ // For example:
15771
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15772
+ //
15773
+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
15774
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15775
+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
15776
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15777
+ // It is also not 4 because the original scalar_to_vector is wider and
15778
+ // actually contains two i32 elements.
15779
+ LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
15780
+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15781
+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15782
+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15783
+ return SToVPermuted;
15784
+ }
15785
+
15759
15786
// On little endian subtargets, combine shuffles such as:
15760
15787
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15761
15788
// into:
@@ -15833,36 +15860,17 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15833
15860
int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15834
15861
if (!IsLittleEndian && LHSScalarSize >= 64)
15835
15862
return Res;
15836
- // Set up the values for the shuffle vector fixup.
15837
- LHSNumValidElts =
15838
- LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15839
- // The last element that comes from the LHS. For example:
15840
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15841
- // The last element that comes from the LHS is actually 0, not 3
15842
- // because elements 1 and higher of a scalar_to_vector are undefined.
15843
- LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
15844
- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15845
- if (SToVLHS.getValueType() != LHS.getValueType())
15846
- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15847
- LHS = SToVLHS;
15863
+ LHS = generateSToVPermutedForVecShuffle(
15864
+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15865
+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15848
15866
}
15849
15867
if (SToVRHS) {
15850
15868
int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15851
15869
if (!IsLittleEndian && RHSScalarSize >= 64)
15852
15870
return Res;
15853
- RHSNumValidElts =
15854
- RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15855
- // The last element that comes from the RHS. For example:
15856
- // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15857
- // The last element that comes from the RHS is actually 5, not 7
15858
- // because elements 1 and higher of a scalar_to_vector are undefined.
15859
- // It is also not 4 because the original scalar_to_vector is wider and
15860
- // actually contains two i32 elements.
15861
- RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
15862
- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15863
- if (SToVRHS.getValueType() != RHS.getValueType())
15864
- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15865
- RHS = SToVRHS;
15871
+ RHS = generateSToVPermutedForVecShuffle(
15872
+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15873
+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15866
15874
}
15867
15875
15868
15876
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
0 commit comments