Skip to content

Commit c5f20d3

Browse files
committed
[PowerPC] Fix vector_shuffle combines when inputs are scalar_to_vector of differing types.
This patch fixes the vector_shuffle combines when either or both of the left and right hand side inputs are scalar_to_vector nodes. Previously, when both the left and right hand side inputs were scalar_to_vector nodes, the combine could not handle the situation, as the shuffle mask was updated incorrectly. As a temporary workaround for this problem, the combine was simply disabled and not performed. Now, this patch not only resolves the previous issue of incorrect shuffle mask adjustments, but also updates any test cases that are affected by this change. Patch migrated from https://reviews.llvm.org/D130487.
1 parent 60af154 commit c5f20d3

File tree

6 files changed

+811
-934
lines changed

6 files changed

+811
-934
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 69 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15638,16 +15638,18 @@ static SDValue isScalarToVec(SDValue Op) {
1563815638
// On little endian, that's just the corresponding element in the other
1563915639
// half of the vector. On big endian, it is in the same half but right
1564015640
// justified rather than left justified in that half.
15641-
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15642-
int LHSMaxIdx, int RHSMinIdx,
15643-
int RHSMaxIdx, int HalfVec,
15644-
unsigned ValidLaneWidth,
15645-
const PPCSubtarget &Subtarget) {
15641+
static void fixupShuffleMaskForPermutedSToV(
15642+
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15643+
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15644+
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
1564615645
for (int i = 0, e = ShuffV.size(); i < e; i++) {
1564715646
int Idx = ShuffV[i];
15648-
if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15647+
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
1564915648
ShuffV[i] +=
15650-
Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15649+
Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15650+
if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15651+
ShuffV[i] +=
15652+
Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1565115653
}
1565215654
}
1565315655

@@ -15686,6 +15688,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
1568615688
OrigSToV.getOperand(0));
1568715689
}
1568815690

15691+
static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15692+
int HalfVec, int LHSLastElementDefined,
15693+
int RHSLastElementDefined) {
15694+
for (int i : seq<int>(0, ShuffV.size())) {
15695+
int Index = ShuffV[i];
15696+
if (Index < 0) // Skip explicitly undefined mask indices.
15697+
continue;
15698+
// Handle first input vector of the vector_shuffle.
15699+
if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15700+
(Index > LHSLastElementDefined))
15701+
return false;
15702+
// Handle second input vector of the vector_shuffle.
15703+
if ((RHSLastElementDefined >= 0) &&
15704+
(Index > HalfVec + RHSLastElementDefined))
15705+
return false;
15706+
}
15707+
return true;
15708+
}
15709+
1568915710
// On little endian subtargets, combine shuffles such as:
1569015711
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1569115712
// into:
@@ -15733,36 +15754,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1573315754
SDValue SToVLHS = isScalarToVec(LHS);
1573415755
SDValue SToVRHS = isScalarToVec(RHS);
1573515756
if (SToVLHS || SToVRHS) {
15736-
// FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15737-
// same type and have differing element sizes, then do not perform
15738-
// the following transformation. The current transformation for
15739-
// SCALAR_TO_VECTOR assumes that both input vectors have the same
15740-
// element size. This will be updated in the future to account for
15741-
// differing sizes of the LHS and RHS.
15742-
if (SToVLHS && SToVRHS &&
15743-
(SToVLHS.getValueType().getScalarSizeInBits() !=
15744-
SToVRHS.getValueType().getScalarSizeInBits()))
15745-
return Res;
15746-
15747-
int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15748-
: SToVRHS.getValueType().getVectorNumElements();
15749-
int NumEltsOut = ShuffV.size();
15757+
EVT VT = SVN->getValueType(0);
15758+
uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15759+
int ShuffleNumElts = ShuffV.size();
15760+
int HalfVec = ShuffleNumElts / 2;
1575015761
// The width of the "valid lane" (i.e. the lane that contains the value that
1575115762
// is vectorized) needs to be expressed in terms of the number of elements
1575215763
// of the shuffle. It is thereby the ratio of the values before and after
15753-
// any bitcast.
15754-
unsigned ValidLaneWidth =
15755-
SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15756-
LHS.getValueType().getScalarSizeInBits()
15757-
: SToVRHS.getValueType().getScalarSizeInBits() /
15758-
RHS.getValueType().getScalarSizeInBits();
15764+
// any bitcast, which will be set later on if the LHS or RHS are
15765+
// SCALAR_TO_VECTOR nodes.
15766+
unsigned LHSNumValidElts = HalfVec;
15767+
unsigned RHSNumValidElts = HalfVec;
1575915768

1576015769
// Initially assume that neither input is permuted. These will be adjusted
15761-
// accordingly if either input is.
15762-
int LHSMaxIdx = -1;
15763-
int RHSMinIdx = -1;
15764-
int RHSMaxIdx = -1;
15765-
int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15770+
// accordingly if either input is. Note, that -1 means that all elements
15771+
// are undefined.
15772+
int LHSFirstElt = 0;
15773+
int RHSFirstElt = ShuffleNumElts;
15774+
int LHSLastElt = -1;
15775+
int RHSLastElt = -1;
1576615776

1576715777
// Get the permuted scalar to vector nodes for the source(s) that come from
1576815778
// ISD::SCALAR_TO_VECTOR.
@@ -15771,33 +15781,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1577115781
// the value into element zero. Since scalar size of LHS and RHS may differ
1577215782
// after isScalarToVec, this should be checked using their own sizes.
1577315783
if (SToVLHS) {
15774-
if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15784+
int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15785+
if (!IsLittleEndian && LHSScalarSize >= 64)
1577515786
return Res;
1577615787
// Set up the values for the shuffle vector fixup.
15777-
LHSMaxIdx = NumEltsOut / NumEltsIn;
15788+
LHSNumValidElts =
15789+
LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15790+
// The last element that comes from the LHS. For example:
15791+
// (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15792+
// The last element that comes from the LHS is actually 0, not 3
15793+
// because elements 1 and higher of a scalar_to_vector are undefined.
15794+
LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
1577815795
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
1577915796
if (SToVLHS.getValueType() != LHS.getValueType())
1578015797
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
1578115798
LHS = SToVLHS;
1578215799
}
1578315800
if (SToVRHS) {
15784-
if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15801+
int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15802+
if (!IsLittleEndian && RHSScalarSize >= 64)
1578515803
return Res;
15786-
RHSMinIdx = NumEltsOut;
15787-
RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15804+
RHSNumValidElts =
15805+
RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15806+
// The last element that comes from the RHS. For example:
15807+
// (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15808+
// The last element that comes from the RHS is actually 5, not 7
15809+
// because elements 1 and higher of a scalar_to_vector are undefined.
15810+
// It is also not 4 because the original scalar_to_vector is wider and
15811+
// actually contains two i32 elements.
15812+
RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
1578815813
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
1578915814
if (SToVRHS.getValueType() != RHS.getValueType())
1579015815
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
1579115816
RHS = SToVRHS;
1579215817
}
1579315818

15819+
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15820+
return Res;
15821+
1579415822
// Fix up the shuffle mask to reflect where the desired element actually is.
1579515823
// The minimum and maximum indices that correspond to element zero for both
1579615824
// the LHS and RHS are computed and will control which shuffle mask entries
1579715825
// are to be changed. For example, if the RHS is permuted, any shuffle mask
15798-
// entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15799-
fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15800-
HalfVec, ValidLaneWidth, Subtarget);
15826+
// entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15827+
fixupShuffleMaskForPermutedSToV(
15828+
ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15829+
LHSNumValidElts, RHSNumValidElts, Subtarget);
1580115830
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
1580215831

1580315832
// We may have simplified away the shuffle. We won't be able to do anything

llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2499,11 +2499,9 @@ define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
24992499
;
25002500
; CHECK-LE-LABEL: buildi2:
25012501
; CHECK-LE: # %bb.0: # %entry
2502-
; CHECK-LE-NEXT: mtfprd f0, r4
2502+
; CHECK-LE-NEXT: mtfprwz f0, r4
25032503
; CHECK-LE-NEXT: mtfprd f1, r3
2504-
; CHECK-LE-NEXT: xxswapd vs0, vs0
2505-
; CHECK-LE-NEXT: xxswapd v2, vs1
2506-
; CHECK-LE-NEXT: xxmrgld v2, v2, vs0
2504+
; CHECK-LE-NEXT: xxmrghd v2, vs1, vs0
25072505
; CHECK-LE-NEXT: blr
25082506
;
25092507
; CHECK-AIX-LABEL: buildi2:

0 commit comments

Comments
 (0)