Skip to content

Commit 606ea91

Browse files
committed
[PowerPC] Fix vector_shuffle combines when inputs are scalar_to_vector of differing types.
This patch fixes the vector_shuffle combines for the case where either or both of the left- and right-hand-side inputs are scalar_to_vector nodes. Previously, when both inputs were scalar_to_vector nodes of differing types, the combine could not handle the situation because the shuffle mask was updated incorrectly. As a temporary workaround, the combine was simply disabled in that case. This patch corrects the shuffle mask adjustment so that the combine can be performed again, and it also updates the test cases that are affected by this change. Patch migrated from https://reviews.llvm.org/D130487.
1 parent a269195 commit 606ea91

File tree

6 files changed

+811
-966
lines changed

6 files changed

+811
-966
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 69 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15386,16 +15386,18 @@ static SDValue isScalarToVec(SDValue Op) {
1538615386
// On little endian, that's just the corresponding element in the other
1538715387
// half of the vector. On big endian, it is in the same half but right
1538815388
// justified rather than left justified in that half.
15389-
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15390-
int LHSMaxIdx, int RHSMinIdx,
15391-
int RHSMaxIdx, int HalfVec,
15392-
unsigned ValidLaneWidth,
15393-
const PPCSubtarget &Subtarget) {
15389+
static void fixupShuffleMaskForPermutedSToV(
15390+
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15391+
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15392+
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
1539415393
for (int i = 0, e = ShuffV.size(); i < e; i++) {
1539515394
int Idx = ShuffV[i];
15396-
if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15395+
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
1539715396
ShuffV[i] +=
15398-
Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15397+
Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15398+
if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15399+
ShuffV[i] +=
15400+
Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1539915401
}
1540015402
}
1540115403

@@ -15434,6 +15436,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
1543415436
OrigSToV.getOperand(0));
1543515437
}
1543615438

15439+
static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15440+
int HalfVec, int LHSLastElementDefined,
15441+
int RHSLastElementDefined) {
15442+
for (int i : seq<int>(0, ShuffV.size())) {
15443+
int Index = ShuffV[i];
15444+
if (Index < 0) // Skip explicitly undefined mask indices.
15445+
continue;
15446+
// Handle first input vector of the vector_shuffle.
15447+
if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15448+
(Index > LHSLastElementDefined))
15449+
return false;
15450+
// Handle second input vector of the vector_shuffle.
15451+
if ((RHSLastElementDefined >= 0) &&
15452+
(Index > HalfVec + RHSLastElementDefined))
15453+
return false;
15454+
}
15455+
return true;
15456+
}
15457+
1543715458
// On little endian subtargets, combine shuffles such as:
1543815459
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1543915460
// into:
@@ -15481,36 +15502,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1548115502
SDValue SToVLHS = isScalarToVec(LHS);
1548215503
SDValue SToVRHS = isScalarToVec(RHS);
1548315504
if (SToVLHS || SToVRHS) {
15484-
// FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15485-
// same type and have differing element sizes, then do not perform
15486-
// the following transformation. The current transformation for
15487-
// SCALAR_TO_VECTOR assumes that both input vectors have the same
15488-
// element size. This will be updated in the future to account for
15489-
// differing sizes of the LHS and RHS.
15490-
if (SToVLHS && SToVRHS &&
15491-
(SToVLHS.getValueType().getScalarSizeInBits() !=
15492-
SToVRHS.getValueType().getScalarSizeInBits()))
15493-
return Res;
15494-
15495-
int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15496-
: SToVRHS.getValueType().getVectorNumElements();
15497-
int NumEltsOut = ShuffV.size();
15505+
EVT VT = SVN->getValueType(0);
15506+
uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15507+
int ShuffleNumElts = ShuffV.size();
15508+
int HalfVec = ShuffleNumElts / 2;
1549815509
// The width of the "valid lane" (i.e. the lane that contains the value that
1549915510
// is vectorized) needs to be expressed in terms of the number of elements
1550015511
// of the shuffle. It is thereby the ratio of the values before and after
15501-
// any bitcast.
15502-
unsigned ValidLaneWidth =
15503-
SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15504-
LHS.getValueType().getScalarSizeInBits()
15505-
: SToVRHS.getValueType().getScalarSizeInBits() /
15506-
RHS.getValueType().getScalarSizeInBits();
15512+
// any bitcast, which will be set later on if the LHS or RHS are
15513+
// SCALAR_TO_VECTOR nodes.
15514+
unsigned LHSNumValidElts = HalfVec;
15515+
unsigned RHSNumValidElts = HalfVec;
1550715516

1550815517
// Initially assume that neither input is permuted. These will be adjusted
15509-
// accordingly if either input is.
15510-
int LHSMaxIdx = -1;
15511-
int RHSMinIdx = -1;
15512-
int RHSMaxIdx = -1;
15513-
int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15518+
// accordingly if either input is. Note, that -1 means that all elements
15519+
// are undefined.
15520+
int LHSFirstElt = 0;
15521+
int RHSFirstElt = ShuffleNumElts;
15522+
int LHSLastElt = -1;
15523+
int RHSLastElt = -1;
1551415524

1551515525
// Get the permuted scalar to vector nodes for the source(s) that come from
1551615526
// ISD::SCALAR_TO_VECTOR.
@@ -15519,33 +15529,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1551915529
// the value into element zero. Since scalar size of LHS and RHS may differ
1552015530
// after isScalarToVec, this should be checked using their own sizes.
1552115531
if (SToVLHS) {
15522-
if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15532+
int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15533+
if (!IsLittleEndian && LHSScalarSize >= 64)
1552315534
return Res;
1552415535
// Set up the values for the shuffle vector fixup.
15525-
LHSMaxIdx = NumEltsOut / NumEltsIn;
15536+
LHSNumValidElts =
15537+
LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15538+
// The last element that comes from the LHS. For example:
15539+
// (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15540+
// The last element that comes from the LHS is actually 0, not 3
15541+
// because elements 1 and higher of a scalar_to_vector are undefined.
15542+
LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
1552615543
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
1552715544
if (SToVLHS.getValueType() != LHS.getValueType())
1552815545
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
1552915546
LHS = SToVLHS;
1553015547
}
1553115548
if (SToVRHS) {
15532-
if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15549+
int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15550+
if (!IsLittleEndian && RHSScalarSize >= 64)
1553315551
return Res;
15534-
RHSMinIdx = NumEltsOut;
15535-
RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15552+
RHSNumValidElts =
15553+
RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15554+
// The last element that comes from the RHS. For example:
15555+
// (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15556+
// The last element that comes from the RHS is actually 5, not 7
15557+
// because elements 1 and higher of a scalar_to_vector are undefined.
15558+
// It is also not 4 because the original scalar_to_vector is wider and
15559+
// actually contains two i32 elements.
15560+
RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
1553615561
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
1553715562
if (SToVRHS.getValueType() != RHS.getValueType())
1553815563
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
1553915564
RHS = SToVRHS;
1554015565
}
1554115566

15567+
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15568+
return Res;
15569+
1554215570
// Fix up the shuffle mask to reflect where the desired element actually is.
1554315571
// The minimum and maximum indices that correspond to element zero for both
1554415572
// the LHS and RHS are computed and will control which shuffle mask entries
1554515573
// are to be changed. For example, if the RHS is permuted, any shuffle mask
15546-
// entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15547-
fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15548-
HalfVec, ValidLaneWidth, Subtarget);
15574+
// entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15575+
fixupShuffleMaskForPermutedSToV(
15576+
ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15577+
LHSNumValidElts, RHSNumValidElts, Subtarget);
1554915578
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
1555015579

1555115580
// We may have simplified away the shuffle. We won't be able to do anything

llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2505,11 +2505,9 @@ define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
25052505
;
25062506
; CHECK-LE-LABEL: buildi2:
25072507
; CHECK-LE: # %bb.0: # %entry
2508-
; CHECK-LE-NEXT: mtfprd f0, r4
2508+
; CHECK-LE-NEXT: mtfprwz f0, r4
25092509
; CHECK-LE-NEXT: mtfprd f1, r3
2510-
; CHECK-LE-NEXT: xxswapd vs0, vs0
2511-
; CHECK-LE-NEXT: xxswapd v2, vs1
2512-
; CHECK-LE-NEXT: xxmrgld v2, v2, vs0
2510+
; CHECK-LE-NEXT: xxmrghd v2, vs1, vs0
25132511
; CHECK-LE-NEXT: blr
25142512
;
25152513
; CHECK-AIX-LABEL: buildi2:

0 commit comments

Comments
 (0)