Skip to content

Commit 1cf6d97

Browse files
committed
[PowerPC] Fix vector_shuffle combines when inputs are scalar_to_vector of differing types.
This patch fixes the combines for vector_shuffles when either or both of the left and right hand side inputs are scalar_to_vector nodes. Previously, when both the left and right hand side inputs were scalar_to_vector nodes of differing types, the combine could not handle the situation because the shuffle mask was updated incorrectly. As a temporary workaround, the combine was simply disabled and not performed in that case. This patch not only resolves the incorrect shuffle mask adjustments for the left and right hand side inputs, but also updates the test cases that are affected by this change. Patch migrated from https://reviews.llvm.org/D130487.
1 parent bd9f2c2 commit 1cf6d97

File tree

6 files changed

+811
-966
lines changed

6 files changed

+811
-966
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 69 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15508,16 +15508,18 @@ static SDValue isScalarToVec(SDValue Op) {
1550815508
// On little endian, that's just the corresponding element in the other
1550915509
// half of the vector. On big endian, it is in the same half but right
1551015510
// justified rather than left justified in that half.
15511-
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15512-
int LHSMaxIdx, int RHSMinIdx,
15513-
int RHSMaxIdx, int HalfVec,
15514-
unsigned ValidLaneWidth,
15515-
const PPCSubtarget &Subtarget) {
15511+
static void fixupShuffleMaskForPermutedSToV(
15512+
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15513+
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15514+
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
1551615515
for (int i = 0, e = ShuffV.size(); i < e; i++) {
1551715516
int Idx = ShuffV[i];
15518-
if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15517+
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
1551915518
ShuffV[i] +=
15520-
Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15519+
Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15520+
if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15521+
ShuffV[i] +=
15522+
Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
1552115523
}
1552215524
}
1552315525

@@ -15556,6 +15558,25 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
1555615558
OrigSToV.getOperand(0));
1555715559
}
1555815560

15561+
static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15562+
int HalfVec, int LHSLastElementDefined,
15563+
int RHSLastElementDefined) {
15564+
for (int i : seq<int>(0, ShuffV.size())) {
15565+
int Index = ShuffV[i];
15566+
if (Index < 0) // Skip explicitly undefined mask indices.
15567+
continue;
15568+
// Handle first input vector of the vector_shuffle.
15569+
if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15570+
(Index > LHSLastElementDefined))
15571+
return false;
15572+
// Handle second input vector of the vector_shuffle.
15573+
if ((RHSLastElementDefined >= 0) &&
15574+
(Index > HalfVec + RHSLastElementDefined))
15575+
return false;
15576+
}
15577+
return true;
15578+
}
15579+
1555915580
// On little endian subtargets, combine shuffles such as:
1556015581
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1556115582
// into:
@@ -15603,36 +15624,25 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1560315624
SDValue SToVLHS = isScalarToVec(LHS);
1560415625
SDValue SToVRHS = isScalarToVec(RHS);
1560515626
if (SToVLHS || SToVRHS) {
15606-
// FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15607-
// same type and have differing element sizes, then do not perform
15608-
// the following transformation. The current transformation for
15609-
// SCALAR_TO_VECTOR assumes that both input vectors have the same
15610-
// element size. This will be updated in the future to account for
15611-
// differing sizes of the LHS and RHS.
15612-
if (SToVLHS && SToVRHS &&
15613-
(SToVLHS.getValueType().getScalarSizeInBits() !=
15614-
SToVRHS.getValueType().getScalarSizeInBits()))
15615-
return Res;
15616-
15617-
int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15618-
: SToVRHS.getValueType().getVectorNumElements();
15619-
int NumEltsOut = ShuffV.size();
15627+
EVT VT = SVN->getValueType(0);
15628+
uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15629+
int ShuffleNumElts = ShuffV.size();
15630+
int HalfVec = ShuffleNumElts / 2;
1562015631
// The width of the "valid lane" (i.e. the lane that contains the value that
1562115632
// is vectorized) needs to be expressed in terms of the number of elements
1562215633
// of the shuffle. It is thereby the ratio of the values before and after
15623-
// any bitcast.
15624-
unsigned ValidLaneWidth =
15625-
SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15626-
LHS.getValueType().getScalarSizeInBits()
15627-
: SToVRHS.getValueType().getScalarSizeInBits() /
15628-
RHS.getValueType().getScalarSizeInBits();
15634+
// any bitcast, which will be set later on if the LHS or RHS are
15635+
// SCALAR_TO_VECTOR nodes.
15636+
unsigned LHSNumValidElts = HalfVec;
15637+
unsigned RHSNumValidElts = HalfVec;
1562915638

1563015639
// Initially assume that neither input is permuted. These will be adjusted
15631-
// accordingly if either input is.
15632-
int LHSMaxIdx = -1;
15633-
int RHSMinIdx = -1;
15634-
int RHSMaxIdx = -1;
15635-
int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15640+
// accordingly if either input is. Note that -1 means that all elements
15641+
// are undefined.
15642+
int LHSFirstElt = 0;
15643+
int RHSFirstElt = ShuffleNumElts;
15644+
int LHSLastElt = -1;
15645+
int RHSLastElt = -1;
1563615646

1563715647
// Get the permuted scalar to vector nodes for the source(s) that come from
1563815648
// ISD::SCALAR_TO_VECTOR.
@@ -15641,33 +15651,52 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1564115651
// the value into element zero. Since scalar size of LHS and RHS may differ
1564215652
// after isScalarToVec, this should be checked using their own sizes.
1564315653
if (SToVLHS) {
15644-
if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15654+
int LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15655+
if (!IsLittleEndian && LHSScalarSize >= 64)
1564515656
return Res;
1564615657
// Set up the values for the shuffle vector fixup.
15647-
LHSMaxIdx = NumEltsOut / NumEltsIn;
15658+
LHSNumValidElts =
15659+
LHSScalarSize / LHS.getValueType().getScalarSizeInBits();
15660+
// The last element that comes from the LHS. For example:
15661+
// (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15662+
// The last element that comes from the LHS is actually 0, not 3
15663+
// because elements 1 and higher of a scalar_to_vector are undefined.
15664+
LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
1564815665
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
1564915666
if (SToVLHS.getValueType() != LHS.getValueType())
1565015667
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
1565115668
LHS = SToVLHS;
1565215669
}
1565315670
if (SToVRHS) {
15654-
if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15671+
int RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15672+
if (!IsLittleEndian && RHSScalarSize >= 64)
1565515673
return Res;
15656-
RHSMinIdx = NumEltsOut;
15657-
RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15674+
RHSNumValidElts =
15675+
RHSScalarSize / RHS.getValueType().getScalarSizeInBits();
15676+
// The last element that comes from the RHS. For example:
15677+
// (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15678+
// The last element that comes from the RHS is actually 5, not 7
15679+
// because elements 1 and higher of a scalar_to_vector are undefined.
15680+
// It is also not 4 because the original scalar_to_vector is wider and
15681+
// actually contains two i32 elements.
15682+
RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
1565815683
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
1565915684
if (SToVRHS.getValueType() != RHS.getValueType())
1566015685
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
1566115686
RHS = SToVRHS;
1566215687
}
1566315688

15689+
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15690+
return Res;
15691+
1566415692
// Fix up the shuffle mask to reflect where the desired element actually is.
1566515693
// The minimum and maximum indices that correspond to element zero for both
1566615694
// the LHS and RHS are computed and will control which shuffle mask entries
1566715695
// are to be changed. For example, if the RHS is permuted, any shuffle mask
15668-
// entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15669-
fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15670-
HalfVec, ValidLaneWidth, Subtarget);
15696+
// entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15697+
fixupShuffleMaskForPermutedSToV(
15698+
ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15699+
LHSNumValidElts, RHSNumValidElts, Subtarget);
1567115700
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
1567215701

1567315702
// We may have simplified away the shuffle. We won't be able to do anything

llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2499,11 +2499,9 @@ define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
24992499
;
25002500
; CHECK-LE-LABEL: buildi2:
25012501
; CHECK-LE: # %bb.0: # %entry
2502-
; CHECK-LE-NEXT: mtfprd f0, r4
2502+
; CHECK-LE-NEXT: mtfprwz f0, r4
25032503
; CHECK-LE-NEXT: mtfprd f1, r3
2504-
; CHECK-LE-NEXT: xxswapd vs0, vs0
2505-
; CHECK-LE-NEXT: xxswapd v2, vs1
2506-
; CHECK-LE-NEXT: xxmrgld v2, v2, vs0
2504+
; CHECK-LE-NEXT: xxmrghd v2, vs1, vs0
25072505
; CHECK-LE-NEXT: blr
25082506
;
25092507
; CHECK-AIX-LABEL: buildi2:

0 commit comments

Comments
 (0)