Skip to content

[PowerPC] Fix vector_shuffle combines when inputs are scalar_to_vector of differing types. #80784

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 94 additions & 52 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15687,16 +15687,20 @@ static SDValue isScalarToVec(SDValue Op) {
// On little endian, that's just the corresponding element in the other
// half of the vector. On big endian, it is in the same half but right
// justified rather than left justified in that half.
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
int LHSMaxIdx, int RHSMinIdx,
int RHSMaxIdx, int HalfVec,
unsigned ValidLaneWidth,
const PPCSubtarget &Subtarget) {
for (int i = 0, e = ShuffV.size(); i < e; i++) {
int Idx = ShuffV[i];
if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
ShuffV[i] +=
Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
// Rewrite, in place, the entries of a vector_shuffle mask that refer to
// elements coming from a permuted scalar_to_vector input.
//
// Mask entries inside the inclusive range [LHSFirstElt, LHSLastElt] are
// shifted by the LHS fixup, and entries inside the inclusive range
// [RHSFirstElt, RHSLastElt] are shifted by the RHS fixup. Every other entry
// (including negative, i.e. undefined, entries when the ranges are
// non-negative) is left unchanged.
//
// The fixup for an input is HalfVec on little endian subtargets, and HalfVec
// minus that input's number of valid elements otherwise.
static void fixupShuffleMaskForPermutedSToV(
    SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
    int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
    unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
  const bool IsLE = Subtarget.isLittleEndian();
  const int LHSEltFixup = IsLE ? HalfVec : HalfVec - LHSNumValidElts;
  const int RHSEltFixup = IsLE ? HalfVec : HalfVec - RHSNumValidElts;

  for (int &MaskElt : ShuffV) {
    // The LHS range is tested first, so it wins if the ranges ever overlap.
    if (LHSFirstElt <= MaskElt && MaskElt <= LHSLastElt) {
      MaskElt += LHSEltFixup;
      continue;
    }
    if (RHSFirstElt <= MaskElt && MaskElt <= RHSLastElt)
      MaskElt += RHSEltFixup;
  }
}

Expand Down Expand Up @@ -15735,6 +15739,51 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
OrigSToV.getOperand(0));
}

// Return true if no entry of the shuffle mask ShuffV is rejected by the
// bounds below, and false as soon as one entry is.
//
// Negative mask entries (explicitly undefined) are always accepted.
// An entry is rejected when either:
//   - LHSLastElementDefined is non-negative, the entry is below HalfVec and
//     it is greater than LHSLastElementDefined; or
//   - RHSLastElementDefined is non-negative and the entry is greater than
//     HalfVec + RHSLastElementDefined.
// A negative "last element defined" value disables the corresponding check.
//
// NOTE(review): the visible caller appears to pass an RHS last-element index
// that already includes the offset of the second shuffle operand; confirm
// that adding HalfVec to it is the intended upper bound.
static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
                                 int HalfVec, int LHSLastElementDefined,
                                 int RHSLastElementDefined) {
  const bool CheckLHS = LHSLastElementDefined >= 0;
  const bool CheckRHS = RHSLastElementDefined >= 0;

  for (int MaskElt : ShuffV) {
    // Undefined mask entries never make the mask out of range.
    if (MaskElt < 0)
      continue;

    // First input vector of the vector_shuffle.
    const bool PastLHS =
        CheckLHS && MaskElt < HalfVec && MaskElt > LHSLastElementDefined;
    // Second input vector of the vector_shuffle.
    const bool PastRHS =
        CheckRHS && MaskElt > HalfVec + RHSLastElementDefined;

    if (PastLHS || PastRHS)
      return false;
  }
  return true;
}

// Produce the permuted form of the scalar_to_vector node SToVNode so that it
// can replace VecShuffOperand as an input of a vector_shuffle, and compute the
// values later used to fix up the shuffle mask.
//
// Outputs:
//   NumValidElts - ScalarSize divided by the scalar size (in bits) of
//                  VecShuffOperand's type.
//   LastElt      - the last mask index that refers to a defined element of
//                  this operand. It is FirstElt when the scalar is not wider
//                  than a shuffle element; otherwise it is
//                  FirstElt + ScalarSize / ShuffleEltWidth - 1.
//
// For example, with
//   (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
// the last LHS element is 0 rather than 3, since elements 1 and up of a
// scalar_to_vector are undefined. The last RHS element is 5 rather than 7 for
// the same reason, and it is not 4 because the wider i64 scalar_to_vector
// spans two i32 elements.
//
// Returns the permuted node, bitcast to VecShuffOperand's type when the two
// types differ.
static SDValue generateSToVPermutedForVecShuffle(
    int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
    int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
    SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
  EVT OperandVT = VecShuffOperand.getValueType();

  // Values consumed by the shuffle mask fixup.
  NumValidElts = ScalarSize / OperandVT.getScalarSizeInBits();
  if (static_cast<uint64_t>(ScalarSize) > ShuffleEltWidth)
    LastElt = ScalarSize / ShuffleEltWidth - 1 + FirstElt;
  else
    LastElt = FirstElt;

  SDValue Permuted = getSToVPermuted(SToVNode, DAG, Subtarget);
  if (Permuted.getValueType() == OperandVT)
    return Permuted;
  return DAG.getBitcast(OperandVT, Permuted);
}

// On little endian subtargets, combine shuffles such as:
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
// into:
Expand Down Expand Up @@ -15782,71 +15831,64 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
SDValue SToVLHS = isScalarToVec(LHS);
SDValue SToVRHS = isScalarToVec(RHS);
if (SToVLHS || SToVRHS) {
// FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
// same type and have differing element sizes, then do not perform
// the following transformation. The current transformation for
// SCALAR_TO_VECTOR assumes that both input vectors have the same
// element size. This will be updated in the future to account for
// differing sizes of the LHS and RHS.
if (SToVLHS && SToVRHS &&
(SToVLHS.getValueType().getScalarSizeInBits() !=
SToVRHS.getValueType().getScalarSizeInBits()))
return Res;

int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
: SToVRHS.getValueType().getVectorNumElements();
int NumEltsOut = ShuffV.size();
EVT VT = SVN->getValueType(0);
uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
int ShuffleNumElts = ShuffV.size();
int HalfVec = ShuffleNumElts / 2;
// The width of the "valid lane" (i.e. the lane that contains the value that
// is vectorized) needs to be expressed in terms of the number of elements
// of the shuffle. It is thereby the ratio of the values before and after
// any bitcast.
unsigned ValidLaneWidth =
SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
LHS.getValueType().getScalarSizeInBits()
: SToVRHS.getValueType().getScalarSizeInBits() /
RHS.getValueType().getScalarSizeInBits();
// any bitcast, which will be set later on if the LHS or RHS are
// SCALAR_TO_VECTOR nodes.
unsigned LHSNumValidElts = HalfVec;
unsigned RHSNumValidElts = HalfVec;

// Initially assume that neither input is permuted. These will be adjusted
// accordingly if either input is.
int LHSMaxIdx = -1;
int RHSMinIdx = -1;
int RHSMaxIdx = -1;
int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
// accordingly if either input is. Note, that -1 means that all elements
// are undefined.
int LHSFirstElt = 0;
int RHSFirstElt = ShuffleNumElts;
int LHSLastElt = -1;
int RHSLastElt = -1;

// Get the permuted scalar to vector nodes for the source(s) that come from
// ISD::SCALAR_TO_VECTOR.
// On big endian systems, this only makes sense for element sizes smaller
// than 64 bits since for 64-bit elements, all instructions already put
// the value into element zero. Since scalar size of LHS and RHS may differ
// after isScalarToVec, this should be checked using their own sizes.
int LHSScalarSize = 0;
int RHSScalarSize = 0;
if (SToVLHS) {
if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
if (!IsLittleEndian && LHSScalarSize >= 64)
return Res;
// Set up the values for the shuffle vector fixup.
LHSMaxIdx = NumEltsOut / NumEltsIn;
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
if (SToVLHS.getValueType() != LHS.getValueType())
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
LHS = SToVLHS;
}
if (SToVRHS) {
if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
if (!IsLittleEndian && RHSScalarSize >= 64)
return Res;
RHSMinIdx = NumEltsOut;
RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
if (SToVRHS.getValueType() != RHS.getValueType())
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
RHS = SToVRHS;
}
if (LHSScalarSize != 0)
LHS = generateSToVPermutedForVecShuffle(
LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
if (RHSScalarSize != 0)
RHS = generateSToVPermutedForVecShuffle(
RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
RHSLastElt, RHS, SToVRHS, DAG, Subtarget);

if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
return Res;

// Fix up the shuffle mask to reflect where the desired element actually is.
// The minimum and maximum indices that correspond to element zero for both
// the LHS and RHS are computed and will control which shuffle mask entries
// are to be changed. For example, if the RHS is permuted, any shuffle mask
// entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
HalfVec, ValidLaneWidth, Subtarget);
// entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
fixupShuffleMaskForPermutedSToV(
ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
LHSNumValidElts, RHSNumValidElts, Subtarget);
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

// We may have simplified away the shuffle. We won't be able to do anything
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2499,11 +2499,9 @@ define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
;
; CHECK-LE-LABEL: buildi2:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: mtfprd f0, r4
; CHECK-LE-NEXT: mtfprwz f0, r4
; CHECK-LE-NEXT: mtfprd f1, r3
; CHECK-LE-NEXT: xxswapd vs0, vs0
; CHECK-LE-NEXT: xxswapd v2, vs1
; CHECK-LE-NEXT: xxmrgld v2, v2, vs0
; CHECK-LE-NEXT: xxmrghd v2, vs1, vs0
; CHECK-LE-NEXT: blr
;
; CHECK-AIX-LABEL: buildi2:
Expand Down
Loading
Loading