Skip to content

Commit 2a0d5da

Browse files
committed
[X86][SSE] foldShuffleOfHorizOp - remove broadcast handling.
Remove VBROADCAST/MOVDDUP/splat-shuffle handling from foldShuffleOfHorizOp. This can all be handled by canonicalizeShuffleMaskWithHorizOp as long as we check that the HADD/SUB are only used once (to prevent infinite loops on slow-horizop targets, which will try to reuse the nodes again followed by a post-hop shuffle).
1 parent 43279d1 commit 2a0d5da

File tree

1 file changed

+13
-80
lines changed

1 file changed

+13
-80
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -36076,6 +36076,12 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
3607636076
if (!isHoriz && !isPack)
3607736077
return SDValue();
3607836078

36079+
// Do all ops have a single use?
36080+
bool OneUseOps = llvm::all_of(Ops, [](SDValue Op) {
36081+
return Op.hasOneUse() &&
36082+
peekThroughBitcasts(Op) == peekThroughOneUseBitcasts(Op);
36083+
});
36084+
3607936085
int NumElts = VT0.getVectorNumElements();
3608036086
int NumLanes = VT0.getSizeInBits() / 128;
3608136087
int NumEltsPerLane = NumElts / NumLanes;
@@ -36170,7 +36176,8 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
3617036176
scaleShuffleElements(TargetMask128, 2, WideMask128)) {
3617136177
assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
3617236178
bool SingleOp = (Ops.size() == 1);
36173-
if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
36179+
if (!isHoriz || OneUseOps ||
36180+
shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
3617436181
SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
3617536182
SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
3617636183
Lo = Lo.getOperand(WideMask128[0] & 1);
@@ -37875,28 +37882,15 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
3787537882
return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask);
3787637883
}
3787737884

37878-
/// Eliminate a redundant shuffle of a horizontal math op.
37885+
// Eliminate a redundant shuffle of a horizontal math op.
37886+
// TODO: Merge this into canonicalizeShuffleMaskWithHorizOp.
3787937887
static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
37880-
// TODO: Can we use getTargetShuffleInputs instead?
3788137888
unsigned Opcode = N->getOpcode();
37882-
if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
37883-
if (Opcode != X86ISD::UNPCKL && Opcode != X86ISD::UNPCKH)
37884-
if (Opcode != X86ISD::SHUFP)
37885-
if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
37886-
return SDValue();
37889+
if (Opcode != X86ISD::UNPCKL && Opcode != X86ISD::UNPCKH)
37890+
if (Opcode != X86ISD::SHUFP)
37891+
return SDValue();
3788737892

37888-
// For a broadcast, peek through an extract element of index 0 to find the
37889-
// horizontal op: broadcast (ext_vec_elt HOp, 0)
3789037893
EVT VT = N->getValueType(0);
37891-
if (Opcode == X86ISD::VBROADCAST) {
37892-
SDValue SrcOp = N->getOperand(0);
37893-
if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
37894-
SrcOp.getValueType() == MVT::f64 &&
37895-
SrcOp.getOperand(0).getValueType() == VT &&
37896-
isNullConstant(SrcOp.getOperand(1)))
37897-
N = SrcOp.getNode();
37898-
}
37899-
3790037894
SDValue HOp = N->getOperand(0);
3790137895
if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD &&
3790237896
HOp.getOpcode() != X86ISD::HSUB && HOp.getOpcode() != X86ISD::FHSUB)
@@ -37950,67 +37944,6 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
3795037944
return SDValue();
3795137945
}
3795237946

37953-
// 128-bit horizontal math instructions are defined to operate on adjacent
37954-
// lanes of each operand as:
37955-
// v4X32: A[0] + A[1] , A[2] + A[3] , B[0] + B[1] , B[2] + B[3]
37956-
// ...similarly for v2f64 and v8i16.
37957-
if (!HOp.getOperand(0).isUndef() && !HOp.getOperand(1).isUndef() &&
37958-
HOp.getOperand(0) != HOp.getOperand(1))
37959-
return SDValue();
37960-
37961-
// The shuffle that we are eliminating may have allowed the horizontal op to
37962-
// have an undemanded (undefined) operand. Duplicate the other (defined)
37963-
// operand to ensure that the results are defined across all lanes without the
37964-
// shuffle.
37965-
auto updateHOp = [](SDValue HorizOp, SelectionDAG &DAG) {
37966-
SDValue X;
37967-
if (HorizOp.getOperand(0).isUndef()) {
37968-
assert(!HorizOp.getOperand(1).isUndef() && "Not expecting foldable h-op");
37969-
X = HorizOp.getOperand(1);
37970-
} else if (HorizOp.getOperand(1).isUndef()) {
37971-
assert(!HorizOp.getOperand(0).isUndef() && "Not expecting foldable h-op");
37972-
X = HorizOp.getOperand(0);
37973-
} else {
37974-
return HorizOp;
37975-
}
37976-
return DAG.getNode(HorizOp.getOpcode(), SDLoc(HorizOp),
37977-
HorizOp.getValueType(), X, X);
37978-
};
37979-
37980-
// When the operands of a horizontal math op are identical, the low half of
37981-
// the result is the same as the high half. If a target shuffle is also
37982-
// replicating low and high halves (and without changing the type/length of
37983-
// the vector), we don't need the shuffle.
37984-
if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
37985-
if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) {
37986-
// movddup (hadd X, X) --> hadd X, X
37987-
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
37988-
assert((HOp.getValueType() == MVT::v2f64 ||
37989-
HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
37990-
return updateHOp(HOp, DAG);
37991-
}
37992-
return SDValue();
37993-
}
37994-
37995-
// shuffle (hadd X, X), undef, [low half...high half] --> hadd X, X
37996-
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
37997-
37998-
// TODO: Other mask possibilities like {1,1} and {1,0} could be added here,
37999-
// but this should be tied to whatever horizontal op matching and shuffle
38000-
// canonicalization are producing.
38001-
if (HOp.getValueSizeInBits() == 128 &&
38002-
(isShuffleEquivalent(Mask, {0, 0}) ||
38003-
isShuffleEquivalent(Mask, {0, 1, 0, 1}) ||
38004-
isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3})))
38005-
return updateHOp(HOp, DAG);
38006-
38007-
if (HOp.getValueSizeInBits() == 256 &&
38008-
(isShuffleEquivalent(Mask, {0, 0, 2, 2}) ||
38009-
isShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) ||
38010-
isShuffleEquivalent(
38011-
Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11})))
38012-
return updateHOp(HOp, DAG);
38013-
3801437947
return SDValue();
3801537948
}
3801637949

0 commit comments

Comments
 (0)