@@ -36076,6 +36076,12 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
36076
36076
if (!isHoriz && !isPack)
36077
36077
return SDValue();
36078
36078
36079
+ // Do all ops have a single use?
36080
+ bool OneUseOps = llvm::all_of(Ops, [](SDValue Op) {
36081
+ return Op.hasOneUse() &&
36082
+ peekThroughBitcasts(Op) == peekThroughOneUseBitcasts(Op);
36083
+ });
36084
+
36079
36085
int NumElts = VT0.getVectorNumElements();
36080
36086
int NumLanes = VT0.getSizeInBits() / 128;
36081
36087
int NumEltsPerLane = NumElts / NumLanes;
@@ -36170,7 +36176,8 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
36170
36176
scaleShuffleElements(TargetMask128, 2, WideMask128)) {
36171
36177
assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
36172
36178
bool SingleOp = (Ops.size() == 1);
36173
- if (!isHoriz || shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
36179
+ if (!isHoriz || OneUseOps ||
36180
+ shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
36174
36181
SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
36175
36182
SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
36176
36183
Lo = Lo.getOperand(WideMask128[0] & 1);
@@ -37875,28 +37882,15 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
37875
37882
return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask);
37876
37883
}
37877
37884
37878
- /// Eliminate a redundant shuffle of a horizontal math op.
37885
+ // Eliminate a redundant shuffle of a horizontal math op.
37886
+ // TODO: Merge this into canonicalizeShuffleMaskWithHorizOp.
37879
37887
static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
37880
- // TODO: Can we use getTargetShuffleInputs instead?
37881
37888
unsigned Opcode = N->getOpcode();
37882
- if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
37883
- if (Opcode != X86ISD::UNPCKL && Opcode != X86ISD::UNPCKH)
37884
- if (Opcode != X86ISD::SHUFP)
37885
- if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
37886
- return SDValue();
37889
+ if (Opcode != X86ISD::UNPCKL && Opcode != X86ISD::UNPCKH)
37890
+ if (Opcode != X86ISD::SHUFP)
37891
+ return SDValue();
37887
37892
37888
- // For a broadcast, peek through an extract element of index 0 to find the
37889
- // horizontal op: broadcast (ext_vec_elt HOp, 0)
37890
37893
EVT VT = N->getValueType(0);
37891
- if (Opcode == X86ISD::VBROADCAST) {
37892
- SDValue SrcOp = N->getOperand(0);
37893
- if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
37894
- SrcOp.getValueType() == MVT::f64 &&
37895
- SrcOp.getOperand(0).getValueType() == VT &&
37896
- isNullConstant(SrcOp.getOperand(1)))
37897
- N = SrcOp.getNode();
37898
- }
37899
-
37900
37894
SDValue HOp = N->getOperand(0);
37901
37895
if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD &&
37902
37896
HOp.getOpcode() != X86ISD::HSUB && HOp.getOpcode() != X86ISD::FHSUB)
@@ -37950,67 +37944,6 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
37950
37944
return SDValue();
37951
37945
}
37952
37946
37953
- // 128-bit horizontal math instructions are defined to operate on adjacent
37954
- // lanes of each operand as:
37955
- // v4X32: A[0] + A[1] , A[2] + A[3] , B[0] + B[1] , B[2] + B[3]
37956
- // ...similarly for v2f64 and v8i16.
37957
- if (!HOp.getOperand(0).isUndef() && !HOp.getOperand(1).isUndef() &&
37958
- HOp.getOperand(0) != HOp.getOperand(1))
37959
- return SDValue();
37960
-
37961
- // The shuffle that we are eliminating may have allowed the horizontal op to
37962
- // have an undemanded (undefined) operand. Duplicate the other (defined)
37963
- // operand to ensure that the results are defined across all lanes without the
37964
- // shuffle.
37965
- auto updateHOp = [](SDValue HorizOp, SelectionDAG &DAG) {
37966
- SDValue X;
37967
- if (HorizOp.getOperand(0).isUndef()) {
37968
- assert(!HorizOp.getOperand(1).isUndef() && "Not expecting foldable h-op");
37969
- X = HorizOp.getOperand(1);
37970
- } else if (HorizOp.getOperand(1).isUndef()) {
37971
- assert(!HorizOp.getOperand(0).isUndef() && "Not expecting foldable h-op");
37972
- X = HorizOp.getOperand(0);
37973
- } else {
37974
- return HorizOp;
37975
- }
37976
- return DAG.getNode(HorizOp.getOpcode(), SDLoc(HorizOp),
37977
- HorizOp.getValueType(), X, X);
37978
- };
37979
-
37980
- // When the operands of a horizontal math op are identical, the low half of
37981
- // the result is the same as the high half. If a target shuffle is also
37982
- // replicating low and high halves (and without changing the type/length of
37983
- // the vector), we don't need the shuffle.
37984
- if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
37985
- if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) {
37986
- // movddup (hadd X, X) --> hadd X, X
37987
- // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
37988
- assert((HOp.getValueType() == MVT::v2f64 ||
37989
- HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
37990
- return updateHOp(HOp, DAG);
37991
- }
37992
- return SDValue();
37993
- }
37994
-
37995
- // shuffle (hadd X, X), undef, [low half...high half] --> hadd X, X
37996
- ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
37997
-
37998
- // TODO: Other mask possibilities like {1,1} and {1,0} could be added here,
37999
- // but this should be tied to whatever horizontal op matching and shuffle
38000
- // canonicalization are producing.
38001
- if (HOp.getValueSizeInBits() == 128 &&
38002
- (isShuffleEquivalent(Mask, {0, 0}) ||
38003
- isShuffleEquivalent(Mask, {0, 1, 0, 1}) ||
38004
- isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3})))
38005
- return updateHOp(HOp, DAG);
38006
-
38007
- if (HOp.getValueSizeInBits() == 256 &&
38008
- (isShuffleEquivalent(Mask, {0, 0, 2, 2}) ||
38009
- isShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) ||
38010
- isShuffleEquivalent(
38011
- Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11})))
38012
- return updateHOp(HOp, DAG);
38013
-
38014
37947
return SDValue();
38015
37948
}
38016
37949
0 commit comments