Skip to content

Commit 703920d

Browse files
authored
[DAG] Matched FixedWidth pattern for ISD::AVGFLOORU (#84903)
Fixes: #84749
1 parent 5e486d1 commit 703920d

File tree

2 files changed

+33
-0
lines changed

2 files changed

+33
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2820,6 +2820,23 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
28202820
return SDValue();
28212821
}
28222822

2823+
// Attempt to form avgflooru(A, B) from (A & B) + ((A ^ B) >> 1)
2824+
static SDValue combineFixedwidthToAVGFLOORU(SDNode *N, SelectionDAG &DAG) {
2825+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2826+
SDValue N0 = N->getOperand(0);
2827+
EVT VT = N0.getValueType();
2828+
SDLoc DL(N);
2829+
if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {
2830+
SDValue A, B;
2831+
if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2832+
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2833+
m_SpecificInt(1))))) {
2834+
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2835+
}
2836+
}
2837+
return SDValue();
2838+
}
2839+
28232840
SDValue DAGCombiner::visitADD(SDNode *N) {
28242841
SDValue N0 = N->getOperand(0);
28252842
SDValue N1 = N->getOperand(1);
@@ -2835,6 +2852,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
28352852
if (SDValue V = foldAddSubOfSignBit(N, DAG))
28362853
return V;
28372854

2855+
// Try to match AVGFLOORU fixedwidth pattern
2856+
if (SDValue V = combineFixedwidthToAVGFLOORU(N, DAG))
2857+
return V;
2858+
28382859
// fold (a+b) -> (a|b) iff a and b share no bits.
28392860
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
28402861
DAG.haveNoCommonBitsSet(N0, N1))

llvm/test/CodeGen/AArch64/hadd-combine.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,18 @@ define <4 x i32> @urhadd_v4i32(<4 x i32> %x) {
859859
ret <4 x i32> %r
860860
}
861861

862+
; Fixed-width unsigned averaging idiom on <8 x i16>:
;   (%a0 & %a1) + ((%a0 ^ %a1) >> 1)
; The assertions below expect the whole sequence to become one uhadd on .8h
; lanes followed by ret.
; NOTE(review): the function name says v4i32 but every operand here is
; <8 x i16>; consider renaming it (and its label check) in a follow-up.
define <8 x i16> @uhadd_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
863+
; CHECK-LABEL: uhadd_fixedwidth_v4i32:
864+
; CHECK: // %bb.0:
865+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
866+
; CHECK-NEXT: ret
867+
%and = and <8 x i16> %a0, %a1
868+
%xor = xor <8 x i16> %a0, %a1
869+
%srl = lshr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
870+
%res = add <8 x i16> %and, %srl
871+
ret <8 x i16> %res
872+
}
873+
862874
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
863875
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
864876
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)

0 commit comments

Comments
 (0)