Skip to content

Commit f24f251

Browse files
committed
Matched some basic ISD::AVGFLOORU patterns
1 parent ba13fa2 commit f24f251

File tree

2 files changed

+49
-0
lines changed

2 files changed

+49
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2826,6 +2826,36 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
28262826
return SDValue();
28272827
}
28282828

2829+
// Attempt to form ext(avgflooru(A, B)) from add(and(A, B), lshr(xor(A, B), 1))
2830+
static SDValue combineFixedwidthToAVG(SDNode *N, SelectionDAG &DAG) {
2831+
assert(N->getOpcode() == ISD::ADD && "ADD node is required here");
2832+
SDValue And = N->getOperand(0);
2833+
SDValue Lshr = N->getOperand(1);
2834+
if (And.getOpcode() != ISD::AND || Lshr.getOpcode() != ISD::SRL)
2835+
return SDValue();
2836+
SDValue Xor = Lshr.getOperand(0);
2837+
if (Xor.getOpcode() != ISD::XOR)
2838+
return SDValue();
2839+
SDValue And1 = And.getOperand(0);
2840+
SDValue And2 = And.getOperand(1);
2841+
SDValue Xor1 = Xor.getOperand(0);
2842+
SDValue Xor2 = Xor.getOperand(1);
2843+
if (Xor1 != And1 && Xor2 != And2)
2844+
return SDValue();
2845+
// Is the right shift using an immediate value of 1?
2846+
ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1));
2847+
if (!N1C || N1C->getAPIntValue() != 1)
2848+
return SDValue();
2849+
EVT VT = And.getValueType();
2850+
SDLoc DL(N);
2851+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2852+
if (!TLI.isOperationLegalOrCustom(ISD::AVGFLOORU, VT))
2853+
return SDValue();
2854+
return DAG.getNode(ISD::AVGFLOORU, DL, VT,
2855+
DAG.getExtOrTrunc(false, And1, DL, VT),
2856+
DAG.getExtOrTrunc(false, And2, DL, VT));
2857+
}
2858+
28292859
SDValue DAGCombiner::visitADD(SDNode *N) {
28302860
SDValue N0 = N->getOperand(0);
28312861
SDValue N1 = N->getOperand(1);
@@ -2841,6 +2871,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
28412871
if (SDValue V = foldAddSubOfSignBit(N, DAG))
28422872
return V;
28432873

2874+
// Try to match AVG fixedwidth pattern
2875+
if (SDValue V = combineFixedwidthToAVG(N, DAG))
2876+
return V;
2877+
28442878
// fold (a+b) -> (a|b) iff a and b share no bits.
28452879
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
28462880
DAG.haveNoCommonBitsSet(N0, N1))

llvm/test/CodeGen/AArch64/hadd-combine.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,21 @@ define <4 x i32> @urhadd_v4i32(<4 x i32> %x) {
859859
ret <4 x i32> %r
860860
}
861861

862+
; Unsigned floor-average idiom written without a wider intermediate type:
;   (%a0 & %a1) + ((%a0 ^ %a1) >> 1)
; The CHECK lines below record the AArch64 output for this scalar i4 case,
; which is the expanded eor/and/and/add sequence.
define i4 @fixedwidth(i4 %a0, i4 %a1) {
863+
; CHECK-LABEL: fixedwidth:
864+
; CHECK: // %bb.0:
865+
; CHECK-NEXT: eor w8, w0, w1
866+
; CHECK-NEXT: and w9, w0, w1
867+
; CHECK-NEXT: and w8, w8, #0xe
868+
; CHECK-NEXT: add w0, w9, w8, lsr #1
869+
; CHECK-NEXT: ret
870+
%and = and i4 %a0, %a1
871+
%xor = xor i4 %a0, %a1
872+
%srl = lshr i4 %xor, 1
873+
%res = add i4 %and, %srl
874+
ret i4 %res
875+
}
876+
862877
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
863878
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
864879
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)

0 commit comments

Comments
 (0)