Skip to content

Commit 92764c9

Browse files
authored
[DAG] Matched Fixedwidth Pattern for ISD::AVGCEILU (#85031)
Fixes: #84753
1 parent ef520ca commit 92764c9

File tree

2 files changed

+32
-3
lines changed

2 files changed

+32
-3
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,6 +2529,23 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
25292529
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
25302530
}
25312531

2532+
// Attempt to form avgceilu(A, B) from (A | B) - ((A ^ B) >> 1)
2533+
static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
2534+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2535+
SDValue N0 = N->getOperand(0);
2536+
EVT VT = N0.getValueType();
2537+
SDLoc DL(N);
2538+
if (TLI.isOperationLegal(ISD::AVGCEILU, VT)) {
2539+
SDValue A, B;
2540+
if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2541+
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2542+
m_SpecificInt(1))))) {
2543+
return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2544+
}
2545+
}
2546+
return SDValue();
2547+
}
2548+
25322549
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
25332550
/// a shift and add with a different constant.
25342551
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
@@ -3849,6 +3866,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
38493866
if (SDValue V = foldAddSubOfSignBit(N, DAG))
38503867
return V;
38513868

3869+
// Try to fold the fixed-width pattern (A | B) - ((A ^ B) >> 1) into AVGCEILU.
3870+
if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
3871+
return V;
3872+
38523873
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
38533874
return V;
38543875

llvm/test/CodeGen/AArch64/hadd-combine.ll

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,9 +329,17 @@ define <8 x i16> @hadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
329329
ret <8 x i16> %result
330330
}
331331

332-
333-
334-
332+
; Checks that (a0 | a1) - ((a0 ^ a1) >> 1) on <8 x i16> operands is selected
; as a single urhadd instruction.
; NOTE(review): the function name says v4i32 but every operand is <8 x i16>;
; consider renaming (together with the CHECK-LABEL) in a follow-up.
define <8 x i16> @sub_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
333+
; CHECK-LABEL: sub_fixedwidth_v4i32:
334+
; CHECK: // %bb.0:
335+
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
336+
; CHECK-NEXT: ret
337+
%or = or <8 x i16> %a0, %a1
338+
%xor = xor <8 x i16> %a0, %a1
339+
%srl = lshr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
340+
%res = sub <8 x i16> %or, %srl
341+
ret <8 x i16> %res
342+
}
335343

336344
define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
337345
; CHECK-LABEL: rhaddu_base:

0 commit comments

Comments
 (0)