Skip to content

Commit 070c44d

Browse files
committed
Matched Fixed width Pattern for ISD::AVGCEILU
1 parent 0d98582 commit 070c44d

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,6 +2529,43 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
25292529
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
25302530
}
25312531

2532+
// Attempt to form avgceilu(A, B) from sub(or(A, B), lshr(xor(A, B), 1))
2533+
static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
2534+
assert(N->getOpcode() == ISD::SUB and "SUB node is required here");
2535+
SDValue Or = N->getOperand(0);
2536+
SDValue Lshr = N->getOperand(1);
2537+
if (Or.getOpcode() != ISD::OR or Lshr.getOpcode() != ISD::SRL)
2538+
return SDValue();
2539+
SDValue Xor = Lshr.getOperand(0);
2540+
if (Xor.getOpcode() != ISD::XOR)
2541+
return SDValue();
2542+
SDValue Or1 = Or.getOperand(0);
2543+
SDValue Or2 = Or.getOperand(1);
2544+
SDValue Xor1 = Xor.getOperand(0);
2545+
SDValue Xor2 = Xor.getOperand(1);
2546+
if (Or1 == Xor2 and Or2 == Xor1) {
2547+
SDValue temp = Or1;
2548+
Or1 = Or2;
2549+
Or2 = temp;
2550+
} else if (Or1 != Xor1 or Or2 != Xor2)
2551+
return SDValue();
2552+
// Is the right shift using an immediate value of 1?
2553+
ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1));
2554+
if (!N1C or N1C->getAPIntValue() != 1)
2555+
return SDValue();
2556+
EVT VT = Or1.getValueType();
2557+
EVT NVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
2558+
if (VT.isVector())
2559+
VT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
2560+
else
2561+
VT = NVT;
2562+
SDLoc DL(N);
2563+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2564+
if (!TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT))
2565+
return SDValue();
2566+
return DAG.getNode(ISD::AVGCEILU, DL, VT, Or1, Or2);
2567+
}
2568+
25322569
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
25332570
/// a shift and add with a different constant.
25342571
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
@@ -3859,6 +3896,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
38593896
if (SDValue V = foldAddSubOfSignBit(N, DAG))
38603897
return V;
38613898

3899+
// Try to match the fixed-width AVGCEILU pattern: (A | B) - ((A ^ B) >> 1).
3900+
if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
3901+
return V;
3902+
38623903
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
38633904
return V;
38643905

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -debugify-and-strip-all-safe -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
3+
4+
; Scalar i4 instance of the sub(or(a0, a1), lshr(xor(a0, a1), 1)) pattern.
; The autogenerated CHECK lines below record an eor/orr/and/sub sequence,
; i.e. no averaging instruction is selected for this input.
define i4 @sub_fixedwidth_i4(i4 %a0, i4 %a1) {
5+
; CHECK-LABEL: sub_fixedwidth_i4:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: eor w8, w0, w1
8+
; CHECK-NEXT: orr w9, w0, w1
9+
; CHECK-NEXT: and w8, w8, #0xe
10+
; CHECK-NEXT: sub w0, w9, w8, lsr #1
11+
; CHECK-NEXT: ret
12+
%or = or i4 %a0, %a1
13+
%xor = xor i4 %a0, %a1
14+
%srl = lshr i4 %xor, 1
15+
%res = sub i4 %or, %srl
16+
ret i4 %res
17+
}
18+
19+
; <4 x i32> instance of the same pattern, using a splat shift amount of 1.
; The autogenerated CHECK lines below record an eor/orr/ushr/sub sequence,
; i.e. the pattern is not turned into an averaging instruction here.
define <4 x i32> @sub_fixedwidth_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
20+
; CHECK-LABEL: sub_fixedwidth_v4i32:
21+
; CHECK: // %bb.0:
22+
; CHECK-NEXT: eor v2.16b, v0.16b, v1.16b
23+
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
24+
; CHECK-NEXT: ushr v1.4s, v2.4s, #1
25+
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
26+
; CHECK-NEXT: ret
27+
%or = or <4 x i32> %a0, %a1
28+
%xor = xor <4 x i32> %a0, %a1
29+
%srl = lshr <4 x i32> %xor, <i32 1,i32 1,i32 1,i32 1>
30+
%res = sub <4 x i32> %or, %srl
31+
ret <4 x i32> %res
32+
}
33+
34+

0 commit comments

Comments
 (0)