Skip to content

Commit 0e346ee

Browse files
authored
[DAG] fold avgu(zext(x), zext(y)) -> zext(avgu(x, y)) (#95134)
Closes #86301
1 parent fc1c34b commit 0e346ee

File tree

3 files changed

+101
-4
lines changed

3 files changed

+101
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5236,6 +5236,23 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
52365236
return DAG.getNode(ISD::SRL, DL, VT, X,
52375237
DAG.getShiftAmountConstant(1, VT, DL));
52385238

5239+
// fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5240+
SDValue A;
5241+
SDValue B;
5242+
if (sd_match(
5243+
N, m_BinOp(ISD::AVGFLOORU, m_ZExt(m_Value(A)), m_ZExt(m_Value(B)))) &&
5244+
A.getValueType() == B.getValueType() &&
5245+
hasOperation(ISD::AVGFLOORU, A.getValueType())) {
5246+
SDValue AvgFloorU = DAG.getNode(ISD::AVGFLOORU, DL, A.getValueType(), A, B);
5247+
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgFloorU);
5248+
}
5249+
if (sd_match(
5250+
N, m_BinOp(ISD::AVGCEILU, m_ZExt(m_Value(A)), m_ZExt(m_Value(B)))) &&
5251+
A.getValueType() == B.getValueType() &&
5252+
hasOperation(ISD::AVGCEILU, A.getValueType())) {
5253+
SDValue AvgCeilU = DAG.getNode(ISD::AVGCEILU, DL, A.getValueType(), A, B);
5254+
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgCeilU);
5255+
}
52395256
return SDValue();
52405257
}
52415258

llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
99
define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
1010
; CHECK-LABEL: haddu_zext:
1111
; CHECK: // %bb.0:
12+
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
1213
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
13-
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
14-
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
1514
; CHECK-NEXT: ret
1615
%x0 = zext <8 x i8> %a0 to <8 x i16>
1716
%x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -23,9 +22,8 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
2322
define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
2423
; CHECK-LABEL: rhaddu_zext:
2524
; CHECK: // %bb.0:
25+
; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
2626
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
27-
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
28-
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
2927
; CHECK-NEXT: ret
3028
%x0 = zext <8 x i8> %a0 to <8 x i16>
3129
%x1 = zext <8 x i8> %a1 to <8 x i16>

llvm/test/CodeGen/AArch64/avg.ll

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
3+
4+
define <16 x i16> @zext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
5+
; CHECK-LABEL: zext_avgflooru:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
8+
; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
9+
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
10+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
11+
; CHECK-NEXT: uhadd v1.8b, v2.8b, v3.8b
12+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
13+
; CHECK-NEXT: ret
14+
%x0 = zext <16 x i8> %a0 to <16 x i16>
15+
%x1 = zext <16 x i8> %a1 to <16 x i16>
16+
%and = and <16 x i16> %x0, %x1
17+
%xor = xor <16 x i16> %x0, %x1
18+
%shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
19+
%avg = add <16 x i16> %and, %shift
20+
ret <16 x i16> %avg
21+
}
22+
23+
define <16 x i16> @zext_avgflooru_negative(<16 x i8> %a0, <16 x i4> %a1) {
24+
; CHECK-LABEL: zext_avgflooru_negative:
25+
; CHECK: // %bb.0:
26+
; CHECK-NEXT: movi v2.16b, #15
27+
; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
28+
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
29+
; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8
30+
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
31+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
32+
; CHECK-NEXT: uhadd v1.8b, v3.8b, v2.8b
33+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
34+
; CHECK-NEXT: ret
35+
%x0 = zext <16 x i8> %a0 to <16 x i16>
36+
%x1 = zext <16 x i4> %a1 to <16 x i16>
37+
%and = and <16 x i16> %x0, %x1
38+
%xor = xor <16 x i16> %x0, %x1
39+
%shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
40+
%avg = add <16 x i16> %and, %shift
41+
ret <16 x i16> %avg
42+
}
43+
44+
define <16 x i16> @zext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
45+
; CHECK-LABEL: zext_avgceilu:
46+
; CHECK: // %bb.0:
47+
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
48+
; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
49+
; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
50+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
51+
; CHECK-NEXT: urhadd v1.8b, v2.8b, v3.8b
52+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
53+
; CHECK-NEXT: ret
54+
%x0 = zext <16 x i8> %a0 to <16 x i16>
55+
%x1 = zext <16 x i8> %a1 to <16 x i16>
56+
%or = or <16 x i16> %x0, %x1
57+
%xor = xor <16 x i16> %x0, %x1
58+
%shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
59+
%avg = sub <16 x i16> %or, %shift
60+
ret <16 x i16> %avg
61+
}
62+
63+
define <16 x i16> @zext_avgceilu_negative(<16 x i4> %a0, <16 x i8> %a1) {
64+
; CHECK-LABEL: zext_avgceilu_negative:
65+
; CHECK: // %bb.0:
66+
; CHECK-NEXT: movi v2.16b, #15
67+
; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
68+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
69+
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
70+
; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
71+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
72+
; CHECK-NEXT: urhadd v1.8b, v2.8b, v3.8b
73+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
74+
; CHECK-NEXT: ret
75+
%x0 = zext <16 x i4> %a0 to <16 x i16>
76+
%x1 = zext <16 x i8> %a1 to <16 x i16>
77+
%or = or <16 x i16> %x0, %x1
78+
%xor = xor <16 x i16> %x0, %x1
79+
%shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
80+
%avg = sub <16 x i16> %or, %shift
81+
ret <16 x i16> %avg
82+
}

0 commit comments

Comments
 (0)