Skip to content

Commit 251df1b

Browse files
Dinar Temirbulatov
authored and committed
Added signed reduction support
1 parent b48eff8 commit 251df1b

File tree

4 files changed

+53
-80
lines changed

4 files changed

+53
-80
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17504,10 +17504,12 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
1750417504
}
1750517505

1750617506
static SDValue
17507-
performVecReduceAddZextCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
17508-
const AArch64TargetLowering &TLI) {
17509-
if (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND)
17507+
performVecReduceAddExtCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
17508+
const AArch64TargetLowering &TLI) {
17509+
if (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
17510+
N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND)
1751017511
return SDValue();
17512+
bool IsSigned = N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND;
1751117513

1751217514
SelectionDAG &DAG = DCI.DAG;
1751317515
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
@@ -17564,9 +17566,12 @@ performVecReduceAddZextCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1756417566
EVT ContainerVT = getContainerForFixedLengthVector(DAG, RdxVT);
1756517567
Reg = convertToScalableVector(DAG, ContainerVT, Reg);
1756617568
}
17567-
SDValue Res = DAG.getNode(
17568-
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64,
17569-
DAG.getConstant(Intrinsic::aarch64_sve_uaddv, DL, MVT::i64), Pg, Reg);
17569+
SDValue Res =
17570+
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64,
17571+
DAG.getConstant(IsSigned ? Intrinsic::aarch64_sve_saddv
17572+
: Intrinsic::aarch64_sve_uaddv,
17573+
DL, MVT::i64),
17574+
Pg, Reg);
1757017575
if (ElemType != MVT::i64)
1757117576
Res = DAG.getAnyExtOrTrunc(Res, DL, ElemType);
1757217577
Results.push_back(Res);
@@ -25265,7 +25270,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2526525270
case ISD::VECREDUCE_ADD: {
2526625271
if (SDValue Val = performVecReduceAddCombine(N, DCI.DAG, Subtarget))
2526725272
return Val;
25268-
return performVecReduceAddZextCombine(N, DCI, *this);
25273+
return performVecReduceAddExtCombine(N, DCI, *this);
2526925274
}
2527025275
case AArch64ISD::UADDV:
2527125276
return performUADDVCombine(N, DAG);

llvm/test/CodeGen/AArch64/sve-fixed-vector-zext.ll

Lines changed: 27 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12

23
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-v1 -O3 -aarch64-sve-vector-bits-min=256 -verify-machineinstrs | FileCheck %s --check-prefixes=SVE256
34
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-v1 -O3 -aarch64-sve-vector-bits-min=128 -verify-machineinstrs | FileCheck %s --check-prefixes=NEON
@@ -6,24 +7,31 @@
67

78
define internal i32 @test(ptr nocapture readonly %p1, i32 %i1, ptr nocapture readonly %p2, i32 %i2) {
89
; SVE256-LABEL: test:
9-
; SVE256: ld1b { z0.h }, p0/z,
10-
; SVE256: ld1b { z1.h }, p0/z,
11-
; SVE256: sub z0.h, z0.h, z1.h
12-
; SVE256-NEXT: sunpklo z1.s, z0.h
13-
; SVE256-NEXT: ext z0.b, z0.b, z0.b, #16
14-
; SVE256-NEXT: sunpklo z0.s, z0.h
15-
; SVE256-NEXT: add z0.s, z1.s, z0.s
16-
; SVE256-NEXT: uaddv d0, p1, z0.s
10+
; SVE256: // %bb.0: // %L.entry
11+
; SVE256-NEXT: ptrue p0.h, vl16
12+
; SVE256-NEXT: mov w9, wzr
13+
; SVE256-NEXT: mov w10, wzr
14+
; SVE256-NEXT: mov w8, wzr
15+
; SVE256-NEXT: mov w11, #-16 // =0xfffffff0
16+
; SVE256-NEXT: .p2align 5, , 16
17+
; SVE256-NEXT: .LBB0_1: // %L1
18+
; SVE256-NEXT: // =>This Inner Loop Header: Depth=1
19+
; SVE256-NEXT: sxtw x12, w9
20+
; SVE256-NEXT: sxtw x13, w10
21+
; SVE256-NEXT: adds w11, w11, #1
22+
; SVE256-NEXT: add w10, w10, w3
23+
; SVE256-NEXT: ld1b { z0.h }, p0/z, [x0, x12]
24+
; SVE256-NEXT: ld1b { z1.h }, p0/z, [x2, x13]
25+
; SVE256-NEXT: add w9, w9, w1
26+
; SVE256-NEXT: sub z0.h, z0.h, z1.h
27+
; SVE256-NEXT: saddv d0, p0, z0.h
28+
; SVE256-NEXT: fmov w12, s0
29+
; SVE256-NEXT: add w8, w12, w8
30+
; SVE256-NEXT: b.lo .LBB0_1
31+
; SVE256-NEXT: // %bb.2: // %L2
32+
; SVE256-NEXT: mov w0, w8
33+
; SVE256-NEXT: ret
1734

18-
; NEON-LABEL: test:
19-
; NEON: ldr q0, [x0, w9, sxtw]
20-
; NEON: ldr q1, [x2, w10, sxtw]
21-
; NEON: usubl2 v2.8h, v0.16b, v1.16b
22-
; NEON-NEXT: usubl v0.8h, v0.8b, v1.8b
23-
; NEON: saddl2 v1.4s, v0.8h, v2.8h
24-
; NEON-NEXT: saddl v0.4s, v0.4h, v2.4h
25-
; NEON-NEXT: add v0.4s, v0.4s, v1.4s
26-
; NEON-NEXT: addv s0, v0.4s
2735

2836
L.entry:
2937
br label %L1
@@ -55,3 +63,5 @@ L2: ; preds = %L1
5563
}
5664

5765
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
66+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
67+
; NEON: {{.*}}

llvm/test/CodeGen/AArch64/sve-int-reduce.ll

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -240,17 +240,8 @@ define i32 @uaddv_nxv16i16_nxv16i32(<vscale x 32 x i16> %a) {
240240
define i32 @saddv_nxv16i8_nxv16i32(<vscale x 16 x i8> %a) {
241241
; CHECK-LABEL: saddv_nxv16i8_nxv16i32:
242242
; CHECK: // %bb.0:
243-
; CHECK-NEXT: sunpkhi z1.h, z0.b
244-
; CHECK-NEXT: sunpklo z0.h, z0.b
245-
; CHECK-NEXT: ptrue p0.s
246-
; CHECK-NEXT: sunpklo z2.s, z1.h
247-
; CHECK-NEXT: sunpklo z3.s, z0.h
248-
; CHECK-NEXT: sunpkhi z1.s, z1.h
249-
; CHECK-NEXT: sunpkhi z0.s, z0.h
250-
; CHECK-NEXT: add z0.s, z0.s, z1.s
251-
; CHECK-NEXT: add z1.s, z3.s, z2.s
252-
; CHECK-NEXT: add z0.s, z1.s, z0.s
253-
; CHECK-NEXT: uaddv d0, p0, z0.s
243+
; CHECK-NEXT: ptrue p0.b
244+
; CHECK-NEXT: saddv d0, p0, z0.b
254245
; CHECK-NEXT: fmov x0, d0
255246
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
256247
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll

Lines changed: 12 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ define i32 @reduce_uadd_v16i8(<32 x i8> %a) #0 {
4141
; SVE_MIN_256-NEXT: // kill: def $q0 killed $q0 def $z0
4242
; SVE_MIN_256-NEXT: // kill: def $q1 killed $q1 def $z1
4343
; SVE_MIN_256-NEXT: splice z0.b, p0, z0.b, z1.b
44-
; SVE_MIN_256-NEXT: ptrue p0.b
44+
; SVE_MIN_256-NEXT: ptrue p0.b, vl32
4545
; SVE_MIN_256-NEXT: uaddv d0, p0, z0.b
4646
; SVE_MIN_256-NEXT: fmov x0, d0
4747
; SVE_MIN_256-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -71,57 +71,24 @@ define i32 @reduce_sadd_v16i8(<32 x i8> %a) #0 {
7171
;
7272
; SVE_128-LABEL: reduce_sadd_v16i8:
7373
; SVE_128: // %bb.0:
74+
; SVE_128-NEXT: ptrue p0.b
7475
; SVE_128-NEXT: // kill: def $q1 killed $q1 def $z1
75-
; SVE_128-NEXT: sunpklo z2.h, z1.b
7676
; SVE_128-NEXT: // kill: def $q0 killed $q0 def $z0
77-
; SVE_128-NEXT: sunpklo z3.h, z0.b
78-
; SVE_128-NEXT: ptrue p0.s
79-
; SVE_128-NEXT: ext z1.b, z1.b, z1.b, #8
80-
; SVE_128-NEXT: ext z0.b, z0.b, z0.b, #8
81-
; SVE_128-NEXT: sunpklo z1.h, z1.b
82-
; SVE_128-NEXT: sunpklo z0.h, z0.b
83-
; SVE_128-NEXT: sunpklo z4.s, z2.h
84-
; SVE_128-NEXT: ext z2.b, z2.b, z2.b, #8
85-
; SVE_128-NEXT: sunpklo z6.s, z3.h
86-
; SVE_128-NEXT: ext z3.b, z3.b, z3.b, #8
87-
; SVE_128-NEXT: mov z5.d, z1.d
88-
; SVE_128-NEXT: sunpklo z7.s, z0.h
89-
; SVE_128-NEXT: ext z0.b, z0.b, z0.b, #8
90-
; SVE_128-NEXT: sunpklo z2.s, z2.h
91-
; SVE_128-NEXT: sunpklo z3.s, z3.h
92-
; SVE_128-NEXT: add z4.s, z6.s, z4.s
93-
; SVE_128-NEXT: ext z5.b, z5.b, z1.b, #8
94-
; SVE_128-NEXT: sunpklo z1.s, z1.h
95-
; SVE_128-NEXT: sunpklo z0.s, z0.h
96-
; SVE_128-NEXT: add z2.s, z3.s, z2.s
97-
; SVE_128-NEXT: sunpklo z5.s, z5.h
98-
; SVE_128-NEXT: add z1.s, z7.s, z1.s
99-
; SVE_128-NEXT: add z0.s, z0.s, z5.s
100-
; SVE_128-NEXT: add z1.s, z4.s, z1.s
101-
; SVE_128-NEXT: add z0.s, z2.s, z0.s
102-
; SVE_128-NEXT: add z0.s, z1.s, z0.s
103-
; SVE_128-NEXT: uaddv d0, p0, z0.s
104-
; SVE_128-NEXT: fmov x0, d0
105-
; SVE_128-NEXT: // kill: def $w0 killed $w0 killed $x0
77+
; SVE_128-NEXT: saddv d1, p0, z1.b
78+
; SVE_128-NEXT: saddv d0, p0, z0.b
79+
; SVE_128-NEXT: fmov x8, d1
80+
; SVE_128-NEXT: fmov x9, d0
81+
; SVE_128-NEXT: add w0, w9, w8
10682
; SVE_128-NEXT: ret
10783
;
10884
; SVE_MIN_256-LABEL: reduce_sadd_v16i8:
10985
; SVE_MIN_256: // %bb.0:
110-
; SVE_MIN_256-NEXT: // kill: def $q1 killed $q1 def $z1
86+
; SVE_MIN_256-NEXT: ptrue p0.b, vl16
11187
; SVE_MIN_256-NEXT: // kill: def $q0 killed $q0 def $z0
112-
; SVE_MIN_256-NEXT: sunpklo z2.h, z1.b
113-
; SVE_MIN_256-NEXT: sunpklo z3.h, z0.b
114-
; SVE_MIN_256-NEXT: ptrue p0.s, vl8
115-
; SVE_MIN_256-NEXT: ext z1.b, z1.b, z1.b, #8
116-
; SVE_MIN_256-NEXT: ext z0.b, z0.b, z0.b, #8
117-
; SVE_MIN_256-NEXT: sunpklo z1.h, z1.b
118-
; SVE_MIN_256-NEXT: sunpklo z0.h, z0.b
119-
; SVE_MIN_256-NEXT: add z2.h, z3.h, z2.h
120-
; SVE_MIN_256-NEXT: add z0.h, z0.h, z1.h
121-
; SVE_MIN_256-NEXT: sunpklo z1.s, z2.h
122-
; SVE_MIN_256-NEXT: sunpklo z0.s, z0.h
123-
; SVE_MIN_256-NEXT: add z0.s, z1.s, z0.s
124-
; SVE_MIN_256-NEXT: uaddv d0, p0, z0.s
88+
; SVE_MIN_256-NEXT: // kill: def $q1 killed $q1 def $z1
89+
; SVE_MIN_256-NEXT: splice z0.b, p0, z0.b, z1.b
90+
; SVE_MIN_256-NEXT: ptrue p0.b, vl32
91+
; SVE_MIN_256-NEXT: saddv d0, p0, z0.b
12592
; SVE_MIN_256-NEXT: fmov x0, d0
12693
; SVE_MIN_256-NEXT: // kill: def $w0 killed $w0 killed $x0
12794
; SVE_MIN_256-NEXT: ret

0 commit comments

Comments
 (0)