Skip to content

Commit 39c05a1

Browse files
committed
[AArch64][GlobalISel] Add selection support for v2s32 and v2s64 reductions for FADD/ADD.
We'll need legalizer lower() support for the other types to work. Differential Revision: https://reviews.llvm.org/D89159
1 parent 53b6982 commit 39c05a1

File tree

4 files changed

+215
-9
lines changed

4 files changed

+215
-9
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ class AArch64InstructionSelector : public InstructionSelector {
152152
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
153153
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
154154
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
155+
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
155156

156157
unsigned emitConstantPoolEntry(const Constant *CPVal,
157158
MachineFunction &MF) const;
@@ -2959,11 +2960,52 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
29592960
return selectConcatVectors(I, MRI);
29602961
case TargetOpcode::G_JUMP_TABLE:
29612962
return selectJumpTable(I, MRI);
2963+
case TargetOpcode::G_VECREDUCE_FADD:
2964+
case TargetOpcode::G_VECREDUCE_ADD:
2965+
return selectReduction(I, MRI);
29622966
}
29632967

29642968
return false;
29652969
}
29662970

2971+
/// Select a G_VECREDUCE_ADD or G_VECREDUCE_FADD by rewriting \p I in place to
/// the corresponding AArch64 opcode, chosen from the type of the source vector
/// (operand 1).
///
/// Handled source vector types:
///   - G_VECREDUCE_ADD:  <16 x s8>  -> ADDVv16i8v
///                       <8 x s16>  -> ADDVv8i16v
///                       <4 x s32>  -> ADDVv4i32v
///                       <2 x s64>  -> ADDPv2i64p
///   - G_VECREDUCE_FADD: <2 x s32>  -> FADDPv2i32p
///                       <2 x s64>  -> FADDPv2i64p
///
/// \param I   The reduction instruction to select.
/// \param MRI Register info used to look up the source vector's LLT.
/// \returns true if \p I was rewritten and its register operands were
/// constrained successfully. Returns false without modifying \p I when the
/// opcode or vector type is not handled, and also returns false if operand
/// constraining fails after the rewrite.
bool AArch64InstructionSelector::selectReduction(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const Register VecReg = I.getOperand(1).getReg();
  const LLT VecTy = MRI.getType(VecReg);

  unsigned Opc = 0;
  switch (I.getOpcode()) {
  case TargetOpcode::G_VECREDUCE_ADD:
    if (VecTy == LLT::vector(16, 8))
      Opc = AArch64::ADDVv16i8v;
    else if (VecTy == LLT::vector(8, 16))
      Opc = AArch64::ADDVv8i16v;
    else if (VecTy == LLT::vector(4, 32))
      Opc = AArch64::ADDVv4i32v;
    else if (VecTy == LLT::vector(2, 64))
      Opc = AArch64::ADDPv2i64p;
    else {
      // Terminate the message so following debug output starts on a new line.
      LLVM_DEBUG(dbgs() << "Unhandled type for add reduction\n");
      return false;
    }
    break;
  case TargetOpcode::G_VECREDUCE_FADD:
    if (VecTy == LLT::vector(2, 32))
      Opc = AArch64::FADDPv2i32p;
    else if (VecTy == LLT::vector(2, 64))
      Opc = AArch64::FADDPv2i64p;
    else {
      // Terminate the message so following debug output starts on a new line.
      LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction\n");
      return false;
    }
    break;
  default:
    // Not a reduction this routine knows how to select.
    return false;
  }

  // Both reductions keep the generic operand list (scalar def, vector use), so
  // only the descriptor changes; the operands then get concrete register
  // classes.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
3008+
29673009
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
29683010
MachineRegisterInfo &MRI) const {
29693011
assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
3+
---
4+
name: add_B
5+
alignment: 4
6+
legalized: true
7+
regBankSelected: true
8+
tracksRegLiveness: true
9+
liveins:
10+
- { reg: '$x0' }
11+
body: |
12+
bb.1:
13+
liveins: $x0
14+
15+
; CHECK-LABEL: name: add_B
16+
; CHECK: liveins: $x0
17+
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
18+
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
19+
; CHECK: [[ADDVv16i8v:%[0-9]+]]:fpr8 = ADDVv16i8v [[LDRQui]]
20+
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[ADDVv16i8v]], %subreg.bsub
21+
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
22+
; CHECK: $w0 = COPY [[COPY1]]
23+
; CHECK: RET_ReallyLR implicit $w0
24+
%0:gpr(p0) = COPY $x0
25+
%1:fpr(<16 x s8>) = G_LOAD %0(p0) :: (load 16)
26+
%2:fpr(s8) = G_VECREDUCE_ADD %1(<16 x s8>)
27+
%4:gpr(s8) = COPY %2(s8)
28+
%3:gpr(s32) = G_ANYEXT %4(s8)
29+
$w0 = COPY %3(s32)
30+
RET_ReallyLR implicit $w0
31+
32+
...
33+
---
34+
name: add_H
35+
alignment: 4
36+
legalized: true
37+
regBankSelected: true
38+
tracksRegLiveness: true
39+
liveins:
40+
- { reg: '$x0' }
41+
body: |
42+
bb.1:
43+
liveins: $x0
44+
45+
; CHECK-LABEL: name: add_H
46+
; CHECK: liveins: $x0
47+
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
48+
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
49+
; CHECK: [[ADDVv8i16v:%[0-9]+]]:fpr16 = ADDVv8i16v [[LDRQui]]
50+
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[ADDVv8i16v]], %subreg.hsub
51+
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
52+
; CHECK: $w0 = COPY [[COPY1]]
53+
; CHECK: RET_ReallyLR implicit $w0
54+
%0:gpr(p0) = COPY $x0
55+
%1:fpr(<8 x s16>) = G_LOAD %0(p0) :: (load 16)
56+
%2:fpr(s16) = G_VECREDUCE_ADD %1(<8 x s16>)
57+
%4:gpr(s16) = COPY %2(s16)
58+
%3:gpr(s32) = G_ANYEXT %4(s16)
59+
$w0 = COPY %3(s32)
60+
RET_ReallyLR implicit $w0
61+
62+
...
63+
---
64+
name: add_S
65+
alignment: 4
66+
legalized: true
67+
regBankSelected: true
68+
tracksRegLiveness: true
69+
liveins:
70+
- { reg: '$x0' }
71+
body: |
72+
bb.1:
73+
liveins: $x0
74+
75+
; CHECK-LABEL: name: add_S
76+
; CHECK: liveins: $x0
77+
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
78+
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
79+
; CHECK: [[ADDVv4i32v:%[0-9]+]]:fpr32 = ADDVv4i32v [[LDRQui]]
80+
; CHECK: $w0 = COPY [[ADDVv4i32v]]
81+
; CHECK: RET_ReallyLR implicit $w0
82+
%0:gpr(p0) = COPY $x0
83+
%1:fpr(<4 x s32>) = G_LOAD %0(p0) :: (load 16)
84+
%2:fpr(s32) = G_VECREDUCE_ADD %1(<4 x s32>)
85+
$w0 = COPY %2(s32)
86+
RET_ReallyLR implicit $w0
87+
88+
...
89+
---
90+
name: add_D
91+
alignment: 4
92+
legalized: true
93+
regBankSelected: true
94+
tracksRegLiveness: true
95+
liveins:
96+
- { reg: '$x0' }
97+
body: |
98+
bb.1:
99+
liveins: $x0
100+
101+
; CHECK-LABEL: name: add_D
102+
; CHECK: liveins: $x0
103+
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
104+
; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
105+
; CHECK: [[ADDPv2i64p:%[0-9]+]]:fpr64 = ADDPv2i64p [[LDRQui]]
106+
; CHECK: $x0 = COPY [[ADDPv2i64p]]
107+
; CHECK: RET_ReallyLR implicit $x0
108+
%0:gpr(p0) = COPY $x0
109+
%1:fpr(<2 x s64>) = G_LOAD %0(p0) :: (load 16)
110+
%2:fpr(s64) = G_VECREDUCE_ADD %1(<2 x s64>)
111+
$x0 = COPY %2(s64)
112+
RET_ReallyLR implicit $x0
113+
114+
...
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
3+
---
4+
name: fadd_v2s32
5+
legalized: true
6+
regBankSelected: true
7+
tracksRegLiveness: true
8+
body: |
9+
bb.1:
10+
liveins: $d0
11+
12+
; CHECK-LABEL: name: fadd_v2s32
13+
; CHECK: liveins: $d0
14+
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
15+
; CHECK: [[FADDPv2i32p:%[0-9]+]]:fpr32 = FADDPv2i32p [[COPY]]
16+
; CHECK: $w0 = COPY [[FADDPv2i32p]]
17+
; CHECK: RET_ReallyLR implicit $w0
18+
%0:fpr(<2 x s32>) = COPY $d0
19+
%1:fpr(s32) = G_VECREDUCE_FADD %0(<2 x s32>)
20+
$w0 = COPY %1(s32)
21+
RET_ReallyLR implicit $w0
22+
23+
...
24+
---
25+
name: fadd_v2s64
26+
legalized: true
27+
regBankSelected: true
28+
tracksRegLiveness: true
29+
body: |
30+
bb.1:
31+
liveins: $q0
32+
33+
; CHECK-LABEL: name: fadd_v2s64
34+
; CHECK: liveins: $q0
35+
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
36+
; CHECK: [[FADDPv2i64p:%[0-9]+]]:fpr64 = FADDPv2i64p [[COPY]]
37+
; CHECK: $x0 = COPY [[FADDPv2i64p]]
38+
; CHECK: RET_ReallyLR implicit $x0
39+
%0:fpr(<2 x s64>) = COPY $q0
40+
%2:fpr(s64) = G_VECREDUCE_FADD %0(<2 x s64>)
41+
$x0 = COPY %2(s64)
42+
RET_ReallyLR implicit $x0
43+
44+
...

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
142142
}
143143

144144
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
145-
declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
145+
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
146146

147147
define i16 @uabd16b_rdx(<16 x i8>* %a, <16 x i8>* %b) {
148148
; CHECK-LABEL: uabd16b_rdx
@@ -168,7 +168,7 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
168168
%abcmp = icmp slt <16 x i32> %abdiff, zeroinitializer
169169
%ababs = sub nsw <16 x i32> zeroinitializer, %abdiff
170170
%absel = select <16 x i1> %abcmp, <16 x i32> %ababs, <16 x i32> %abdiff
171-
%reduced_v = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %absel)
171+
%reduced_v = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %absel)
172172
ret i32 %reduced_v
173173
}
174174

@@ -181,13 +181,13 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
181181
%abcmp = icmp slt <16 x i32> %abdiff, zeroinitializer
182182
%ababs = sub nsw <16 x i32> zeroinitializer, %abdiff
183183
%absel = select <16 x i1> %abcmp, <16 x i32> %ababs, <16 x i32> %abdiff
184-
%reduced_v = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %absel)
184+
%reduced_v = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %absel)
185185
ret i32 %reduced_v
186186
}
187187

188188

189189
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
190-
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
190+
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
191191

192192
define i32 @uabd8h_rdx(<8 x i16>* %a, <8 x i16>* %b) {
193193
; CHECK-LABEL: uabd8h_rdx
@@ -219,19 +219,22 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
219219

220220
define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
221221
; CHECK-LABEL: uabdl4s_rdx_i32
222-
; CHECK: uabdl.4s
222+
; DAG: uabdl.4s
223+
224+
; GISel doesn't match this pattern yet.
225+
; GISEL: addv.4s
223226
%aext = zext <4 x i16> %a to <4 x i32>
224227
%bext = zext <4 x i16> %b to <4 x i32>
225228
%abdiff = sub nsw <4 x i32> %aext, %bext
226229
%abcmp = icmp slt <4 x i32> %abdiff, zeroinitializer
227230
%ababs = sub nsw <4 x i32> zeroinitializer, %abdiff
228231
%absel = select <4 x i1> %abcmp, <4 x i32> %ababs, <4 x i32> %abdiff
229-
%reduced_v = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %absel)
232+
%reduced_v = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %absel)
230233
ret i32 %reduced_v
231234
}
232235

233236
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
234-
declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
237+
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
235238

236239
define i64 @uabd4s_rdx(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
237240
; CHECK: uabd4s_rdx
@@ -263,14 +266,17 @@ define i64 @sabd4s_rdx(<4 x i32> %a, <4 x i32> %b) {
263266

264267
define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) {
265268
; CHECK-LABEL: uabdl2d_rdx_i64
266-
; CHECK: uabdl.2d
269+
; DAG: uabdl.2d
270+
271+
; GISel doesn't match this pattern yet.
272+
; GISEL: addp.2d
267273
%aext = zext <2 x i32> %a to <2 x i64>
268274
%bext = zext <2 x i32> %b to <2 x i64>
269275
%abdiff = sub nsw <2 x i64> %aext, %bext
270276
%abcmp = icmp slt <2 x i64> %abdiff, zeroinitializer
271277
%ababs = sub nsw <2 x i64> zeroinitializer, %abdiff
272278
%absel = select <2 x i1> %abcmp, <2 x i64> %ababs, <2 x i64> %abdiff
273-
%reduced_v = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %absel)
279+
%reduced_v = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %absel)
274280
ret i64 %reduced_v
275281
}
276282

0 commit comments

Comments
 (0)