Skip to content

Commit 2c552d3

Browse files
authored
[AArch64][GlobalISel] Legalize G_ABS for Larger/Smaller Vectors (#79117)
Legalize G_ABS for larger/smaller width vectors with legal element sizes. Falls back for the smaller-width vector tests because G_ANYEXT cannot be legalized for smaller-width vectors.
1 parent 3bf21ba commit 2c552d3

File tree

4 files changed

+482
-113
lines changed

4 files changed

+482
-113
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5254,6 +5254,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
52545254
case TargetOpcode::G_BSWAP:
52555255
case TargetOpcode::G_FCANONICALIZE:
52565256
case TargetOpcode::G_SEXT_INREG:
5257+
case TargetOpcode::G_ABS:
52575258
if (TypeIdx != 0)
52585259
return UnableToLegalize;
52595260
Observer.changingInstr(MI);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -988,9 +988,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
988988
if (HasCSSC)
989989
ABSActions
990990
.legalFor({s32, s64});
991-
ABSActions
992-
.legalFor(PackedVectorAllTypeList)
993-
.lowerIf(isScalar(0));
991+
ABSActions.legalFor(PackedVectorAllTypeList)
992+
.clampNumElements(0, v8s8, v16s8)
993+
.clampNumElements(0, v4s16, v8s16)
994+
.clampNumElements(0, v2s32, v4s32)
995+
.clampNumElements(0, v2s64, v2s64)
996+
.moreElementsToNextPow2(0)
997+
.lower();
994998

995999
// For fadd reductions we have pairwise operations available. We treat the
9961000
// usual legal types as legal and handle the lowering to pairwise instructions

llvm/test/CodeGen/AArch64/abs.ll

Lines changed: 372 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,372 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
; CHECK-GI: warning: Instruction selection used fallback path for abs_v4i8
6+
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_v2i16
7+
8+
; ===== Legal Scalars =====
9+
10+
define i8 @abs_i8(i8 %a){
11+
; CHECK-SD-LABEL: abs_i8:
12+
; CHECK-SD: // %bb.0: // %entry
13+
; CHECK-SD-NEXT: sxtb w8, w0
14+
; CHECK-SD-NEXT: cmp w8, #0
15+
; CHECK-SD-NEXT: cneg w0, w8, mi
16+
; CHECK-SD-NEXT: ret
17+
;
18+
; CHECK-GI-LABEL: abs_i8:
19+
; CHECK-GI: // %bb.0: // %entry
20+
; CHECK-GI-NEXT: sxtb w8, w0
21+
; CHECK-GI-NEXT: asr w8, w8, #7
22+
; CHECK-GI-NEXT: add w9, w0, w8
23+
; CHECK-GI-NEXT: eor w0, w9, w8
24+
; CHECK-GI-NEXT: ret
25+
entry:
26+
%res = call i8 @llvm.abs.i8(i8 %a, i1 0)
27+
ret i8 %res
28+
}
29+
declare i8 @llvm.abs.i8(i8, i1)
30+
31+
define i16 @abs_i16(i16 %a){
32+
; CHECK-SD-LABEL: abs_i16:
33+
; CHECK-SD: // %bb.0: // %entry
34+
; CHECK-SD-NEXT: sxth w8, w0
35+
; CHECK-SD-NEXT: cmp w8, #0
36+
; CHECK-SD-NEXT: cneg w0, w8, mi
37+
; CHECK-SD-NEXT: ret
38+
;
39+
; CHECK-GI-LABEL: abs_i16:
40+
; CHECK-GI: // %bb.0: // %entry
41+
; CHECK-GI-NEXT: sxth w8, w0
42+
; CHECK-GI-NEXT: asr w8, w8, #15
43+
; CHECK-GI-NEXT: add w9, w0, w8
44+
; CHECK-GI-NEXT: eor w0, w9, w8
45+
; CHECK-GI-NEXT: ret
46+
entry:
47+
%res = call i16 @llvm.abs.i16(i16 %a, i1 0)
48+
ret i16 %res
49+
}
50+
declare i16 @llvm.abs.i16(i16, i1)
51+
52+
define i32 @abs_i32(i32 %a){
53+
; CHECK-SD-LABEL: abs_i32:
54+
; CHECK-SD: // %bb.0: // %entry
55+
; CHECK-SD-NEXT: cmp w0, #0
56+
; CHECK-SD-NEXT: cneg w0, w0, mi
57+
; CHECK-SD-NEXT: ret
58+
;
59+
; CHECK-GI-LABEL: abs_i32:
60+
; CHECK-GI: // %bb.0: // %entry
61+
; CHECK-GI-NEXT: asr w8, w0, #31
62+
; CHECK-GI-NEXT: add w9, w0, w8
63+
; CHECK-GI-NEXT: eor w0, w9, w8
64+
; CHECK-GI-NEXT: ret
65+
entry:
66+
%res = call i32 @llvm.abs.i32(i32 %a, i1 0)
67+
ret i32 %res
68+
}
69+
declare i32 @llvm.abs.i32(i32, i1)
70+
71+
define i64 @abs_i64(i64 %a){
72+
; CHECK-SD-LABEL: abs_i64:
73+
; CHECK-SD: // %bb.0: // %entry
74+
; CHECK-SD-NEXT: cmp x0, #0
75+
; CHECK-SD-NEXT: cneg x0, x0, mi
76+
; CHECK-SD-NEXT: ret
77+
;
78+
; CHECK-GI-LABEL: abs_i64:
79+
; CHECK-GI: // %bb.0: // %entry
80+
; CHECK-GI-NEXT: asr x8, x0, #63
81+
; CHECK-GI-NEXT: add x9, x0, x8
82+
; CHECK-GI-NEXT: eor x0, x9, x8
83+
; CHECK-GI-NEXT: ret
84+
entry:
85+
%res = call i64 @llvm.abs.i64(i64 %a, i1 0)
86+
ret i64 %res
87+
}
88+
declare i64 @llvm.abs.i64(i64, i1)
89+
90+
define i128 @abs_i128(i128 %a){
91+
; CHECK-SD-LABEL: abs_i128:
92+
; CHECK-SD: // %bb.0: // %entry
93+
; CHECK-SD-NEXT: asr x8, x1, #63
94+
; CHECK-SD-NEXT: eor x9, x0, x8
95+
; CHECK-SD-NEXT: eor x10, x1, x8
96+
; CHECK-SD-NEXT: subs x0, x9, x8
97+
; CHECK-SD-NEXT: sbc x1, x10, x8
98+
; CHECK-SD-NEXT: ret
99+
;
100+
; CHECK-GI-LABEL: abs_i128:
101+
; CHECK-GI: // %bb.0: // %entry
102+
; CHECK-GI-NEXT: asr x8, x1, #63
103+
; CHECK-GI-NEXT: adds x9, x0, x8
104+
; CHECK-GI-NEXT: adc x10, x1, x8
105+
; CHECK-GI-NEXT: eor x0, x9, x8
106+
; CHECK-GI-NEXT: eor x1, x10, x8
107+
; CHECK-GI-NEXT: ret
108+
entry:
109+
%res = call i128 @llvm.abs.i128(i128 %a, i1 0)
110+
ret i128 %res
111+
}
112+
declare i128 @llvm.abs.i128(i128, i1)
113+
114+
; ===== Legal Vector Types =====
115+
116+
define <8 x i8> @abs_v8i8(<8 x i8> %a){
117+
; CHECK-LABEL: abs_v8i8:
118+
; CHECK: // %bb.0: // %entry
119+
; CHECK-NEXT: abs v0.8b, v0.8b
120+
; CHECK-NEXT: ret
121+
entry:
122+
%res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %a, i1 0)
123+
ret <8 x i8> %res
124+
}
125+
declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
126+
127+
define <16 x i8> @abs_v16i8(<16 x i8> %a){
128+
; CHECK-LABEL: abs_v16i8:
129+
; CHECK: // %bb.0: // %entry
130+
; CHECK-NEXT: abs v0.16b, v0.16b
131+
; CHECK-NEXT: ret
132+
entry:
133+
%res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a, i1 0)
134+
ret <16 x i8> %res
135+
}
136+
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
137+
138+
define <4 x i16> @abs_v4i16(<4 x i16> %a){
139+
; CHECK-LABEL: abs_v4i16:
140+
; CHECK: // %bb.0: // %entry
141+
; CHECK-NEXT: abs v0.4h, v0.4h
142+
; CHECK-NEXT: ret
143+
entry:
144+
%res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %a, i1 0)
145+
ret <4 x i16> %res
146+
}
147+
declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
148+
149+
define <8 x i16> @abs_v8i16(<8 x i16> %a){
150+
; CHECK-LABEL: abs_v8i16:
151+
; CHECK: // %bb.0: // %entry
152+
; CHECK-NEXT: abs v0.8h, v0.8h
153+
; CHECK-NEXT: ret
154+
entry:
155+
%res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 0)
156+
ret <8 x i16> %res
157+
}
158+
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
159+
160+
define <2 x i32> @abs_v2i32(<2 x i32> %a){
161+
; CHECK-LABEL: abs_v2i32:
162+
; CHECK: // %bb.0: // %entry
163+
; CHECK-NEXT: abs v0.2s, v0.2s
164+
; CHECK-NEXT: ret
165+
entry:
166+
%res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %a, i1 0)
167+
ret <2 x i32> %res
168+
}
169+
declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
170+
171+
define <4 x i32> @abs_v4i32(<4 x i32> %a){
172+
; CHECK-LABEL: abs_v4i32:
173+
; CHECK: // %bb.0: // %entry
174+
; CHECK-NEXT: abs v0.4s, v0.4s
175+
; CHECK-NEXT: ret
176+
entry:
177+
%res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 0)
178+
ret <4 x i32> %res
179+
}
180+
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
181+
182+
define <2 x i64> @abs_v2i64(<2 x i64> %a){
183+
; CHECK-LABEL: abs_v2i64:
184+
; CHECK: // %bb.0: // %entry
185+
; CHECK-NEXT: abs v0.2d, v0.2d
186+
; CHECK-NEXT: ret
187+
entry:
188+
%res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a, i1 0)
189+
ret <2 x i64> %res
190+
}
191+
declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
192+
193+
; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
194+
195+
define <4 x i8> @abs_v4i8(<4 x i8> %a){
196+
; CHECK-LABEL: abs_v4i8:
197+
; CHECK: // %bb.0: // %entry
198+
; CHECK-NEXT: shl v0.4h, v0.4h, #8
199+
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
200+
; CHECK-NEXT: abs v0.4h, v0.4h
201+
; CHECK-NEXT: ret
202+
entry:
203+
%res = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %a, i1 0)
204+
ret <4 x i8> %res
205+
}
206+
declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1)
207+
208+
define <32 x i8> @abs_v32i8(<32 x i8> %a){
209+
; CHECK-LABEL: abs_v32i8:
210+
; CHECK: // %bb.0: // %entry
211+
; CHECK-NEXT: abs v0.16b, v0.16b
212+
; CHECK-NEXT: abs v1.16b, v1.16b
213+
; CHECK-NEXT: ret
214+
entry:
215+
%res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 0)
216+
ret <32 x i8> %res
217+
}
218+
declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
219+
220+
define <2 x i16> @abs_v2i16(<2 x i16> %a){
221+
; CHECK-LABEL: abs_v2i16:
222+
; CHECK: // %bb.0: // %entry
223+
; CHECK-NEXT: shl v0.2s, v0.2s, #16
224+
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
225+
; CHECK-NEXT: abs v0.2s, v0.2s
226+
; CHECK-NEXT: ret
227+
entry:
228+
%res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %a, i1 0)
229+
ret <2 x i16> %res
230+
}
231+
declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
232+
233+
define <16 x i16> @abs_v16i16(<16 x i16> %a){
234+
; CHECK-LABEL: abs_v16i16:
235+
; CHECK: // %bb.0: // %entry
236+
; CHECK-NEXT: abs v0.8h, v0.8h
237+
; CHECK-NEXT: abs v1.8h, v1.8h
238+
; CHECK-NEXT: ret
239+
entry:
240+
%res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 0)
241+
ret <16 x i16> %res
242+
}
243+
declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
244+
245+
define <1 x i32> @abs_v1i32(<1 x i32> %a){
246+
; CHECK-SD-LABEL: abs_v1i32:
247+
; CHECK-SD: // %bb.0: // %entry
248+
; CHECK-SD-NEXT: abs v0.2s, v0.2s
249+
; CHECK-SD-NEXT: ret
250+
;
251+
; CHECK-GI-LABEL: abs_v1i32:
252+
; CHECK-GI: // %bb.0: // %entry
253+
; CHECK-GI-NEXT: fmov x8, d0
254+
; CHECK-GI-NEXT: asr w9, w8, #31
255+
; CHECK-GI-NEXT: add w8, w8, w9
256+
; CHECK-GI-NEXT: eor w8, w8, w9
257+
; CHECK-GI-NEXT: fmov s0, w8
258+
; CHECK-GI-NEXT: mov v0.s[1], w8
259+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
260+
; CHECK-GI-NEXT: ret
261+
entry:
262+
%res = call <1 x i32> @llvm.abs.v1i32(<1 x i32> %a, i1 0)
263+
ret <1 x i32> %res
264+
}
265+
declare <1 x i32> @llvm.abs.v1i32(<1 x i32>, i1)
266+
267+
define <8 x i32> @abs_v8i32(<8 x i32> %a){
268+
; CHECK-LABEL: abs_v8i32:
269+
; CHECK: // %bb.0: // %entry
270+
; CHECK-NEXT: abs v0.4s, v0.4s
271+
; CHECK-NEXT: abs v1.4s, v1.4s
272+
; CHECK-NEXT: ret
273+
entry:
274+
%res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 0)
275+
ret <8 x i32> %res
276+
}
277+
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
278+
279+
define <4 x i64> @abs_v4i64(<4 x i64> %a){
280+
; CHECK-LABEL: abs_v4i64:
281+
; CHECK: // %bb.0: // %entry
282+
; CHECK-NEXT: abs v0.2d, v0.2d
283+
; CHECK-NEXT: abs v1.2d, v1.2d
284+
; CHECK-NEXT: ret
285+
entry:
286+
%res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 0)
287+
ret <4 x i64> %res
288+
}
289+
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
290+
291+
; ===== Vectors with Non-Pow 2 Widths =====
292+
293+
define <3 x i8> @abs_v3i8(<3 x i8> %a){
294+
; CHECK-SD-LABEL: abs_v3i8:
295+
; CHECK-SD: // %bb.0: // %entry
296+
; CHECK-SD-NEXT: fmov s0, w0
297+
; CHECK-SD-NEXT: mov v0.h[1], w1
298+
; CHECK-SD-NEXT: mov v0.h[2], w2
299+
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
300+
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
301+
; CHECK-SD-NEXT: abs v0.4h, v0.4h
302+
; CHECK-SD-NEXT: umov w0, v0.h[0]
303+
; CHECK-SD-NEXT: umov w1, v0.h[1]
304+
; CHECK-SD-NEXT: umov w2, v0.h[2]
305+
; CHECK-SD-NEXT: ret
306+
;
307+
; CHECK-GI-LABEL: abs_v3i8:
308+
; CHECK-GI: // %bb.0: // %entry
309+
; CHECK-GI-NEXT: fmov s0, w0
310+
; CHECK-GI-NEXT: fmov s1, w1
311+
; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
312+
; CHECK-GI-NEXT: fmov s1, w2
313+
; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
314+
; CHECK-GI-NEXT: mov v0.b[3], v0.b[0]
315+
; CHECK-GI-NEXT: mov v0.b[4], v0.b[0]
316+
; CHECK-GI-NEXT: mov v0.b[5], v0.b[0]
317+
; CHECK-GI-NEXT: mov v0.b[6], v0.b[0]
318+
; CHECK-GI-NEXT: mov v0.b[7], v0.b[0]
319+
; CHECK-GI-NEXT: abs v0.8b, v0.8b
320+
; CHECK-GI-NEXT: umov w0, v0.b[0]
321+
; CHECK-GI-NEXT: umov w1, v0.b[1]
322+
; CHECK-GI-NEXT: umov w2, v0.b[2]
323+
; CHECK-GI-NEXT: ret
324+
entry:
325+
%res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %a, i1 0)
326+
ret <3 x i8> %res
327+
}
328+
declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
329+
330+
define <7 x i8> @abs_v7i8(<7 x i8> %a){
331+
; CHECK-LABEL: abs_v7i8:
332+
; CHECK: // %bb.0: // %entry
333+
; CHECK-NEXT: abs v0.8b, v0.8b
334+
; CHECK-NEXT: ret
335+
entry:
336+
%res = call <7 x i8> @llvm.abs.v7i8(<7 x i8> %a, i1 0)
337+
ret <7 x i8> %res
338+
}
339+
declare <7 x i8> @llvm.abs.v7i8(<7 x i8>, i1)
340+
341+
define <3 x i16> @abs_v3i16(<3 x i16> %a){
342+
; CHECK-LABEL: abs_v3i16:
343+
; CHECK: // %bb.0: // %entry
344+
; CHECK-NEXT: abs v0.4h, v0.4h
345+
; CHECK-NEXT: ret
346+
entry:
347+
%res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %a, i1 0)
348+
ret <3 x i16> %res
349+
}
350+
declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
351+
352+
define <7 x i16> @abs_v7i16(<7 x i16> %a){
353+
; CHECK-LABEL: abs_v7i16:
354+
; CHECK: // %bb.0: // %entry
355+
; CHECK-NEXT: abs v0.8h, v0.8h
356+
; CHECK-NEXT: ret
357+
entry:
358+
%res = call <7 x i16> @llvm.abs.v7i16(<7 x i16> %a, i1 0)
359+
ret <7 x i16> %res
360+
}
361+
declare <7 x i16> @llvm.abs.v7i16(<7 x i16>, i1)
362+
363+
define <3 x i32> @abs_v3i32(<3 x i32> %a){
364+
; CHECK-LABEL: abs_v3i32:
365+
; CHECK: // %bb.0: // %entry
366+
; CHECK-NEXT: abs v0.4s, v0.4s
367+
; CHECK-NEXT: ret
368+
entry:
369+
%res = call <3 x i32> @llvm.abs.v3i32(<3 x i32> %a, i1 0)
370+
ret <3 x i32> %res
371+
}
372+
declare <3 x i32> @llvm.abs.v3i32(<3 x i32>, i1)

0 commit comments

Comments
 (0)