Skip to content

Commit 6df192c

Browse files
committed
[AArch64] Additional gisel tests for udiv by constant and zext/sext/trunc. NFC
1 parent 4254f27 commit 6df192c

File tree

5 files changed

+1122
-54
lines changed

5 files changed

+1122
-54
lines changed
Lines changed: 204 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,153 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
2+
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
; CHECK-GI: warning: Instruction selection used fallback path for udiv_v2i64
36

47
define <16 x i8> @div16xi8(<16 x i8> %x) {
5-
; CHECK-LABEL: div16xi8:
6-
; CHECK: // %bb.0:
7-
; CHECK-NEXT: movi v1.16b, #41
8-
; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
9-
; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
10-
; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
11-
; CHECK-NEXT: sshr v0.16b, v0.16b, #2
12-
; CHECK-NEXT: usra v0.16b, v0.16b, #7
13-
; CHECK-NEXT: ret
8+
; CHECK-SD-LABEL: div16xi8:
9+
; CHECK-SD: // %bb.0:
10+
; CHECK-SD-NEXT: movi v1.16b, #41
11+
; CHECK-SD-NEXT: smull2 v2.8h, v0.16b, v1.16b
12+
; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b
13+
; CHECK-SD-NEXT: uzp2 v0.16b, v0.16b, v2.16b
14+
; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2
15+
; CHECK-SD-NEXT: usra v0.16b, v0.16b, #7
16+
; CHECK-SD-NEXT: ret
17+
;
18+
; CHECK-GI-LABEL: div16xi8:
19+
; CHECK-GI: // %bb.0:
20+
; CHECK-GI-NEXT: smov w9, v0.b[0]
21+
; CHECK-GI-NEXT: mov w8, #25 // =0x19
22+
; CHECK-GI-NEXT: smov w10, v0.b[1]
23+
; CHECK-GI-NEXT: smov w11, v0.b[2]
24+
; CHECK-GI-NEXT: smov w12, v0.b[3]
25+
; CHECK-GI-NEXT: smov w13, v0.b[4]
26+
; CHECK-GI-NEXT: smov w14, v0.b[5]
27+
; CHECK-GI-NEXT: smov w15, v0.b[6]
28+
; CHECK-GI-NEXT: smov w16, v0.b[7]
29+
; CHECK-GI-NEXT: smov w17, v0.b[8]
30+
; CHECK-GI-NEXT: smov w18, v0.b[9]
31+
; CHECK-GI-NEXT: sdiv w9, w9, w8
32+
; CHECK-GI-NEXT: sdiv w10, w10, w8
33+
; CHECK-GI-NEXT: fmov s1, w9
34+
; CHECK-GI-NEXT: sdiv w11, w11, w8
35+
; CHECK-GI-NEXT: mov v1.b[1], w10
36+
; CHECK-GI-NEXT: smov w10, v0.b[10]
37+
; CHECK-GI-NEXT: sdiv w12, w12, w8
38+
; CHECK-GI-NEXT: mov v1.b[2], w11
39+
; CHECK-GI-NEXT: smov w11, v0.b[11]
40+
; CHECK-GI-NEXT: sdiv w13, w13, w8
41+
; CHECK-GI-NEXT: mov v1.b[3], w12
42+
; CHECK-GI-NEXT: smov w12, v0.b[12]
43+
; CHECK-GI-NEXT: sdiv w14, w14, w8
44+
; CHECK-GI-NEXT: mov v1.b[4], w13
45+
; CHECK-GI-NEXT: smov w13, v0.b[13]
46+
; CHECK-GI-NEXT: sdiv w15, w15, w8
47+
; CHECK-GI-NEXT: mov v1.b[5], w14
48+
; CHECK-GI-NEXT: sdiv w16, w16, w8
49+
; CHECK-GI-NEXT: mov v1.b[6], w15
50+
; CHECK-GI-NEXT: sdiv w17, w17, w8
51+
; CHECK-GI-NEXT: mov v1.b[7], w16
52+
; CHECK-GI-NEXT: sdiv w9, w18, w8
53+
; CHECK-GI-NEXT: mov v1.b[8], w17
54+
; CHECK-GI-NEXT: sdiv w10, w10, w8
55+
; CHECK-GI-NEXT: mov v1.b[9], w9
56+
; CHECK-GI-NEXT: smov w9, v0.b[14]
57+
; CHECK-GI-NEXT: sdiv w11, w11, w8
58+
; CHECK-GI-NEXT: mov v1.b[10], w10
59+
; CHECK-GI-NEXT: smov w10, v0.b[15]
60+
; CHECK-GI-NEXT: sdiv w12, w12, w8
61+
; CHECK-GI-NEXT: mov v1.b[11], w11
62+
; CHECK-GI-NEXT: sdiv w13, w13, w8
63+
; CHECK-GI-NEXT: mov v1.b[12], w12
64+
; CHECK-GI-NEXT: sdiv w9, w9, w8
65+
; CHECK-GI-NEXT: mov v1.b[13], w13
66+
; CHECK-GI-NEXT: sdiv w8, w10, w8
67+
; CHECK-GI-NEXT: mov v1.b[14], w9
68+
; CHECK-GI-NEXT: mov v1.b[15], w8
69+
; CHECK-GI-NEXT: mov v0.16b, v1.16b
70+
; CHECK-GI-NEXT: ret
1471
%div = sdiv <16 x i8> %x, <i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25>
1572
ret <16 x i8> %div
1673
}
1774

1875
define <8 x i16> @div8xi16(<8 x i16> %x) {
19-
; CHECK-LABEL: div8xi16:
20-
; CHECK: // %bb.0:
21-
; CHECK-NEXT: mov w8, #40815 // =0x9f6f
22-
; CHECK-NEXT: dup v1.8h, w8
23-
; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
24-
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
25-
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
26-
; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
27-
; CHECK-NEXT: sshr v0.8h, v0.8h, #12
28-
; CHECK-NEXT: usra v0.8h, v0.8h, #15
29-
; CHECK-NEXT: ret
76+
; CHECK-SD-LABEL: div8xi16:
77+
; CHECK-SD: // %bb.0:
78+
; CHECK-SD-NEXT: mov w8, #40815 // =0x9f6f
79+
; CHECK-SD-NEXT: dup v1.8h, w8
80+
; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h
81+
; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
82+
; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h
83+
; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
84+
; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #12
85+
; CHECK-SD-NEXT: usra v0.8h, v0.8h, #15
86+
; CHECK-SD-NEXT: ret
87+
;
88+
; CHECK-GI-LABEL: div8xi16:
89+
; CHECK-GI: // %bb.0:
90+
; CHECK-GI-NEXT: smov w9, v0.h[0]
91+
; CHECK-GI-NEXT: mov w8, #6577 // =0x19b1
92+
; CHECK-GI-NEXT: smov w10, v0.h[1]
93+
; CHECK-GI-NEXT: smov w11, v0.h[2]
94+
; CHECK-GI-NEXT: smov w12, v0.h[3]
95+
; CHECK-GI-NEXT: smov w13, v0.h[4]
96+
; CHECK-GI-NEXT: smov w14, v0.h[5]
97+
; CHECK-GI-NEXT: sdiv w9, w9, w8
98+
; CHECK-GI-NEXT: sdiv w10, w10, w8
99+
; CHECK-GI-NEXT: fmov s1, w9
100+
; CHECK-GI-NEXT: sdiv w11, w11, w8
101+
; CHECK-GI-NEXT: mov v1.h[1], w10
102+
; CHECK-GI-NEXT: smov w10, v0.h[6]
103+
; CHECK-GI-NEXT: sdiv w12, w12, w8
104+
; CHECK-GI-NEXT: mov v1.h[2], w11
105+
; CHECK-GI-NEXT: smov w11, v0.h[7]
106+
; CHECK-GI-NEXT: sdiv w13, w13, w8
107+
; CHECK-GI-NEXT: mov v1.h[3], w12
108+
; CHECK-GI-NEXT: sdiv w9, w14, w8
109+
; CHECK-GI-NEXT: mov v1.h[4], w13
110+
; CHECK-GI-NEXT: sdiv w10, w10, w8
111+
; CHECK-GI-NEXT: mov v1.h[5], w9
112+
; CHECK-GI-NEXT: sdiv w8, w11, w8
113+
; CHECK-GI-NEXT: mov v1.h[6], w10
114+
; CHECK-GI-NEXT: mov v1.h[7], w8
115+
; CHECK-GI-NEXT: mov v0.16b, v1.16b
116+
; CHECK-GI-NEXT: ret
30117
%div = sdiv <8 x i16> %x, <i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577>
31118
ret <8 x i16> %div
32119
}
33120

34121
define <4 x i32> @div32xi4(<4 x i32> %x) {
35-
; CHECK-LABEL: div32xi4:
36-
; CHECK: // %bb.0:
37-
; CHECK-NEXT: mov w8, #7527 // =0x1d67
38-
; CHECK-NEXT: movk w8, #28805, lsl #16
39-
; CHECK-NEXT: dup v1.4s, w8
40-
; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
41-
; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
42-
; CHECK-NEXT: uzp2 v1.4s, v0.4s, v2.4s
43-
; CHECK-NEXT: sshr v0.4s, v1.4s, #22
44-
; CHECK-NEXT: usra v0.4s, v1.4s, #31
45-
; CHECK-NEXT: ret
122+
; CHECK-SD-LABEL: div32xi4:
123+
; CHECK-SD: // %bb.0:
124+
; CHECK-SD-NEXT: mov w8, #7527 // =0x1d67
125+
; CHECK-SD-NEXT: movk w8, #28805, lsl #16
126+
; CHECK-SD-NEXT: dup v1.4s, w8
127+
; CHECK-SD-NEXT: smull2 v2.2d, v0.4s, v1.4s
128+
; CHECK-SD-NEXT: smull v0.2d, v0.2s, v1.2s
129+
; CHECK-SD-NEXT: uzp2 v1.4s, v0.4s, v2.4s
130+
; CHECK-SD-NEXT: sshr v0.4s, v1.4s, #22
131+
; CHECK-SD-NEXT: usra v0.4s, v1.4s, #31
132+
; CHECK-SD-NEXT: ret
133+
;
134+
; CHECK-GI-LABEL: div32xi4:
135+
; CHECK-GI: // %bb.0:
136+
; CHECK-GI-NEXT: fmov w9, s0
137+
; CHECK-GI-NEXT: mov w8, #39957 // =0x9c15
138+
; CHECK-GI-NEXT: mov w10, v0.s[1]
139+
; CHECK-GI-NEXT: movk w8, #145, lsl #16
140+
; CHECK-GI-NEXT: mov w11, v0.s[2]
141+
; CHECK-GI-NEXT: mov w12, v0.s[3]
142+
; CHECK-GI-NEXT: sdiv w9, w9, w8
143+
; CHECK-GI-NEXT: sdiv w10, w10, w8
144+
; CHECK-GI-NEXT: mov v0.s[0], w9
145+
; CHECK-GI-NEXT: sdiv w11, w11, w8
146+
; CHECK-GI-NEXT: mov v0.s[1], w10
147+
; CHECK-GI-NEXT: sdiv w8, w12, w8
148+
; CHECK-GI-NEXT: mov v0.s[2], w11
149+
; CHECK-GI-NEXT: mov v0.s[3], w8
150+
; CHECK-GI-NEXT: ret
46151
%div = sdiv <4 x i32> %x, <i32 9542677, i32 9542677, i32 9542677, i32 9542677>
47152
ret <4 x i32> %div
48153
}
@@ -61,32 +166,78 @@ define <16 x i8> @udiv16xi8(<16 x i8> %x) {
61166
}
62167

63168
define <8 x i16> @udiv8xi16(<8 x i16> %x) {
64-
; CHECK-LABEL: udiv8xi16:
65-
; CHECK: // %bb.0:
66-
; CHECK-NEXT: mov w8, #16593 // =0x40d1
67-
; CHECK-NEXT: dup v1.8h, w8
68-
; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
69-
; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
70-
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
71-
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
72-
; CHECK-NEXT: usra v1.8h, v0.8h, #1
73-
; CHECK-NEXT: ushr v0.8h, v1.8h, #12
74-
; CHECK-NEXT: ret
169+
; CHECK-SD-LABEL: udiv8xi16:
170+
; CHECK-SD: // %bb.0:
171+
; CHECK-SD-NEXT: mov w8, #16593 // =0x40d1
172+
; CHECK-SD-NEXT: dup v1.8h, w8
173+
; CHECK-SD-NEXT: umull2 v2.4s, v0.8h, v1.8h
174+
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
175+
; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h
176+
; CHECK-SD-NEXT: sub v0.8h, v0.8h, v1.8h
177+
; CHECK-SD-NEXT: usra v1.8h, v0.8h, #1
178+
; CHECK-SD-NEXT: ushr v0.8h, v1.8h, #12
179+
; CHECK-SD-NEXT: ret
180+
;
181+
; CHECK-GI-LABEL: udiv8xi16:
182+
; CHECK-GI: // %bb.0:
183+
; CHECK-GI-NEXT: adrp x8, .LCPI4_0
184+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
185+
; CHECK-GI-NEXT: umull2 v2.4s, v0.8h, v1.8h
186+
; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
187+
; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h
188+
; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
189+
; CHECK-GI-NEXT: usra v1.8h, v0.8h, #1
190+
; CHECK-GI-NEXT: ushr v0.8h, v1.8h, #12
191+
; CHECK-GI-NEXT: ret
75192
%div = udiv <8 x i16> %x, <i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537>
76193
ret <8 x i16> %div
77194
}
78195

79196
define <4 x i32> @udiv32xi4(<4 x i32> %x) {
80-
; CHECK-LABEL: udiv32xi4:
81-
; CHECK: // %bb.0:
82-
; CHECK-NEXT: mov w8, #16747 // =0x416b
83-
; CHECK-NEXT: movk w8, #31439, lsl #16
84-
; CHECK-NEXT: dup v1.4s, w8
85-
; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
86-
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
87-
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s
88-
; CHECK-NEXT: ushr v0.4s, v0.4s, #22
89-
; CHECK-NEXT: ret
197+
; CHECK-SD-LABEL: udiv32xi4:
198+
; CHECK-SD: // %bb.0:
199+
; CHECK-SD-NEXT: mov w8, #16747 // =0x416b
200+
; CHECK-SD-NEXT: movk w8, #31439, lsl #16
201+
; CHECK-SD-NEXT: dup v1.4s, w8
202+
; CHECK-SD-NEXT: umull2 v2.2d, v0.4s, v1.4s
203+
; CHECK-SD-NEXT: umull v0.2d, v0.2s, v1.2s
204+
; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v2.4s
205+
; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #22
206+
; CHECK-SD-NEXT: ret
207+
;
208+
; CHECK-GI-LABEL: udiv32xi4:
209+
; CHECK-GI: // %bb.0:
210+
; CHECK-GI-NEXT: adrp x8, .LCPI5_0
211+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
212+
; CHECK-GI-NEXT: umull2 v2.2d, v0.4s, v1.4s
213+
; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
214+
; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v2.4s
215+
; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #22
216+
; CHECK-GI-NEXT: ret
90217
%div = udiv <4 x i32> %x, <i32 8743143, i32 8743143, i32 8743143, i32 8743143>
91218
ret <4 x i32> %div
92219
}
220+
221+
define <2 x i64> @udiv_v2i64(<2 x i64> %a) {
222+
; CHECK-LABEL: udiv_v2i64:
223+
; CHECK: // %bb.0:
224+
; CHECK-NEXT: mov x8, #9363 // =0x2493
225+
; CHECK-NEXT: fmov x10, d0
226+
; CHECK-NEXT: mov x9, v0.d[1]
227+
; CHECK-NEXT: movk x8, #37449, lsl #16
228+
; CHECK-NEXT: movk x8, #18724, lsl #32
229+
; CHECK-NEXT: movk x8, #9362, lsl #48
230+
; CHECK-NEXT: umulh x11, x10, x8
231+
; CHECK-NEXT: umulh x8, x9, x8
232+
; CHECK-NEXT: sub x10, x10, x11
233+
; CHECK-NEXT: add x10, x11, x10, lsr #1
234+
; CHECK-NEXT: sub x9, x9, x8
235+
; CHECK-NEXT: add x8, x8, x9, lsr #1
236+
; CHECK-NEXT: lsr x9, x10, #2
237+
; CHECK-NEXT: fmov d0, x9
238+
; CHECK-NEXT: lsr x8, x8, #2
239+
; CHECK-NEXT: mov v0.d[1], x8
240+
; CHECK-NEXT: ret
241+
%r = udiv <2 x i64> %a, splat (i64 7)
242+
ret <2 x i64> %r
243+
}

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck -check-prefixes=CHECK,CHECK-SD %s
33
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

5+
; CHECK-GI: warning: Instruction selection used fallback path for uabd_i64
6+
57
define <8 x i16> @sabdl8h(ptr %A, ptr %B) nounwind {
68
; CHECK-LABEL: sabdl8h:
79
; CHECK: // %bb.0:

llvm/test/CodeGen/AArch64/sext.ll

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
3+
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
; CHECK-GI: warning: Instruction selection used fallback path for sext_v2i64_v2i128
46

57
define i16 @sext_i8_to_i16(i8 %a) {
68
; CHECK-LABEL: sext_i8_to_i16:
@@ -1241,3 +1243,18 @@ entry:
12411243
%c = sext <16 x i10> %a to <16 x i64>
12421244
ret <16 x i64> %c
12431245
}
1246+
1247+
define <2 x i128> @sext_v2i64_v2i128(<2 x i64> %a) {
1248+
; CHECK-LABEL: sext_v2i64_v2i128:
1249+
; CHECK: // %bb.0: // %entry
1250+
; CHECK-NEXT: mov x8, v0.d[1]
1251+
; CHECK-NEXT: dup v1.2d, v0.d[1]
1252+
; CHECK-NEXT: fmov x0, d0
1253+
; CHECK-NEXT: fmov x2, d1
1254+
; CHECK-NEXT: asr x1, x0, #63
1255+
; CHECK-NEXT: asr x3, x8, #63
1256+
; CHECK-NEXT: ret
1257+
entry:
1258+
%c = sext <2 x i64> %a to <2 x i128>
1259+
ret <2 x i128> %c
1260+
}

0 commit comments

Comments
 (0)