1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2
- ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
2
+ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3
+ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
+
5
+ ; CHECK-GI: warning: Instruction selection used fallback path for udiv_v2i64
3
6
4
7
; div16xi8: signed division of a <16 x i8> vector by the splat constant 25.
; CHECK-SD (SelectionDAG) expects no divide instruction: the quotient is formed
; by a widening multiply with the constant #41 (smull/smull2), taking the high
; halves (uzp2), an arithmetic shift right by 2, and a sign-bit correction
; (usra #7).
; CHECK-GI (GlobalISel) expects the operation to be scalarized: each of the 16
; lanes is moved to a GPR with smov, divided by w8 = 25 with a scalar sdiv, and
; inserted back into v1 before the result is copied to v0.
define <16 x i8 > @div16xi8 (<16 x i8 > %x ) {
5
- ; CHECK-LABEL: div16xi8:
6
- ; CHECK: // %bb.0:
7
- ; CHECK-NEXT: movi v1.16b, #41
8
- ; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
9
- ; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
10
- ; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
11
- ; CHECK-NEXT: sshr v0.16b, v0.16b, #2
12
- ; CHECK-NEXT: usra v0.16b, v0.16b, #7
13
- ; CHECK-NEXT: ret
8
+ ; CHECK-SD-LABEL: div16xi8:
9
+ ; CHECK-SD: // %bb.0:
10
+ ; CHECK-SD-NEXT: movi v1.16b, #41
11
+ ; CHECK-SD-NEXT: smull2 v2.8h, v0.16b, v1.16b
12
+ ; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b
13
+ ; CHECK-SD-NEXT: uzp2 v0.16b, v0.16b, v2.16b
14
+ ; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2
15
+ ; CHECK-SD-NEXT: usra v0.16b, v0.16b, #7
16
+ ; CHECK-SD-NEXT: ret
17
+ ;
18
+ ; CHECK-GI-LABEL: div16xi8:
19
+ ; CHECK-GI: // %bb.0:
20
+ ; CHECK-GI-NEXT: smov w9, v0.b[0]
21
+ ; CHECK-GI-NEXT: mov w8, #25 // =0x19
22
+ ; CHECK-GI-NEXT: smov w10, v0.b[1]
23
+ ; CHECK-GI-NEXT: smov w11, v0.b[2]
24
+ ; CHECK-GI-NEXT: smov w12, v0.b[3]
25
+ ; CHECK-GI-NEXT: smov w13, v0.b[4]
26
+ ; CHECK-GI-NEXT: smov w14, v0.b[5]
27
+ ; CHECK-GI-NEXT: smov w15, v0.b[6]
28
+ ; CHECK-GI-NEXT: smov w16, v0.b[7]
29
+ ; CHECK-GI-NEXT: smov w17, v0.b[8]
30
+ ; CHECK-GI-NEXT: smov w18, v0.b[9]
31
+ ; CHECK-GI-NEXT: sdiv w9, w9, w8
32
+ ; CHECK-GI-NEXT: sdiv w10, w10, w8
33
+ ; CHECK-GI-NEXT: fmov s1, w9
34
+ ; CHECK-GI-NEXT: sdiv w11, w11, w8
35
+ ; CHECK-GI-NEXT: mov v1.b[1], w10
36
+ ; CHECK-GI-NEXT: smov w10, v0.b[10]
37
+ ; CHECK-GI-NEXT: sdiv w12, w12, w8
38
+ ; CHECK-GI-NEXT: mov v1.b[2], w11
39
+ ; CHECK-GI-NEXT: smov w11, v0.b[11]
40
+ ; CHECK-GI-NEXT: sdiv w13, w13, w8
41
+ ; CHECK-GI-NEXT: mov v1.b[3], w12
42
+ ; CHECK-GI-NEXT: smov w12, v0.b[12]
43
+ ; CHECK-GI-NEXT: sdiv w14, w14, w8
44
+ ; CHECK-GI-NEXT: mov v1.b[4], w13
45
+ ; CHECK-GI-NEXT: smov w13, v0.b[13]
46
+ ; CHECK-GI-NEXT: sdiv w15, w15, w8
47
+ ; CHECK-GI-NEXT: mov v1.b[5], w14
48
+ ; CHECK-GI-NEXT: sdiv w16, w16, w8
49
+ ; CHECK-GI-NEXT: mov v1.b[6], w15
50
+ ; CHECK-GI-NEXT: sdiv w17, w17, w8
51
+ ; CHECK-GI-NEXT: mov v1.b[7], w16
52
+ ; CHECK-GI-NEXT: sdiv w9, w18, w8
53
+ ; CHECK-GI-NEXT: mov v1.b[8], w17
54
+ ; CHECK-GI-NEXT: sdiv w10, w10, w8
55
+ ; CHECK-GI-NEXT: mov v1.b[9], w9
56
+ ; CHECK-GI-NEXT: smov w9, v0.b[14]
57
+ ; CHECK-GI-NEXT: sdiv w11, w11, w8
58
+ ; CHECK-GI-NEXT: mov v1.b[10], w10
59
+ ; CHECK-GI-NEXT: smov w10, v0.b[15]
60
+ ; CHECK-GI-NEXT: sdiv w12, w12, w8
61
+ ; CHECK-GI-NEXT: mov v1.b[11], w11
62
+ ; CHECK-GI-NEXT: sdiv w13, w13, w8
63
+ ; CHECK-GI-NEXT: mov v1.b[12], w12
64
+ ; CHECK-GI-NEXT: sdiv w9, w9, w8
65
+ ; CHECK-GI-NEXT: mov v1.b[13], w13
66
+ ; CHECK-GI-NEXT: sdiv w8, w10, w8
67
+ ; CHECK-GI-NEXT: mov v1.b[14], w9
68
+ ; CHECK-GI-NEXT: mov v1.b[15], w8
69
+ ; CHECK-GI-NEXT: mov v0.16b, v1.16b
70
+ ; CHECK-GI-NEXT: ret
14
71
%div = sdiv <16 x i8 > %x , <i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 >
15
72
ret <16 x i8 > %div
16
73
}
17
74
18
75
; div8xi16: signed division of an <8 x i16> vector by the splat constant 6577.
; CHECK-SD (SelectionDAG) expects a multiply-based sequence with no divide:
; the constant 0x9f6f is splatted, widening-multiplied against the input
; (smull/smull2), the high halves are gathered (uzp2), the input is added back,
; and the result is shifted right by 12 with a sign-bit correction (usra #15).
; CHECK-GI (GlobalISel) expects the operation to be scalarized: each of the 8
; lanes is extracted with smov, divided by w8 = 6577 with a scalar sdiv, and
; inserted into v1 before the result is copied to v0.
define <8 x i16 > @div8xi16 (<8 x i16 > %x ) {
19
- ; CHECK-LABEL: div8xi16:
20
- ; CHECK: // %bb.0:
21
- ; CHECK-NEXT: mov w8, #40815 // =0x9f6f
22
- ; CHECK-NEXT: dup v1.8h, w8
23
- ; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
24
- ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
25
- ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
26
- ; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
27
- ; CHECK-NEXT: sshr v0.8h, v0.8h, #12
28
- ; CHECK-NEXT: usra v0.8h, v0.8h, #15
29
- ; CHECK-NEXT: ret
76
+ ; CHECK-SD-LABEL: div8xi16:
77
+ ; CHECK-SD: // %bb.0:
78
+ ; CHECK-SD-NEXT: mov w8, #40815 // =0x9f6f
79
+ ; CHECK-SD-NEXT: dup v1.8h, w8
80
+ ; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h
81
+ ; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
82
+ ; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h
83
+ ; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
84
+ ; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #12
85
+ ; CHECK-SD-NEXT: usra v0.8h, v0.8h, #15
86
+ ; CHECK-SD-NEXT: ret
87
+ ;
88
+ ; CHECK-GI-LABEL: div8xi16:
89
+ ; CHECK-GI: // %bb.0:
90
+ ; CHECK-GI-NEXT: smov w9, v0.h[0]
91
+ ; CHECK-GI-NEXT: mov w8, #6577 // =0x19b1
92
+ ; CHECK-GI-NEXT: smov w10, v0.h[1]
93
+ ; CHECK-GI-NEXT: smov w11, v0.h[2]
94
+ ; CHECK-GI-NEXT: smov w12, v0.h[3]
95
+ ; CHECK-GI-NEXT: smov w13, v0.h[4]
96
+ ; CHECK-GI-NEXT: smov w14, v0.h[5]
97
+ ; CHECK-GI-NEXT: sdiv w9, w9, w8
98
+ ; CHECK-GI-NEXT: sdiv w10, w10, w8
99
+ ; CHECK-GI-NEXT: fmov s1, w9
100
+ ; CHECK-GI-NEXT: sdiv w11, w11, w8
101
+ ; CHECK-GI-NEXT: mov v1.h[1], w10
102
+ ; CHECK-GI-NEXT: smov w10, v0.h[6]
103
+ ; CHECK-GI-NEXT: sdiv w12, w12, w8
104
+ ; CHECK-GI-NEXT: mov v1.h[2], w11
105
+ ; CHECK-GI-NEXT: smov w11, v0.h[7]
106
+ ; CHECK-GI-NEXT: sdiv w13, w13, w8
107
+ ; CHECK-GI-NEXT: mov v1.h[3], w12
108
+ ; CHECK-GI-NEXT: sdiv w9, w14, w8
109
+ ; CHECK-GI-NEXT: mov v1.h[4], w13
110
+ ; CHECK-GI-NEXT: sdiv w10, w10, w8
111
+ ; CHECK-GI-NEXT: mov v1.h[5], w9
112
+ ; CHECK-GI-NEXT: sdiv w8, w11, w8
113
+ ; CHECK-GI-NEXT: mov v1.h[6], w10
114
+ ; CHECK-GI-NEXT: mov v1.h[7], w8
115
+ ; CHECK-GI-NEXT: mov v0.16b, v1.16b
116
+ ; CHECK-GI-NEXT: ret
30
117
%div = sdiv <8 x i16 > %x , <i16 6577 , i16 6577 , i16 6577 , i16 6577 , i16 6577 , i16 6577 , i16 6577 , i16 6577 >
31
118
ret <8 x i16 > %div
32
119
}
33
120
34
121
; div32xi4: signed division of a <4 x i32> vector by the splat constant 9542677.
; CHECK-SD (SelectionDAG) expects a multiply-based sequence with no divide:
; a 32-bit constant is built with mov/movk and splatted, widening-multiplied
; against the input (smull/smull2), the high halves are gathered (uzp2), then
; shifted right by 22 with a sign-bit correction (usra #31).
; CHECK-GI (GlobalISel) expects the operation to be scalarized: w8 is loaded
; with 9542677 (0x9c15 | 145 << 16), each of the 4 lanes is moved to a GPR,
; divided with a scalar sdiv, and written back into v0 in place.
; NOTE(review): the name reads "32 x i4" while the operand type is <4 x i32>;
; renaming would also require regenerating the CHECK labels - confirm before
; changing.
define <4 x i32 > @div32xi4 (<4 x i32 > %x ) {
35
- ; CHECK-LABEL: div32xi4:
36
- ; CHECK: // %bb.0:
37
- ; CHECK-NEXT: mov w8, #7527 // =0x1d67
38
- ; CHECK-NEXT: movk w8, #28805, lsl #16
39
- ; CHECK-NEXT: dup v1.4s, w8
40
- ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
41
- ; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
42
- ; CHECK-NEXT: uzp2 v1.4s, v0.4s, v2.4s
43
- ; CHECK-NEXT: sshr v0.4s, v1.4s, #22
44
- ; CHECK-NEXT: usra v0.4s, v1.4s, #31
45
- ; CHECK-NEXT: ret
122
+ ; CHECK-SD-LABEL: div32xi4:
123
+ ; CHECK-SD: // %bb.0:
124
+ ; CHECK-SD-NEXT: mov w8, #7527 // =0x1d67
125
+ ; CHECK-SD-NEXT: movk w8, #28805, lsl #16
126
+ ; CHECK-SD-NEXT: dup v1.4s, w8
127
+ ; CHECK-SD-NEXT: smull2 v2.2d, v0.4s, v1.4s
128
+ ; CHECK-SD-NEXT: smull v0.2d, v0.2s, v1.2s
129
+ ; CHECK-SD-NEXT: uzp2 v1.4s, v0.4s, v2.4s
130
+ ; CHECK-SD-NEXT: sshr v0.4s, v1.4s, #22
131
+ ; CHECK-SD-NEXT: usra v0.4s, v1.4s, #31
132
+ ; CHECK-SD-NEXT: ret
133
+ ;
134
+ ; CHECK-GI-LABEL: div32xi4:
135
+ ; CHECK-GI: // %bb.0:
136
+ ; CHECK-GI-NEXT: fmov w9, s0
137
+ ; CHECK-GI-NEXT: mov w8, #39957 // =0x9c15
138
+ ; CHECK-GI-NEXT: mov w10, v0.s[1]
139
+ ; CHECK-GI-NEXT: movk w8, #145, lsl #16
140
+ ; CHECK-GI-NEXT: mov w11, v0.s[2]
141
+ ; CHECK-GI-NEXT: mov w12, v0.s[3]
142
+ ; CHECK-GI-NEXT: sdiv w9, w9, w8
143
+ ; CHECK-GI-NEXT: sdiv w10, w10, w8
144
+ ; CHECK-GI-NEXT: mov v0.s[0], w9
145
+ ; CHECK-GI-NEXT: sdiv w11, w11, w8
146
+ ; CHECK-GI-NEXT: mov v0.s[1], w10
147
+ ; CHECK-GI-NEXT: sdiv w8, w12, w8
148
+ ; CHECK-GI-NEXT: mov v0.s[2], w11
149
+ ; CHECK-GI-NEXT: mov v0.s[3], w8
150
+ ; CHECK-GI-NEXT: ret
46
151
%div = sdiv <4 x i32 > %x , <i32 9542677 , i32 9542677 , i32 9542677 , i32 9542677 >
47
152
ret <4 x i32 > %div
48
153
}
@@ -61,32 +166,78 @@ define <16 x i8> @udiv16xi8(<16 x i8> %x) {
61
166
}
62
167
63
168
; udiv8xi16: unsigned division of an <8 x i16> vector by the splat constant 6537.
; Both selectors are expected to use the same multiply-based vector sequence
; with no divide: widening multiply (umull/umull2), gather of the high halves
; (uzp2), subtract from the input, shift-right-accumulate by 1 (usra), and a
; final logical shift right by 12.
; The only difference between the expectations is how the multiplier vector is
; materialized: CHECK-SD builds it with mov #16593 + dup, whereas CHECK-GI
; loads it from the constant-pool entry .LCPI4_0 (adrp + ldr).
define <8 x i16 > @udiv8xi16 (<8 x i16 > %x ) {
64
- ; CHECK-LABEL: udiv8xi16:
65
- ; CHECK: // %bb.0:
66
- ; CHECK-NEXT: mov w8, #16593 // =0x40d1
67
- ; CHECK-NEXT: dup v1.8h, w8
68
- ; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
69
- ; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
70
- ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
71
- ; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
72
- ; CHECK-NEXT: usra v1.8h, v0.8h, #1
73
- ; CHECK-NEXT: ushr v0.8h, v1.8h, #12
74
- ; CHECK-NEXT: ret
169
+ ; CHECK-SD-LABEL: udiv8xi16:
170
+ ; CHECK-SD: // %bb.0:
171
+ ; CHECK-SD-NEXT: mov w8, #16593 // =0x40d1
172
+ ; CHECK-SD-NEXT: dup v1.8h, w8
173
+ ; CHECK-SD-NEXT: umull2 v2.4s, v0.8h, v1.8h
174
+ ; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
175
+ ; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h
176
+ ; CHECK-SD-NEXT: sub v0.8h, v0.8h, v1.8h
177
+ ; CHECK-SD-NEXT: usra v1.8h, v0.8h, #1
178
+ ; CHECK-SD-NEXT: ushr v0.8h, v1.8h, #12
179
+ ; CHECK-SD-NEXT: ret
180
+ ;
181
+ ; CHECK-GI-LABEL: udiv8xi16:
182
+ ; CHECK-GI: // %bb.0:
183
+ ; CHECK-GI-NEXT: adrp x8, .LCPI4_0
184
+ ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
185
+ ; CHECK-GI-NEXT: umull2 v2.4s, v0.8h, v1.8h
186
+ ; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
187
+ ; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h
188
+ ; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
189
+ ; CHECK-GI-NEXT: usra v1.8h, v0.8h, #1
190
+ ; CHECK-GI-NEXT: ushr v0.8h, v1.8h, #12
191
+ ; CHECK-GI-NEXT: ret
75
192
%div = udiv <8 x i16 > %x , <i16 6537 , i16 6537 , i16 6537 , i16 6537 , i16 6537 , i16 6537 , i16 6537 , i16 6537 >
76
193
ret <8 x i16 > %div
77
194
}
78
195
79
196
; udiv32xi4: unsigned division of a <4 x i32> vector by the splat constant
; 8743143.
; Both selectors are expected to use the same multiply-based vector sequence
; with no divide: widening multiply (umull/umull2), gather of the high halves
; (uzp2), and a logical shift right by 22.
; The only difference between the expectations is how the multiplier vector is
; materialized: CHECK-SD builds it with mov/movk + dup, whereas CHECK-GI loads
; it from the constant-pool entry .LCPI5_0 (adrp + ldr).
; NOTE(review): the name reads "32 x i4" while the operand type is <4 x i32>;
; renaming would also require regenerating the CHECK labels - confirm before
; changing.
define <4 x i32 > @udiv32xi4 (<4 x i32 > %x ) {
80
- ; CHECK-LABEL: udiv32xi4:
81
- ; CHECK: // %bb.0:
82
- ; CHECK-NEXT: mov w8, #16747 // =0x416b
83
- ; CHECK-NEXT: movk w8, #31439, lsl #16
84
- ; CHECK-NEXT: dup v1.4s, w8
85
- ; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
86
- ; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
87
- ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s
88
- ; CHECK-NEXT: ushr v0.4s, v0.4s, #22
89
- ; CHECK-NEXT: ret
197
+ ; CHECK-SD-LABEL: udiv32xi4:
198
+ ; CHECK-SD: // %bb.0:
199
+ ; CHECK-SD-NEXT: mov w8, #16747 // =0x416b
200
+ ; CHECK-SD-NEXT: movk w8, #31439, lsl #16
201
+ ; CHECK-SD-NEXT: dup v1.4s, w8
202
+ ; CHECK-SD-NEXT: umull2 v2.2d, v0.4s, v1.4s
203
+ ; CHECK-SD-NEXT: umull v0.2d, v0.2s, v1.2s
204
+ ; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v2.4s
205
+ ; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #22
206
+ ; CHECK-SD-NEXT: ret
207
+ ;
208
+ ; CHECK-GI-LABEL: udiv32xi4:
209
+ ; CHECK-GI: // %bb.0:
210
+ ; CHECK-GI-NEXT: adrp x8, .LCPI5_0
211
+ ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
212
+ ; CHECK-GI-NEXT: umull2 v2.2d, v0.4s, v1.4s
213
+ ; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
214
+ ; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v2.4s
215
+ ; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #22
216
+ ; CHECK-GI-NEXT: ret
90
217
%div = udiv <4 x i32 > %x , <i32 8743143 , i32 8743143 , i32 8743143 , i32 8743143 >
91
218
ret <4 x i32 > %div
92
219
}
220
+
221
; udiv_v2i64: unsigned division of a <2 x i64> vector by splat (i64 7).
; The expectations use the common CHECK prefix, so the same output is required
; from every RUN line that enables that prefix; the file's CHECK-GI warning
; line records that GlobalISel falls back for this function.
; Expected code handles the two lanes in general-purpose registers: both lanes
; are moved out of v0, the 64-bit multiplier 0x2492492492492493 is assembled in
; x8 with mov + three movk, each lane takes the high half of the product
; (umulh), is corrected with sub / add ..., lsr #1, shifted right by 2, and is
; inserted back into v0. No udiv instruction is expected.
define <2 x i64 > @udiv_v2i64 (<2 x i64 > %a ) {
222
+ ; CHECK-LABEL: udiv_v2i64:
223
+ ; CHECK: // %bb.0:
224
+ ; CHECK-NEXT: mov x8, #9363 // =0x2493
225
+ ; CHECK-NEXT: fmov x10, d0
226
+ ; CHECK-NEXT: mov x9, v0.d[1]
227
+ ; CHECK-NEXT: movk x8, #37449, lsl #16
228
+ ; CHECK-NEXT: movk x8, #18724, lsl #32
229
+ ; CHECK-NEXT: movk x8, #9362, lsl #48
230
+ ; CHECK-NEXT: umulh x11, x10, x8
231
+ ; CHECK-NEXT: umulh x8, x9, x8
232
+ ; CHECK-NEXT: sub x10, x10, x11
233
+ ; CHECK-NEXT: add x10, x11, x10, lsr #1
234
+ ; CHECK-NEXT: sub x9, x9, x8
235
+ ; CHECK-NEXT: add x8, x8, x9, lsr #1
236
+ ; CHECK-NEXT: lsr x9, x10, #2
237
+ ; CHECK-NEXT: fmov d0, x9
238
+ ; CHECK-NEXT: lsr x8, x8, #2
239
+ ; CHECK-NEXT: mov v0.d[1], x8
240
+ ; CHECK-NEXT: ret
241
+ %r = udiv <2 x i64 > %a , splat (i64 7 )
242
+ ret <2 x i64 > %r
243
+ }
0 commit comments