@@ -9,10 +9,11 @@ target triple = "aarch64-unknown-linux-gnu"
9
9
define <4 x i32 > @sdiv_v4i32_negative_pow2_divisor_packed (<4 x i32 > %op1 ) vscale_range(1 ,0 ) #0 {
10
10
; CHECK-LABEL: sdiv_v4i32_negative_pow2_divisor_packed:
11
11
; CHECK: // %bb.0:
12
- ; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
13
- ; CHECK-NEXT: usra v0.4s, v1.4s, #29
14
- ; CHECK-NEXT: sshr v0.4s, v0.4s, #3
15
- ; CHECK-NEXT: neg v0.4s, v0.4s
12
+ ; CHECK-NEXT: ptrue p0.s, vl4
13
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
14
+ ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
15
+ ; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
16
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
16
17
; CHECK-NEXT: ret
17
18
%res = sdiv <4 x i32 > %op1 , splat (i32 -8 )
18
19
ret <4 x i32 > %res
@@ -21,10 +22,11 @@ define <4 x i32> @sdiv_v4i32_negative_pow2_divisor_packed(<4 x i32> %op1) vscale
21
22
define <2 x i32 > @sdiv_v2i32_negative_pow2_divisor_unpacked (<2 x i32 > %op1 ) vscale_range(1 ,0 ) #0 {
22
23
; CHECK-LABEL: sdiv_v2i32_negative_pow2_divisor_unpacked:
23
24
; CHECK: // %bb.0:
24
- ; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
25
- ; CHECK-NEXT: usra v0.2s, v1.2s, #29
26
- ; CHECK-NEXT: sshr v0.2s, v0.2s, #3
27
- ; CHECK-NEXT: neg v0.2s, v0.2s
25
+ ; CHECK-NEXT: ptrue p0.s, vl2
26
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
27
+ ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
28
+ ; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
29
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
28
30
; CHECK-NEXT: ret
29
31
%res = sdiv <2 x i32 > %op1 , splat (i32 -8 )
30
32
ret <2 x i32 > %res
@@ -33,9 +35,10 @@ define <2 x i32> @sdiv_v2i32_negative_pow2_divisor_unpacked(<2 x i32> %op1) vsca
33
35
define <4 x i32 > @sdiv_v4i32_positive_pow2_divisor_packed (<4 x i32 > %op1 ) vscale_range(1 ,0 ) #0 {
34
36
; CHECK-LABEL: sdiv_v4i32_positive_pow2_divisor_packed:
35
37
; CHECK: // %bb.0:
36
- ; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
37
- ; CHECK-NEXT: usra v0.4s, v1.4s, #29
38
- ; CHECK-NEXT: sshr v0.4s, v0.4s, #3
38
+ ; CHECK-NEXT: ptrue p0.s, vl4
39
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
40
+ ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
41
+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
39
42
; CHECK-NEXT: ret
40
43
%res = sdiv <4 x i32 > %op1 , splat (i32 8 )
41
44
ret <4 x i32 > %res
@@ -44,9 +47,10 @@ define <4 x i32> @sdiv_v4i32_positive_pow2_divisor_packed(<4 x i32> %op1) vscale
44
47
define <2 x i32 > @sdiv_v2i32_positive_pow2_divisor_unpacked (<2 x i32 > %op1 ) vscale_range(1 ,0 ) #0 {
45
48
; CHECK-LABEL: sdiv_v2i32_positive_pow2_divisor_unpacked:
46
49
; CHECK: // %bb.0:
47
- ; CHECK-NEXT: cmlt v1.2s, v0.2s, #0
48
- ; CHECK-NEXT: usra v0.2s, v1.2s, #29
49
- ; CHECK-NEXT: sshr v0.2s, v0.2s, #3
50
+ ; CHECK-NEXT: ptrue p0.s, vl2
51
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
52
+ ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #3
53
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
50
54
; CHECK-NEXT: ret
51
55
%res = sdiv <2 x i32 > %op1 , splat (i32 8 )
52
56
ret <2 x i32 > %res
@@ -95,19 +99,12 @@ define void @sdiv_v64i8(ptr %a) #0 {
95
99
; VBITS_GE_128-LABEL: sdiv_v64i8:
96
100
; VBITS_GE_128: // %bb.0:
97
101
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
98
- ; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
99
- ; VBITS_GE_128-NEXT: cmlt v2.16b, v0.16b, #0
100
- ; VBITS_GE_128-NEXT: cmlt v5.16b, v1.16b, #0
101
- ; VBITS_GE_128-NEXT: cmlt v6.16b, v3.16b, #0
102
- ; VBITS_GE_128-NEXT: usra v0.16b, v2.16b, #3
103
- ; VBITS_GE_128-NEXT: cmlt v2.16b, v4.16b, #0
104
- ; VBITS_GE_128-NEXT: usra v1.16b, v5.16b, #3
105
- ; VBITS_GE_128-NEXT: usra v3.16b, v6.16b, #3
106
- ; VBITS_GE_128-NEXT: usra v4.16b, v2.16b, #3
107
- ; VBITS_GE_128-NEXT: sshr v0.16b, v0.16b, #5
108
- ; VBITS_GE_128-NEXT: sshr v1.16b, v1.16b, #5
109
- ; VBITS_GE_128-NEXT: sshr v2.16b, v3.16b, #5
110
- ; VBITS_GE_128-NEXT: sshr v3.16b, v4.16b, #5
102
+ ; VBITS_GE_128-NEXT: ptrue p0.b, vl16
103
+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
104
+ ; VBITS_GE_128-NEXT: asrd z0.b, p0/m, z0.b, #5
105
+ ; VBITS_GE_128-NEXT: asrd z1.b, p0/m, z1.b, #5
106
+ ; VBITS_GE_128-NEXT: asrd z2.b, p0/m, z2.b, #5
107
+ ; VBITS_GE_128-NEXT: asrd z3.b, p0/m, z3.b, #5
111
108
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
112
109
; VBITS_GE_128-NEXT: stp q2, q3, [x0]
113
110
; VBITS_GE_128-NEXT: ret
@@ -209,19 +206,12 @@ define void @sdiv_v32i16(ptr %a) #0 {
209
206
; VBITS_GE_128-LABEL: sdiv_v32i16:
210
207
; VBITS_GE_128: // %bb.0:
211
208
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
212
- ; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
213
- ; VBITS_GE_128-NEXT: cmlt v2.8h, v0.8h, #0
214
- ; VBITS_GE_128-NEXT: cmlt v5.8h, v1.8h, #0
215
- ; VBITS_GE_128-NEXT: cmlt v6.8h, v3.8h, #0
216
- ; VBITS_GE_128-NEXT: usra v0.8h, v2.8h, #11
217
- ; VBITS_GE_128-NEXT: cmlt v2.8h, v4.8h, #0
218
- ; VBITS_GE_128-NEXT: usra v1.8h, v5.8h, #11
219
- ; VBITS_GE_128-NEXT: usra v3.8h, v6.8h, #11
220
- ; VBITS_GE_128-NEXT: usra v4.8h, v2.8h, #11
221
- ; VBITS_GE_128-NEXT: sshr v0.8h, v0.8h, #5
222
- ; VBITS_GE_128-NEXT: sshr v1.8h, v1.8h, #5
223
- ; VBITS_GE_128-NEXT: sshr v2.8h, v3.8h, #5
224
- ; VBITS_GE_128-NEXT: sshr v3.8h, v4.8h, #5
209
+ ; VBITS_GE_128-NEXT: ptrue p0.h, vl8
210
+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
211
+ ; VBITS_GE_128-NEXT: asrd z0.h, p0/m, z0.h, #5
212
+ ; VBITS_GE_128-NEXT: asrd z1.h, p0/m, z1.h, #5
213
+ ; VBITS_GE_128-NEXT: asrd z2.h, p0/m, z2.h, #5
214
+ ; VBITS_GE_128-NEXT: asrd z3.h, p0/m, z3.h, #5
225
215
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
226
216
; VBITS_GE_128-NEXT: stp q2, q3, [x0]
227
217
; VBITS_GE_128-NEXT: ret
@@ -324,19 +314,12 @@ define void @sdiv_v16i32(ptr %a) #0 {
324
314
; VBITS_GE_128-LABEL: sdiv_v16i32:
325
315
; VBITS_GE_128: // %bb.0:
326
316
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
327
- ; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
328
- ; VBITS_GE_128-NEXT: cmlt v2.4s, v0.4s, #0
329
- ; VBITS_GE_128-NEXT: cmlt v5.4s, v1.4s, #0
330
- ; VBITS_GE_128-NEXT: cmlt v6.4s, v3.4s, #0
331
- ; VBITS_GE_128-NEXT: usra v0.4s, v2.4s, #27
332
- ; VBITS_GE_128-NEXT: cmlt v2.4s, v4.4s, #0
333
- ; VBITS_GE_128-NEXT: usra v1.4s, v5.4s, #27
334
- ; VBITS_GE_128-NEXT: usra v3.4s, v6.4s, #27
335
- ; VBITS_GE_128-NEXT: usra v4.4s, v2.4s, #27
336
- ; VBITS_GE_128-NEXT: sshr v0.4s, v0.4s, #5
337
- ; VBITS_GE_128-NEXT: sshr v1.4s, v1.4s, #5
338
- ; VBITS_GE_128-NEXT: sshr v2.4s, v3.4s, #5
339
- ; VBITS_GE_128-NEXT: sshr v3.4s, v4.4s, #5
317
+ ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
318
+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
319
+ ; VBITS_GE_128-NEXT: asrd z0.s, p0/m, z0.s, #5
320
+ ; VBITS_GE_128-NEXT: asrd z1.s, p0/m, z1.s, #5
321
+ ; VBITS_GE_128-NEXT: asrd z2.s, p0/m, z2.s, #5
322
+ ; VBITS_GE_128-NEXT: asrd z3.s, p0/m, z3.s, #5
340
323
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
341
324
; VBITS_GE_128-NEXT: stp q2, q3, [x0]
342
325
; VBITS_GE_128-NEXT: ret
@@ -439,19 +422,12 @@ define void @sdiv_v8i64(ptr %a) #0 {
439
422
; VBITS_GE_128-LABEL: sdiv_v8i64:
440
423
; VBITS_GE_128: // %bb.0:
441
424
; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
442
- ; VBITS_GE_128-NEXT: ldp q3, q4, [x0]
443
- ; VBITS_GE_128-NEXT: cmlt v2.2d, v0.2d, #0
444
- ; VBITS_GE_128-NEXT: cmlt v5.2d, v1.2d, #0
445
- ; VBITS_GE_128-NEXT: cmlt v6.2d, v3.2d, #0
446
- ; VBITS_GE_128-NEXT: usra v0.2d, v2.2d, #59
447
- ; VBITS_GE_128-NEXT: cmlt v2.2d, v4.2d, #0
448
- ; VBITS_GE_128-NEXT: usra v1.2d, v5.2d, #59
449
- ; VBITS_GE_128-NEXT: usra v3.2d, v6.2d, #59
450
- ; VBITS_GE_128-NEXT: usra v4.2d, v2.2d, #59
451
- ; VBITS_GE_128-NEXT: sshr v0.2d, v0.2d, #5
452
- ; VBITS_GE_128-NEXT: sshr v1.2d, v1.2d, #5
453
- ; VBITS_GE_128-NEXT: sshr v2.2d, v3.2d, #5
454
- ; VBITS_GE_128-NEXT: sshr v3.2d, v4.2d, #5
425
+ ; VBITS_GE_128-NEXT: ptrue p0.d, vl2
426
+ ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
427
+ ; VBITS_GE_128-NEXT: asrd z0.d, p0/m, z0.d, #5
428
+ ; VBITS_GE_128-NEXT: asrd z1.d, p0/m, z1.d, #5
429
+ ; VBITS_GE_128-NEXT: asrd z2.d, p0/m, z2.d, #5
430
+ ; VBITS_GE_128-NEXT: asrd z3.d, p0/m, z3.d, #5
455
431
; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
456
432
; VBITS_GE_128-NEXT: stp q2, q3, [x0]
457
433
; VBITS_GE_128-NEXT: ret
0 commit comments