@@ -90,10 +90,11 @@ define <4 x i64> @v2i64_i16_04812(<16 x i16> %a) {
90
90
define <4 x i64 > @v2i64_i16_15913 (<16 x i16 > %a ) {
91
91
; CHECK-LABEL: v2i64_i16_15913:
92
92
; CHECK: // %bb.0:
93
- ; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
94
- ; CHECK-NEXT: ushr v0.4s, v0.4s, #16
95
- ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
96
- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
93
+ ; CHECK-NEXT: movi v2.2d, #0x0000000000ffff
94
+ ; CHECK-NEXT: ushr v0.2d, v0.2d, #16
95
+ ; CHECK-NEXT: ushr v1.2d, v1.2d, #16
96
+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
97
+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
97
98
; CHECK-NEXT: ret
98
99
%s1 = shufflevector <16 x i16 > %a , <16 x i16 > undef , <4 x i32 > <i32 1 , i32 5 , i32 9 , i32 13 >
99
100
%z1 = zext <4 x i16 > %s1 to <4 x i64 >
@@ -117,10 +118,8 @@ define <4 x i64> @v2i64_i16_261014(<16 x i16> %a) {
117
118
define <4 x i64 > @v2i64_i16_371115 (<16 x i16 > %a ) {
118
119
; CHECK-LABEL: v2i64_i16_371115:
119
120
; CHECK: // %bb.0:
120
- ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
121
- ; CHECK-NEXT: ushr v0.4s, v0.4s, #16
122
- ; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
123
- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
121
+ ; CHECK-NEXT: ushr v0.2d, v0.2d, #48
122
+ ; CHECK-NEXT: ushr v1.2d, v1.2d, #48
124
123
; CHECK-NEXT: ret
125
124
%s1 = shufflevector <16 x i16 > %a , <16 x i16 > undef , <4 x i32 > <i32 3 , i32 7 , i32 11 , i32 15 >
126
125
%z1 = zext <4 x i16 > %s1 to <4 x i64 >
@@ -142,8 +141,7 @@ define <4 x i32> @v4i32_0246(<8 x i16> %a, <8 x i16> %b) {
142
141
define <4 x i32 > @v4i32_1357 (<8 x i16 > %a , <8 x i16 > %b ) {
143
142
; CHECK-LABEL: v4i32_1357:
144
143
; CHECK: // %bb.0:
145
- ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v0.8h
146
- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
144
+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #16
147
145
; CHECK-NEXT: ret
148
146
%c = shufflevector <8 x i16 > %a , <8 x i16 > %b , <4 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 >
149
147
%d = zext <4 x i16 > %c to <4 x i32 >
@@ -210,8 +208,7 @@ define <8 x i16> @v8i16_0246(<16 x i8> %a, <16 x i8> %b) {
210
208
define <8 x i16 > @v8i16_1357 (<16 x i8 > %a , <16 x i8 > %b ) {
211
209
; CHECK-LABEL: v8i16_1357:
212
210
; CHECK: // %bb.0:
213
- ; CHECK-NEXT: uzp2 v0.16b, v0.16b, v0.16b
214
- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
211
+ ; CHECK-NEXT: ushr v0.8h, v0.8h, #8
215
212
; CHECK-NEXT: ret
216
213
%c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 1 , i32 3 , i32 5 , i32 7 , i32 9 , i32 11 , i32 13 , i32 15 >
217
214
%d = zext <8 x i8 > %c to <8 x i16 >
@@ -278,8 +275,7 @@ define <8 x i32> @v8i32_0246(<16 x i8> %a, <16 x i8> %b) {
278
275
define <8 x i32 > @v8i32_1357 (<16 x i8 > %a , <16 x i8 > %b ) {
279
276
; CHECK-LABEL: v8i32_1357:
280
277
; CHECK: // %bb.0:
281
- ; CHECK-NEXT: uzp2 v0.16b, v0.16b, v0.16b
282
- ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
278
+ ; CHECK-NEXT: ushr v0.8h, v0.8h, #8
283
279
; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
284
280
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
285
281
; CHECK-NEXT: ret
@@ -291,10 +287,9 @@ define <8 x i32> @v8i32_1357(<16 x i8> %a, <16 x i8> %b) {
291
287
define <8 x i32 > @v8i32_04812 (<16 x i8 > %a , <16 x i8 > %b ) {
292
288
; CHECK-LABEL: v8i32_04812:
293
289
; CHECK: // %bb.0:
294
- ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
295
- ; CHECK-NEXT: bic v0.8h, #255, lsl #8
296
- ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
297
- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
290
+ ; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
291
+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
292
+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
298
293
; CHECK-NEXT: ret
299
294
%c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 0 , i32 4 , i32 8 , i32 12 , i32 16 , i32 20 , i32 24 , i32 28 >
300
295
%d = zext <8 x i8 > %c to <8 x i32 >
@@ -304,10 +299,11 @@ define <8 x i32> @v8i32_04812(<16 x i8> %a, <16 x i8> %b) {
304
299
define <8 x i32 > @v8i32_15913 (<16 x i8 > %a , <16 x i8 > %b ) {
305
300
; CHECK-LABEL: v8i32_15913:
306
301
; CHECK: // %bb.0:
307
- ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
308
- ; CHECK-NEXT: ushr v0.8h, v0.8h, #8
309
- ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
310
- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
302
+ ; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
303
+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #8
304
+ ; CHECK-NEXT: ushr v1.4s, v1.4s, #8
305
+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
306
+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
311
307
; CHECK-NEXT: ret
312
308
%c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 1 , i32 5 , i32 9 , i32 13 , i32 17 , i32 21 , i32 25 , i32 29 >
313
309
%d = zext <8 x i8 > %c to <8 x i32 >
@@ -317,10 +313,10 @@ define <8 x i32> @v8i32_15913(<16 x i8> %a, <16 x i8> %b) {
317
313
define <8 x i32 > @v8i32_261014 (<16 x i8 > %a , <16 x i8 > %b ) {
318
314
; CHECK-LABEL: v8i32_261014:
319
315
; CHECK: // %bb.0:
320
- ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
321
- ; CHECK-NEXT: bic v0.8h, #255, lsl #8
322
- ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
323
- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
316
+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #16
317
+ ; CHECK-NEXT: ushr v1.4s, v1.4s, #16
318
+ ; CHECK-NEXT: bic v0.4s, #255, lsl #8
319
+ ; CHECK-NEXT: bic v1.4s, #255, lsl #8
324
320
; CHECK-NEXT: ret
325
321
%c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 2 , i32 6 , i32 10 , i32 14 , i32 18 , i32 22 , i32 26 , i32 30 >
326
322
%d = zext <8 x i8 > %c to <8 x i32 >
@@ -330,10 +326,8 @@ define <8 x i32> @v8i32_261014(<16 x i8> %a, <16 x i8> %b) {
330
326
define <8 x i32 > @v8i32_371115 (<16 x i8 > %a , <16 x i8 > %b ) {
331
327
; CHECK-LABEL: v8i32_371115:
332
328
; CHECK: // %bb.0:
333
- ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
334
- ; CHECK-NEXT: ushr v0.8h, v0.8h, #8
335
- ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
336
- ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
329
+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #24
330
+ ; CHECK-NEXT: ushr v1.4s, v1.4s, #24
337
331
; CHECK-NEXT: ret
338
332
%c = shufflevector <16 x i8 > %a , <16 x i8 > %b , <8 x i32 > <i32 3 , i32 7 , i32 11 , i32 15 , i32 19 , i32 23 , i32 27 , i32 31 >
339
333
%d = zext <8 x i8 > %c to <8 x i32 >
@@ -407,77 +401,59 @@ define <8 x i64> @zext_load_add(ptr %p) {
407
401
define <8 x double > @uitofp_fadd (<32 x i16 > %l ) {
408
402
; CHECK-LABEL: uitofp_fadd:
409
403
; CHECK: // %bb.0:
410
- ; CHECK-NEXT: uzp1 v5.4s, v0.4s, v3.4s
411
- ; CHECK-NEXT: uzp1 v6.4s, v0.4s, v1.4s
412
- ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
413
- ; CHECK-NEXT: movi d4, #0x00ffff0000ffff
414
- ; CHECK-NEXT: uzp1 v7.4s, v2.4s, v3.4s
415
- ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
416
- ; CHECK-NEXT: ext v16.16b, v6.16b, v6.16b, #8
417
- ; CHECK-NEXT: ext v5.16b, v5.16b, v5.16b, #8
418
- ; CHECK-NEXT: uzp2 v1.4s, v0.4s, v3.4s
419
- ; CHECK-NEXT: and v17.8b, v6.8b, v4.8b
420
- ; CHECK-NEXT: and v18.8b, v7.8b, v4.8b
421
- ; CHECK-NEXT: ushr v6.2s, v6.2s, #16
422
- ; CHECK-NEXT: ushr v7.2s, v7.2s, #16
423
- ; CHECK-NEXT: and v21.8b, v0.8b, v4.8b
424
- ; CHECK-NEXT: and v22.8b, v2.8b, v4.8b
425
- ; CHECK-NEXT: ushr v2.2s, v2.2s, #16
426
- ; CHECK-NEXT: and v19.8b, v16.8b, v4.8b
427
- ; CHECK-NEXT: and v20.8b, v5.8b, v4.8b
428
- ; CHECK-NEXT: ushll v3.2d, v17.2s, #0
429
- ; CHECK-NEXT: ushll v17.2d, v18.2s, #0
430
- ; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
431
- ; CHECK-NEXT: ushr v16.2s, v16.2s, #16
432
- ; CHECK-NEXT: ushr v5.2s, v5.2s, #16
433
- ; CHECK-NEXT: ushll v6.2d, v6.2s, #0
434
- ; CHECK-NEXT: ushll v7.2d, v7.2s, #0
435
- ; CHECK-NEXT: ushll v18.2d, v19.2s, #0
436
- ; CHECK-NEXT: ushll v19.2d, v20.2s, #0
437
- ; CHECK-NEXT: ext v20.16b, v0.16b, v0.16b, #8
438
- ; CHECK-NEXT: ushr v0.2s, v0.2s, #16
439
- ; CHECK-NEXT: ushll v16.2d, v16.2s, #0
440
- ; CHECK-NEXT: ushll v21.2d, v21.2s, #0
441
- ; CHECK-NEXT: ushll v5.2d, v5.2s, #0
442
- ; CHECK-NEXT: ushll v22.2d, v22.2s, #0
443
- ; CHECK-NEXT: ushll v2.2d, v2.2s, #0
444
- ; CHECK-NEXT: ucvtf v3.2d, v3.2d
445
- ; CHECK-NEXT: ucvtf v17.2d, v17.2d
446
- ; CHECK-NEXT: ucvtf v6.2d, v6.2d
447
- ; CHECK-NEXT: and v23.8b, v20.8b, v4.8b
448
- ; CHECK-NEXT: and v4.8b, v1.8b, v4.8b
449
- ; CHECK-NEXT: ushr v20.2s, v20.2s, #16
450
- ; CHECK-NEXT: ushr v1.2s, v1.2s, #16
451
- ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
452
- ; CHECK-NEXT: ucvtf v7.2d, v7.2d
404
+ ; CHECK-NEXT: movi v4.2d, #0x0000000000ffff
405
+ ; CHECK-NEXT: ushr v5.2d, v0.2d, #16
406
+ ; CHECK-NEXT: ushr v6.2d, v1.2d, #16
407
+ ; CHECK-NEXT: ushr v7.2d, v2.2d, #16
408
+ ; CHECK-NEXT: ushr v17.2d, v3.2d, #16
409
+ ; CHECK-NEXT: ushr v20.2d, v0.2d, #32
410
+ ; CHECK-NEXT: ushr v22.2d, v1.2d, #32
411
+ ; CHECK-NEXT: ushr v23.2d, v2.2d, #32
412
+ ; CHECK-NEXT: ushr v24.2d, v3.2d, #32
413
+ ; CHECK-NEXT: and v16.16b, v0.16b, v4.16b
414
+ ; CHECK-NEXT: and v18.16b, v1.16b, v4.16b
415
+ ; CHECK-NEXT: and v19.16b, v2.16b, v4.16b
416
+ ; CHECK-NEXT: and v21.16b, v3.16b, v4.16b
417
+ ; CHECK-NEXT: and v5.16b, v5.16b, v4.16b
418
+ ; CHECK-NEXT: and v6.16b, v6.16b, v4.16b
419
+ ; CHECK-NEXT: and v7.16b, v7.16b, v4.16b
420
+ ; CHECK-NEXT: and v17.16b, v17.16b, v4.16b
421
+ ; CHECK-NEXT: and v20.16b, v20.16b, v4.16b
422
+ ; CHECK-NEXT: and v22.16b, v22.16b, v4.16b
423
+ ; CHECK-NEXT: and v23.16b, v23.16b, v4.16b
424
+ ; CHECK-NEXT: and v4.16b, v24.16b, v4.16b
425
+ ; CHECK-NEXT: ushr v0.2d, v0.2d, #48
426
+ ; CHECK-NEXT: ushr v1.2d, v1.2d, #48
427
+ ; CHECK-NEXT: ushr v2.2d, v2.2d, #48
428
+ ; CHECK-NEXT: ushr v3.2d, v3.2d, #48
429
+ ; CHECK-NEXT: ucvtf v16.2d, v16.2d
453
430
; CHECK-NEXT: ucvtf v18.2d, v18.2d
454
431
; CHECK-NEXT: ucvtf v19.2d, v19.2d
455
- ; CHECK-NEXT: ucvtf v16.2d, v16.2d
456
- ; CHECK-NEXT: ushll v23.2d, v23.2s, #0
457
- ; CHECK-NEXT: ushll v4.2d, v4.2s, #0
458
- ; CHECK-NEXT: ushll v20.2d, v20.2s, #0
459
- ; CHECK-NEXT: ushll v1.2d, v1.2s, #0
460
- ; CHECK-NEXT: ucvtf v5.2d, v5.2d
461
432
; CHECK-NEXT: ucvtf v21.2d, v21.2d
433
+ ; CHECK-NEXT: ucvtf v5.2d, v5.2d
434
+ ; CHECK-NEXT: ucvtf v6.2d, v6.2d
435
+ ; CHECK-NEXT: ucvtf v7.2d, v7.2d
436
+ ; CHECK-NEXT: ucvtf v17.2d, v17.2d
437
+ ; CHECK-NEXT: ucvtf v20.2d, v20.2d
462
438
; CHECK-NEXT: ucvtf v22.2d, v22.2d
463
- ; CHECK-NEXT: ucvtf v0.2d, v0.2d
464
- ; CHECK-NEXT: ucvtf v2.2d, v2.2d
465
439
; CHECK-NEXT: ucvtf v23.2d, v23.2d
466
440
; CHECK-NEXT: ucvtf v4.2d, v4.2d
467
- ; CHECK-NEXT: ucvtf v20.2d, v20.2d
441
+ ; CHECK-NEXT: ucvtf v0.2d, v0.2d
468
442
; CHECK-NEXT: ucvtf v1.2d, v1.2d
469
- ; CHECK-NEXT: fadd v16.2d, v18.2d, v16.2d
470
- ; CHECK-NEXT: fadd v7.2d, v17.2d, v7.2d
471
- ; CHECK-NEXT: fadd v5.2d, v19.2d, v5.2d
472
- ; CHECK-NEXT: fadd v3.2d, v3.2d, v6.2d
473
- ; CHECK-NEXT: fadd v0.2d, v21.2d, v0.2d
474
- ; CHECK-NEXT: fadd v2.2d, v22.2d, v2.2d
475
- ; CHECK-NEXT: fadd v4.2d, v4.2d, v1.2d
476
- ; CHECK-NEXT: fadd v1.2d, v23.2d, v20.2d
477
- ; CHECK-NEXT: fadd v0.2d, v3.2d, v0.2d
443
+ ; CHECK-NEXT: ucvtf v2.2d, v2.2d
444
+ ; CHECK-NEXT: ucvtf v3.2d, v3.2d
445
+ ; CHECK-NEXT: fadd v5.2d, v16.2d, v5.2d
446
+ ; CHECK-NEXT: fadd v17.2d, v21.2d, v17.2d
447
+ ; CHECK-NEXT: fadd v7.2d, v19.2d, v7.2d
448
+ ; CHECK-NEXT: fadd v6.2d, v18.2d, v6.2d
449
+ ; CHECK-NEXT: fadd v0.2d, v20.2d, v0.2d
450
+ ; CHECK-NEXT: fadd v1.2d, v22.2d, v1.2d
451
+ ; CHECK-NEXT: fadd v3.2d, v4.2d, v3.2d
452
+ ; CHECK-NEXT: fadd v2.2d, v23.2d, v2.2d
453
+ ; CHECK-NEXT: fadd v0.2d, v5.2d, v0.2d
454
+ ; CHECK-NEXT: fadd v1.2d, v6.2d, v1.2d
478
455
; CHECK-NEXT: fadd v2.2d, v7.2d, v2.2d
479
- ; CHECK-NEXT: fadd v1.2d, v16.2d, v1.2d
480
- ; CHECK-NEXT: fadd v3.2d, v5.2d, v4.2d
456
+ ; CHECK-NEXT: fadd v3.2d, v17.2d, v3.2d
481
457
; CHECK-NEXT: ret
482
458
%s1 = shufflevector <32 x i16 > %l , <32 x i16 > undef , <8 x i32 > <i32 0 , i32 4 , i32 8 , i32 12 , i32 16 , i32 20 , i32 24 , i32 28 >
483
459
%z1 = uitofp <8 x i16 > %s1 to <8 x double >
0 commit comments