@@ -374,6 +374,131 @@ define <16 x i8> @load_v16i8_8_2(float %tmp, <16 x i8> %b, ptr %a) {
374
374
ret <16 x i8 > %s2
375
375
}
376
376
377
+ define <8 x i8 > @load_v8i8_2_1 (float %tmp , <8 x i8 > %b , ptr %a ) { ; Test: a <2 x i8> loaded from %a replaces lanes 0-1 of %b. %tmp is unused in the body; presumably it keeps %b out of the first FP/SIMD argument register (TODO confirm).
378
+ ; CHECK-LABEL: load_v8i8_2_1:
379
+ ; CHECK: // %bb.0:
380
+ ; CHECK-NEXT: ld1 { v2.b }[0], [x0]
381
+ ; CHECK-NEXT: add x8, x0, #1
382
+ ; CHECK-NEXT: mov v0.16b, v2.16b
383
+ ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
384
+ ; CHECK-NEXT: mov v2.b[1], v0.b[4]
385
+ ; CHECK-NEXT: fmov d0, d1
386
+ ; CHECK-NEXT: mov v0.h[0], v2.h[0]
387
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
388
+ ; CHECK-NEXT: ret
389
+ %l = load <2 x i8 >, ptr %a ; the two-byte subvector to insert
390
+ %s1 = shufflevector <2 x i8 > %l , <2 x i8 > poison, <8 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; widen to 8 lanes: lanes 0-1 = %l, lanes 2-7 undefined
391
+ %s2 = shufflevector <8 x i8 > %s1 , <8 x i8 > %b , <8 x i32 > <i32 0 , i32 1 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 > ; lanes 0-1 from %s1 (loaded bytes); mask indices 10-15 pick %b lanes 2-7
392
+ ret <8 x i8 > %s2
393
+ }
394
+
395
+ define <8 x i8 > @load_v8i8_2_15 (float %tmp , <8 x i8 > %b , ptr %a ) { ; Test: a <2 x i8> loaded from %a replaces lanes 1-2 of %b, an offset that is not a multiple of the 2-lane subvector width. The expected code below uses a tbl with a constant-pool mask. %tmp is unused in the body; presumably it keeps %b out of the first FP/SIMD argument register (TODO confirm).
396
+ ; CHECK-LABEL: load_v8i8_2_15:
397
+ ; CHECK: // %bb.0:
398
+ ; CHECK-NEXT: ld1 { v0.b }[0], [x0]
399
+ ; CHECK-NEXT: add x8, x0, #1
400
+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
401
+ ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
402
+ ; CHECK-NEXT: adrp x8, .LCPI33_0
403
+ ; CHECK-NEXT: mov v0.b[1], v0.b[4]
404
+ ; CHECK-NEXT: mov v0.d[1], v1.d[0]
405
+ ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI33_0]
406
+ ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
407
+ ; CHECK-NEXT: ret
408
+ %l = load <2 x i8 >, ptr %a ; the two-byte subvector to insert
409
+ %s1 = shufflevector <2 x i8 > %l , <2 x i8 > poison, <8 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; widen to 8 lanes: lanes 0-1 = %l, lanes 2-7 undefined
410
+ %s2 = shufflevector <8 x i8 > %s1 , <8 x i8 > %b , <8 x i32 > <i32 8 , i32 0 , i32 1 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 > ; lane 0 = %b lane 0 (index 8); lanes 1-2 = loaded bytes; indices 11-15 pick %b lanes 3-7
411
+ ret <8 x i8 > %s2
412
+ }
413
+
414
+ define <8 x i8 > @load_v8i8_2_2 (float %tmp , <8 x i8 > %b , ptr %a ) { ; Test: a <2 x i8> loaded from %a replaces lanes 2-3 of %b. %tmp is unused in the body; presumably it keeps %b out of the first FP/SIMD argument register (TODO confirm).
415
+ ; CHECK-LABEL: load_v8i8_2_2:
416
+ ; CHECK: // %bb.0:
417
+ ; CHECK-NEXT: ld1 { v2.b }[0], [x0]
418
+ ; CHECK-NEXT: add x8, x0, #1
419
+ ; CHECK-NEXT: mov v0.16b, v2.16b
420
+ ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
421
+ ; CHECK-NEXT: mov v2.b[1], v0.b[4]
422
+ ; CHECK-NEXT: fmov d0, d1
423
+ ; CHECK-NEXT: mov v0.h[1], v2.h[0]
424
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
425
+ ; CHECK-NEXT: ret
426
+ %l = load <2 x i8 >, ptr %a ; the two-byte subvector to insert
427
+ %s1 = shufflevector <2 x i8 > %l , <2 x i8 > poison, <8 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; widen to 8 lanes: lanes 0-1 = %l, lanes 2-7 undefined
428
+ %s2 = shufflevector <8 x i8 > %s1 , <8 x i8 > %b , <8 x i32 > <i32 8 , i32 9 , i32 0 , i32 1 , i32 12 , i32 13 , i32 14 , i32 15 > ; indices 8-9 pick %b lanes 0-1; lanes 2-3 = loaded bytes; indices 12-15 pick %b lanes 4-7
429
+ ret <8 x i8 > %s2
430
+ }
431
+
432
+ define <8 x i8 > @load_v8i8_2_3 (float %tmp , <8 x i8 > %b , ptr %a ) { ; Test: a <2 x i8> loaded from %a replaces lanes 4-5 of %b. %tmp is unused in the body; presumably it keeps %b out of the first FP/SIMD argument register (TODO confirm).
433
+ ; CHECK-LABEL: load_v8i8_2_3:
434
+ ; CHECK: // %bb.0:
435
+ ; CHECK-NEXT: ld1 { v2.b }[0], [x0]
436
+ ; CHECK-NEXT: add x8, x0, #1
437
+ ; CHECK-NEXT: mov v0.16b, v2.16b
438
+ ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
439
+ ; CHECK-NEXT: mov v2.b[1], v0.b[4]
440
+ ; CHECK-NEXT: fmov d0, d1
441
+ ; CHECK-NEXT: mov v0.h[2], v2.h[0]
442
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
443
+ ; CHECK-NEXT: ret
444
+ %l = load <2 x i8 >, ptr %a ; the two-byte subvector to insert
445
+ %s1 = shufflevector <2 x i8 > %l , <2 x i8 > poison, <8 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; widen to 8 lanes: lanes 0-1 = %l, lanes 2-7 undefined
446
+ %s2 = shufflevector <8 x i8 > %s1 , <8 x i8 > %b , <8 x i32 > <i32 8 , i32 9 , i32 10 , i32 11 , i32 0 , i32 1 , i32 14 , i32 15 > ; indices 8-11 pick %b lanes 0-3; lanes 4-5 = loaded bytes; indices 14-15 pick %b lanes 6-7
447
+ ret <8 x i8 > %s2
448
+ }
449
+
450
+ define <8 x i8 > @load_v8i8_2_4 (float %tmp , <8 x i8 > %b , ptr %a ) { ; Test: a <2 x i8> loaded from %a replaces lanes 6-7 (the top two lanes) of %b. %tmp is unused in the body; presumably it keeps %b out of the first FP/SIMD argument register (TODO confirm).
451
+ ; CHECK-LABEL: load_v8i8_2_4:
452
+ ; CHECK: // %bb.0:
453
+ ; CHECK-NEXT: ld1 { v2.b }[0], [x0]
454
+ ; CHECK-NEXT: add x8, x0, #1
455
+ ; CHECK-NEXT: mov v0.16b, v2.16b
456
+ ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
457
+ ; CHECK-NEXT: mov v2.b[1], v0.b[4]
458
+ ; CHECK-NEXT: fmov d0, d1
459
+ ; CHECK-NEXT: mov v0.h[3], v2.h[0]
460
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
461
+ ; CHECK-NEXT: ret
462
+ %l = load <2 x i8 >, ptr %a ; the two-byte subvector to insert
463
+ %s1 = shufflevector <2 x i8 > %l , <2 x i8 > poison, <8 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef > ; widen to 8 lanes: lanes 0-1 = %l, lanes 2-7 undefined
464
+ %s2 = shufflevector <8 x i8 > %s1 , <8 x i8 > %b , <8 x i32 > <i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 0 , i32 1 > ; indices 8-13 pick %b lanes 0-5; lanes 6-7 = loaded bytes
465
+ ret <8 x i8 > %s2
466
+ }
467
+
468
+ define <4 x i8 > @load_v4i8_2_1 (float %tmp , <4 x i8 > %b , ptr %a ) { ; Test: a <2 x i8> loaded from %a replaces lanes 0-1 (the low half) of the <4 x i8> %b. %tmp is unused in the body; presumably it keeps %b out of the first FP/SIMD argument register (TODO confirm).
469
+ ; CHECK-LABEL: load_v4i8_2_1:
470
+ ; CHECK: // %bb.0:
471
+ ; CHECK-NEXT: ld1 { v0.b }[0], [x0]
472
+ ; CHECK-NEXT: add x8, x0, #1
473
+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
474
+ ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
475
+ ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
476
+ ; CHECK-NEXT: mov v0.s[1], v1.s[1]
477
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
478
+ ; CHECK-NEXT: ret
479
+ %l = load <2 x i8 >, ptr %a ; the two-byte subvector to insert
480
+ %s1 = shufflevector <2 x i8 > %l , <2 x i8 > poison, <4 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef > ; widen to 4 lanes: lanes 0-1 = %l, lanes 2-3 undefined
481
+ %s2 = shufflevector <4 x i8 > %s1 , <4 x i8 > %b , <4 x i32 > <i32 0 , i32 1 , i32 6 , i32 7 > ; lanes 0-1 = loaded bytes; indices 6-7 pick %b lanes 2-3
482
+ ret <4 x i8 > %s2
483
+ }
484
+
485
+ define <4 x i8 > @load_v4i8_2_2 (float %tmp , <4 x i8 > %b , ptr %a ) { ; Test: a <2 x i8> loaded from %a replaces lanes 2-3 (the high half) of the <4 x i8> %b. %tmp is unused in the body; presumably it keeps %b out of the first FP/SIMD argument register (TODO confirm).
486
+ ; CHECK-LABEL: load_v4i8_2_2:
487
+ ; CHECK: // %bb.0:
488
+ ; CHECK-NEXT: ld1 { v0.b }[0], [x0]
489
+ ; CHECK-NEXT: add x8, x0, #1
490
+ ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
491
+ ; CHECK-NEXT: uzp1 v2.4h, v0.4h, v0.4h
492
+ ; CHECK-NEXT: fmov d0, d1
493
+ ; CHECK-NEXT: mov v0.s[1], v2.s[0]
494
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
495
+ ; CHECK-NEXT: ret
496
+ %l = load <2 x i8 >, ptr %a ; the two-byte subvector to insert
497
+ %s1 = shufflevector <2 x i8 > %l , <2 x i8 > poison, <4 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef > ; widen to 4 lanes: lanes 0-1 = %l, lanes 2-3 undefined
498
+ %s2 = shufflevector <4 x i8 > %s1 , <4 x i8 > %b , <4 x i32 > <i32 4 , i32 5 , i32 0 , i32 1 > ; indices 4-5 pick %b lanes 0-1; lanes 2-3 = loaded bytes
499
+ ret <4 x i8 > %s2
500
+ }
501
+
377
502
; i16
378
503
379
504
define <8 x i16 > @load_v8i16_2_1 (float %tmp , <8 x i16 > %b , ptr %a ) {
@@ -400,10 +525,10 @@ define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
400
525
; CHECK-NEXT: add x9, x0, #2
401
526
; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1
402
527
; CHECK-NEXT: fmov s2, w8
403
- ; CHECK-NEXT: adrp x8, .LCPI33_0
528
+ ; CHECK-NEXT: adrp x8, .LCPI40_0
404
529
; CHECK-NEXT: ld1 { v2.h }[2], [x9]
405
530
; CHECK-NEXT: xtn v0.4h, v2.4s
406
- ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI33_0 ]
531
+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI40_0 ]
407
532
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
408
533
; CHECK-NEXT: ret
409
534
%l = load <2 x i16 >, ptr %a
0 commit comments