@@ -377,12 +377,8 @@ define <16 x i8> @load_v16i8_8_2(float %tmp, <16 x i8> %b, ptr %a) {
377
377
define <8 x i8 > @load_v8i8_2_1 (float %tmp , <8 x i8 > %b , ptr %a ) {
378
378
; CHECK-LABEL: load_v8i8_2_1:
379
379
; CHECK: // %bb.0:
380
- ; CHECK-NEXT: ld1 { v2.b }[0], [x0]
381
- ; CHECK-NEXT: add x8, x0, #1
382
- ; CHECK-NEXT: mov v0.16b, v2.16b
383
- ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
384
- ; CHECK-NEXT: mov v2.b[1], v0.b[4]
385
380
; CHECK-NEXT: fmov d0, d1
381
+ ; CHECK-NEXT: ldr h2, [x0]
386
382
; CHECK-NEXT: mov v0.h[0], v2.h[0]
387
383
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
388
384
; CHECK-NEXT: ret
@@ -395,12 +391,9 @@ define <8 x i8> @load_v8i8_2_1(float %tmp, <8 x i8> %b, ptr %a) {
395
391
define <8 x i8 > @load_v8i8_2_15 (float %tmp , <8 x i8 > %b , ptr %a ) {
396
392
; CHECK-LABEL: load_v8i8_2_15:
397
393
; CHECK: // %bb.0:
398
- ; CHECK-NEXT: ld1 { v0.b }[0], [x0]
399
- ; CHECK-NEXT: add x8, x0, #1
394
+ ; CHECK-NEXT: ldr h0, [x0]
400
395
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
401
- ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
402
396
; CHECK-NEXT: adrp x8, .LCPI33_0
403
- ; CHECK-NEXT: mov v0.b[1], v0.b[4]
404
397
; CHECK-NEXT: mov v0.d[1], v1.d[0]
405
398
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI33_0]
406
399
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
@@ -414,12 +407,8 @@ define <8 x i8> @load_v8i8_2_15(float %tmp, <8 x i8> %b, ptr %a) {
414
407
define <8 x i8 > @load_v8i8_2_2 (float %tmp , <8 x i8 > %b , ptr %a ) {
415
408
; CHECK-LABEL: load_v8i8_2_2:
416
409
; CHECK: // %bb.0:
417
- ; CHECK-NEXT: ld1 { v2.b }[0], [x0]
418
- ; CHECK-NEXT: add x8, x0, #1
419
- ; CHECK-NEXT: mov v0.16b, v2.16b
420
- ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
421
- ; CHECK-NEXT: mov v2.b[1], v0.b[4]
422
410
; CHECK-NEXT: fmov d0, d1
411
+ ; CHECK-NEXT: ldr h2, [x0]
423
412
; CHECK-NEXT: mov v0.h[1], v2.h[0]
424
413
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
425
414
; CHECK-NEXT: ret
@@ -432,12 +421,8 @@ define <8 x i8> @load_v8i8_2_2(float %tmp, <8 x i8> %b, ptr %a) {
432
421
define <8 x i8 > @load_v8i8_2_3 (float %tmp , <8 x i8 > %b , ptr %a ) {
433
422
; CHECK-LABEL: load_v8i8_2_3:
434
423
; CHECK: // %bb.0:
435
- ; CHECK-NEXT: ld1 { v2.b }[0], [x0]
436
- ; CHECK-NEXT: add x8, x0, #1
437
- ; CHECK-NEXT: mov v0.16b, v2.16b
438
- ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
439
- ; CHECK-NEXT: mov v2.b[1], v0.b[4]
440
424
; CHECK-NEXT: fmov d0, d1
425
+ ; CHECK-NEXT: ldr h2, [x0]
441
426
; CHECK-NEXT: mov v0.h[2], v2.h[0]
442
427
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
443
428
; CHECK-NEXT: ret
@@ -450,12 +435,8 @@ define <8 x i8> @load_v8i8_2_3(float %tmp, <8 x i8> %b, ptr %a) {
450
435
define <8 x i8 > @load_v8i8_2_4 (float %tmp , <8 x i8 > %b , ptr %a ) {
451
436
; CHECK-LABEL: load_v8i8_2_4:
452
437
; CHECK: // %bb.0:
453
- ; CHECK-NEXT: ld1 { v2.b }[0], [x0]
454
- ; CHECK-NEXT: add x8, x0, #1
455
- ; CHECK-NEXT: mov v0.16b, v2.16b
456
- ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
457
- ; CHECK-NEXT: mov v2.b[1], v0.b[4]
458
438
; CHECK-NEXT: fmov d0, d1
439
+ ; CHECK-NEXT: ldr h2, [x0]
459
440
; CHECK-NEXT: mov v0.h[3], v2.h[0]
460
441
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
461
442
; CHECK-NEXT: ret
@@ -468,11 +449,9 @@ define <8 x i8> @load_v8i8_2_4(float %tmp, <8 x i8> %b, ptr %a) {
468
449
define <4 x i8 > @load_v4i8_2_1 (float %tmp , <4 x i8 > %b , ptr %a ) {
469
450
; CHECK-LABEL: load_v4i8_2_1:
470
451
; CHECK: // %bb.0:
471
- ; CHECK-NEXT: ld1 { v0.b }[0], [x0]
472
- ; CHECK-NEXT: add x8, x0, #1
452
+ ; CHECK-NEXT: ldr h0, [x0]
473
453
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
474
- ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
475
- ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
454
+ ; CHECK-NEXT: zip1 v0.8b, v0.8b, v0.8b
476
455
; CHECK-NEXT: mov v0.s[1], v1.s[1]
477
456
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
478
457
; CHECK-NEXT: ret
@@ -485,10 +464,8 @@ define <4 x i8> @load_v4i8_2_1(float %tmp, <4 x i8> %b, ptr %a) {
485
464
define <4 x i8 > @load_v4i8_2_2 (float %tmp , <4 x i8 > %b , ptr %a ) {
486
465
; CHECK-LABEL: load_v4i8_2_2:
487
466
; CHECK: // %bb.0:
488
- ; CHECK-NEXT: ld1 { v0.b }[0], [x0]
489
- ; CHECK-NEXT: add x8, x0, #1
490
- ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
491
- ; CHECK-NEXT: uzp1 v2.4h, v0.4h, v0.4h
467
+ ; CHECK-NEXT: ldr h0, [x0]
468
+ ; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b
492
469
; CHECK-NEXT: fmov d0, d1
493
470
; CHECK-NEXT: mov v0.s[1], v2.s[0]
494
471
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -504,13 +481,8 @@ define <4 x i8> @load_v4i8_2_2(float %tmp, <4 x i8> %b, ptr %a) {
504
481
define <8 x i16 > @load_v8i16_2_1 (float %tmp , <8 x i16 > %b , ptr %a ) {
505
482
; CHECK-LABEL: load_v8i16_2_1:
506
483
; CHECK: // %bb.0:
507
- ; CHECK-NEXT: ldrh w8, [x0]
508
- ; CHECK-NEXT: add x9, x0, #2
509
- ; CHECK-NEXT: fmov s0, w8
510
- ; CHECK-NEXT: ld1 { v0.h }[2], [x9]
511
- ; CHECK-NEXT: xtn v2.4h, v0.4s
512
484
; CHECK-NEXT: mov v0.16b, v1.16b
513
- ; CHECK-NEXT: mov v0.s[0], v2.s[0]
485
+ ; CHECK-NEXT: ld1 { v0.s }[0], [x0]
514
486
; CHECK-NEXT: ret
515
487
%l = load <2 x i16 >, ptr %a
516
488
%s1 = shufflevector <2 x i16 > %l , <2 x i16 > poison, <8 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
@@ -521,13 +493,9 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) {
521
493
define <8 x i16 > @load_v8i16_2_15 (float %tmp , <8 x i16 > %b , ptr %a ) {
522
494
; CHECK-LABEL: load_v8i16_2_15:
523
495
; CHECK: // %bb.0:
524
- ; CHECK-NEXT: ldrh w8, [x0]
525
- ; CHECK-NEXT: add x9, x0, #2
526
496
; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1
527
- ; CHECK-NEXT: fmov s2, w8
528
497
; CHECK-NEXT: adrp x8, .LCPI40_0
529
- ; CHECK-NEXT: ld1 { v2.h }[2], [x9]
530
- ; CHECK-NEXT: xtn v0.4h, v2.4s
498
+ ; CHECK-NEXT: ldr s0, [x0]
531
499
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI40_0]
532
500
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
533
501
; CHECK-NEXT: ret
@@ -540,13 +508,8 @@ define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
540
508
define <8 x i16 > @load_v8i16_2_2 (float %tmp , <8 x i16 > %b , ptr %a ) {
541
509
; CHECK-LABEL: load_v8i16_2_2:
542
510
; CHECK: // %bb.0:
543
- ; CHECK-NEXT: ldrh w8, [x0]
544
- ; CHECK-NEXT: add x9, x0, #2
545
- ; CHECK-NEXT: fmov s0, w8
546
- ; CHECK-NEXT: ld1 { v0.h }[2], [x9]
547
- ; CHECK-NEXT: xtn v2.4h, v0.4s
548
511
; CHECK-NEXT: mov v0.16b, v1.16b
549
- ; CHECK-NEXT: mov v0.s[1], v2.s[0]
512
+ ; CHECK-NEXT: ld1 { v0.s }[1], [x0]
550
513
; CHECK-NEXT: ret
551
514
%l = load <2 x i16 >, ptr %a
552
515
%s1 = shufflevector <2 x i16 > %l , <2 x i16 > poison, <8 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
@@ -557,13 +520,8 @@ define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, ptr %a) {
557
520
define <8 x i16 > @load_v8i16_2_3 (float %tmp , <8 x i16 > %b , ptr %a ) {
558
521
; CHECK-LABEL: load_v8i16_2_3:
559
522
; CHECK: // %bb.0:
560
- ; CHECK-NEXT: ldrh w8, [x0]
561
- ; CHECK-NEXT: add x9, x0, #2
562
- ; CHECK-NEXT: fmov s0, w8
563
- ; CHECK-NEXT: ld1 { v0.h }[2], [x9]
564
- ; CHECK-NEXT: xtn v2.4h, v0.4s
565
523
; CHECK-NEXT: mov v0.16b, v1.16b
566
- ; CHECK-NEXT: mov v0.s[2], v2.s[0]
524
+ ; CHECK-NEXT: ld1 { v0.s }[2], [x0]
567
525
; CHECK-NEXT: ret
568
526
%l = load <2 x i16 >, ptr %a
569
527
%s1 = shufflevector <2 x i16 > %l , <2 x i16 > poison, <8 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
@@ -574,13 +532,8 @@ define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, ptr %a) {
574
532
define <8 x i16 > @load_v8i16_2_4 (float %tmp , <8 x i16 > %b , ptr %a ) {
575
533
; CHECK-LABEL: load_v8i16_2_4:
576
534
; CHECK: // %bb.0:
577
- ; CHECK-NEXT: ldrh w8, [x0]
578
- ; CHECK-NEXT: add x9, x0, #2
579
- ; CHECK-NEXT: fmov s0, w8
580
- ; CHECK-NEXT: ld1 { v0.h }[2], [x9]
581
- ; CHECK-NEXT: xtn v2.4h, v0.4s
582
535
; CHECK-NEXT: mov v0.16b, v1.16b
583
- ; CHECK-NEXT: mov v0.s[3], v2.s[0]
536
+ ; CHECK-NEXT: ld1 { v0.s }[3], [x0]
584
537
; CHECK-NEXT: ret
585
538
%l = load <2 x i16 >, ptr %a
586
539
%s1 = shufflevector <2 x i16 > %l , <2 x i16 > poison, <8 x i32 > <i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
@@ -591,11 +544,8 @@ define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, ptr %a) {
591
544
define <4 x i16 > @load_v4i16_2_1 (float %tmp , <4 x i16 > %b , ptr %a ) {
592
545
; CHECK-LABEL: load_v4i16_2_1:
593
546
; CHECK: // %bb.0:
594
- ; CHECK-NEXT: ld1 { v0.h }[0], [x0]
595
- ; CHECK-NEXT: add x8, x0, #2
547
+ ; CHECK-NEXT: ldr s0, [x0]
596
548
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
597
- ; CHECK-NEXT: ld1 { v0.h }[2], [x8]
598
- ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
599
549
; CHECK-NEXT: mov v0.s[1], v1.s[1]
600
550
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
601
551
; CHECK-NEXT: ret
@@ -608,11 +558,8 @@ define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, ptr %a) {
608
558
define <4 x i16 > @load_v4i16_2_2 (float %tmp , <4 x i16 > %b , ptr %a ) {
609
559
; CHECK-LABEL: load_v4i16_2_2:
610
560
; CHECK: // %bb.0:
611
- ; CHECK-NEXT: ld1 { v0.h }[0], [x0]
612
- ; CHECK-NEXT: add x8, x0, #2
613
- ; CHECK-NEXT: ld1 { v0.h }[2], [x8]
614
- ; CHECK-NEXT: uzp1 v2.4h, v0.4h, v0.4h
615
561
; CHECK-NEXT: fmov d0, d1
562
+ ; CHECK-NEXT: ldr s2, [x0]
616
563
; CHECK-NEXT: mov v0.s[1], v2.s[0]
617
564
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
618
565
; CHECK-NEXT: ret
0 commit comments