@@ -342,21 +342,14 @@ define void @zip_v4i32(ptr %a, ptr %b) {
342
342
define void @zip1_v8i32_undef (ptr %a ) {
343
343
; CHECK-LABEL: zip1_v8i32_undef:
344
344
; CHECK: // %bb.0:
345
- ; CHECK-NEXT: sub sp, sp, #16
346
- ; CHECK-NEXT: .cfi_def_cfa_offset 16
345
+ ; CHECK-NEXT: adrp x8, .LCPI6_0
347
346
; CHECK-NEXT: ldr q0, [x0, #16]
348
347
; CHECK-NEXT: ldr q0, [x0]
349
- ; CHECK-NEXT: mov z1.s, z0.s[3 ]
350
- ; CHECK-NEXT: mov z2 .s, z0.s[2]
348
+ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0 ]
349
+ ; CHECK-NEXT: tbl z1 .s, { z0.s }, z1.s
351
350
; CHECK-NEXT: zip1 z0.s, z0.s, z0.s
352
- ; CHECK-NEXT: fmov w8, s1
353
- ; CHECK-NEXT: fmov w9, s2
354
- ; CHECK-NEXT: stp w8, w8, [sp, #8]
355
- ; CHECK-NEXT: stp w9, w9, [sp]
356
- ; CHECK-NEXT: ldr q1, [sp]
357
- ; CHECK-NEXT: str q0, [x0]
358
351
; CHECK-NEXT: str q1, [x0, #16]
359
- ; CHECK-NEXT: add sp, sp, #16
352
+ ; CHECK-NEXT: str q0, [x0]
360
353
; CHECK-NEXT: ret
361
354
%tmp1 = load volatile <8 x i32 >, ptr %a
362
355
%tmp2 = shufflevector <8 x i32 > %tmp1 , <8 x i32 > undef , <8 x i32 > <i32 0 , i32 0 , i32 1 , i32 1 , i32 2 , i32 2 , i32 3 , i32 3 >
@@ -389,41 +382,15 @@ define void @trn_v32i8(ptr %a, ptr %b) {
389
382
define void @trn_v8i16 (ptr %a , ptr %b ) {
390
383
; CHECK-LABEL: trn_v8i16:
391
384
; CHECK: // %bb.0:
385
+ ; CHECK-NEXT: adrp x8, .LCPI8_0
386
+ ; CHECK-NEXT: adrp x9, .LCPI8_1
392
387
; CHECK-NEXT: ldr q0, [x0]
393
- ; CHECK-NEXT: fmov w8, s0
394
- ; CHECK-NEXT: mov z1.h, z0.h[3]
395
- ; CHECK-NEXT: mov z2.h, z0.h[1]
396
- ; CHECK-NEXT: mov z3.h, z0.h[5]
397
- ; CHECK-NEXT: mov z4.h, z0.h[4]
398
- ; CHECK-NEXT: strh w8, [sp, #-32]!
399
- ; CHECK-NEXT: .cfi_def_cfa_offset 32
400
- ; CHECK-NEXT: fmov w8, s1
401
- ; CHECK-NEXT: mov z1.h, z0.h[2]
402
- ; CHECK-NEXT: fmov w9, s2
403
- ; CHECK-NEXT: mov z2.h, z0.h[6]
404
- ; CHECK-NEXT: mov z0.h, z0.h[7]
405
- ; CHECK-NEXT: fmov w10, s3
406
- ; CHECK-NEXT: fmov w11, s4
407
- ; CHECK-NEXT: fmov w12, s1
408
- ; CHECK-NEXT: strh w8, [sp, #14]
409
- ; CHECK-NEXT: fmov w13, s2
410
- ; CHECK-NEXT: strh w9, [sp, #12]
411
- ; CHECK-NEXT: strh w10, [sp, #10]
412
- ; CHECK-NEXT: strh w12, [sp, #4]
413
- ; CHECK-NEXT: fmov w12, s0
414
- ; CHECK-NEXT: strh w11, [sp, #8]
415
- ; CHECK-NEXT: strh w13, [sp, #6]
416
- ; CHECK-NEXT: strh w12, [sp, #2]
417
- ; CHECK-NEXT: strh w12, [sp, #28]
418
- ; CHECK-NEXT: strh w11, [sp, #26]
419
- ; CHECK-NEXT: strh w10, [sp, #22]
420
- ; CHECK-NEXT: strh w8, [sp, #20]
421
- ; CHECK-NEXT: strh w13, [sp, #18]
422
- ; CHECK-NEXT: strh w9, [sp, #16]
423
- ; CHECK-NEXT: ldp q0, q1, [sp]
424
- ; CHECK-NEXT: add z0.h, z0.h, z1.h
388
+ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
389
+ ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_1]
390
+ ; CHECK-NEXT: tbl z1.h, { z0.h }, z1.h
391
+ ; CHECK-NEXT: tbl z0.h, { z0.h }, z2.h
392
+ ; CHECK-NEXT: add z0.h, z1.h, z0.h
425
393
; CHECK-NEXT: str q0, [x0]
426
- ; CHECK-NEXT: add sp, sp, #32
427
394
; CHECK-NEXT: ret
428
395
%tmp1 = load <8 x i16 >, ptr %a
429
396
%tmp2 = load <8 x i16 >, ptr %b
@@ -692,21 +659,14 @@ define void @zip2_v8i32(ptr %a, ptr %b) #0{
692
659
define void @zip2_v8i32_undef (ptr %a ) #0 {
693
660
; CHECK-LABEL: zip2_v8i32_undef:
694
661
; CHECK: // %bb.0:
695
- ; CHECK-NEXT: sub sp, sp, #16
696
- ; CHECK-NEXT: .cfi_def_cfa_offset 16
662
+ ; CHECK-NEXT: adrp x8, .LCPI17_0
697
663
; CHECK-NEXT: ldr q0, [x0]
698
664
; CHECK-NEXT: ldr q0, [x0, #16]
699
- ; CHECK-NEXT: mov z1.s, z0.s[3 ]
700
- ; CHECK-NEXT: mov z2 .s, z0.s[2]
665
+ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0 ]
666
+ ; CHECK-NEXT: tbl z1 .s, { z0.s }, z1.s
701
667
; CHECK-NEXT: zip1 z0.s, z0.s, z0.s
702
- ; CHECK-NEXT: fmov w8, s1
703
- ; CHECK-NEXT: fmov w9, s2
704
- ; CHECK-NEXT: stp w8, w8, [sp, #8]
705
- ; CHECK-NEXT: stp w9, w9, [sp]
706
- ; CHECK-NEXT: ldr q1, [sp]
707
- ; CHECK-NEXT: str q0, [x0]
708
668
; CHECK-NEXT: str q1, [x0, #16]
709
- ; CHECK-NEXT: add sp, sp, #16
669
+ ; CHECK-NEXT: str q0, [x0]
710
670
; CHECK-NEXT: ret
711
671
%tmp1 = load volatile <8 x i32 >, ptr %a
712
672
%tmp2 = shufflevector <8 x i32 > %tmp1 , <8 x i32 > undef , <8 x i32 > <i32 4 , i32 4 , i32 5 , i32 5 , i32 6 , i32 6 , i32 7 , i32 7 >
@@ -921,26 +881,15 @@ define void @uzp_v32i8(ptr %a, ptr %b) #0{
921
881
define void @uzp_v4i16 (ptr %a , ptr %b ) #0 {
922
882
; CHECK-LABEL: uzp_v4i16:
923
883
; CHECK: // %bb.0:
884
+ ; CHECK-NEXT: adrp x8, .LCPI19_0
885
+ ; CHECK-NEXT: adrp x9, .LCPI19_1
924
886
; CHECK-NEXT: ldr d0, [x0]
925
- ; CHECK-NEXT: mov z1.h, z0.h[1]
926
- ; CHECK-NEXT: fmov w8, s0
927
- ; CHECK-NEXT: mov z2.h, z0.h[2]
928
- ; CHECK-NEXT: mov z3.h, z0.h[3]
929
- ; CHECK-NEXT: fmov w9, s1
930
- ; CHECK-NEXT: strh w8, [sp, #-16]!
931
- ; CHECK-NEXT: .cfi_def_cfa_offset 16
932
- ; CHECK-NEXT: fmov w10, s2
933
- ; CHECK-NEXT: fmov w11, s3
934
- ; CHECK-NEXT: strh w9, [sp, #6]
935
- ; CHECK-NEXT: strh w8, [sp, #10]
936
- ; CHECK-NEXT: strh w9, [sp, #8]
937
- ; CHECK-NEXT: strh w10, [sp, #4]
938
- ; CHECK-NEXT: strh w11, [sp, #2]
939
- ; CHECK-NEXT: strh w10, [sp, #12]
940
- ; CHECK-NEXT: ldp d0, d1, [sp]
941
- ; CHECK-NEXT: add z0.h, z0.h, z1.h
887
+ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0]
888
+ ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI19_1]
889
+ ; CHECK-NEXT: tbl z1.h, { z0.h }, z1.h
890
+ ; CHECK-NEXT: tbl z0.h, { z0.h }, z2.h
891
+ ; CHECK-NEXT: add z0.h, z1.h, z0.h
942
892
; CHECK-NEXT: str d0, [x0]
943
- ; CHECK-NEXT: add sp, sp, #16
944
893
; CHECK-NEXT: ret
945
894
%tmp1 = load <4 x i16 >, ptr %a
946
895
%tmp2 = load <4 x i16 >, ptr %b
@@ -1071,11 +1020,12 @@ define void @uzp_v16i16(ptr %a, ptr %b) #0{
1071
1020
define void @uzp_v8f32 (ptr %a , ptr %b ) #0 {
1072
1021
; CHECK-LABEL: uzp_v8f32:
1073
1022
; CHECK: // %bb.0:
1074
- ; CHECK-NEXT: sub sp, sp, #64
1075
- ; CHECK-NEXT: .cfi_def_cfa_offset 64
1023
+ ; CHECK-NEXT: sub sp, sp, #48
1024
+ ; CHECK-NEXT: .cfi_def_cfa_offset 48
1076
1025
; CHECK-NEXT: ldp q2, q0, [x0]
1077
- ; CHECK-NEXT: ptrue p0.s, vl4
1026
+ ; CHECK-NEXT: adrp x8, .LCPI21_0
1078
1027
; CHECK-NEXT: ldp q4, q1, [x1]
1028
+ ; CHECK-NEXT: ptrue p0.s, vl4
1079
1029
; CHECK-NEXT: mov z3.s, z0.s[2]
1080
1030
; CHECK-NEXT: mov z5.s, z1.s[2]
1081
1031
; CHECK-NEXT: stp s0, s3, [sp, #24]
@@ -1085,17 +1035,17 @@ define void @uzp_v8f32(ptr %a, ptr %b) #0{
1085
1035
; CHECK-NEXT: mov z0.s, z0.s[1]
1086
1036
; CHECK-NEXT: stp s3, s1, [sp, #4]
1087
1037
; CHECK-NEXT: mov z1.s, z2.s[1]
1088
- ; CHECK-NEXT: stp s0, s5, [sp, #40 ]
1089
- ; CHECK-NEXT: mov z5.s, z4.s[3 ]
1090
- ; CHECK-NEXT: mov z4.s, z4.s[1 ]
1038
+ ; CHECK-NEXT: str s5, [sp, #44 ]
1039
+ ; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI21_0 ]
1040
+ ; CHECK-NEXT: str s0, [sp, #40 ]
1091
1041
; CHECK-NEXT: ldp q3, q2, [sp]
1042
+ ; CHECK-NEXT: tbl z0.s, { z4.s }, z5.s
1092
1043
; CHECK-NEXT: str s1, [sp, #32]
1093
- ; CHECK-NEXT: stp s4, s5, [sp, #48]
1094
- ; CHECK-NEXT: ldp q0, q1, [sp, #32]
1095
- ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z2.s
1096
- ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s
1097
- ; CHECK-NEXT: stp q0, q1, [x0]
1098
- ; CHECK-NEXT: add sp, sp, #64
1044
+ ; CHECK-NEXT: ldr q1, [sp, #32]
1045
+ ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z2.s
1046
+ ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z3.s
1047
+ ; CHECK-NEXT: stp q1, q0, [x0]
1048
+ ; CHECK-NEXT: add sp, sp, #48
1099
1049
; CHECK-NEXT: ret
1100
1050
%tmp1 = load <8 x float >, ptr %a
1101
1051
%tmp2 = load <8 x float >, ptr %b
0 commit comments