@@ -236,22 +236,20 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
236
236
; CHECK-NEXT: sunpklo z4.d, z2.s
237
237
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
238
238
; CHECK-NEXT: sunpklo z0.s, z0.h
239
- ; CHECK-NEXT: mov z7.d, z1.d
240
- ; CHECK-NEXT: sunpklo z2.d, z2.s
239
+ ; CHECK-NEXT: sunpklo z7.d, z1.s
240
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
241
241
; CHECK-NEXT: sunpklo z5.d, z3.s
242
242
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
243
- ; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
243
+ ; CHECK-NEXT: sunpklo z2.d, z2.s
244
244
; CHECK-NEXT: sunpklo z1.d, z1.s
245
- ; CHECK-NEXT: mov z6.d, z0.d
245
+ ; CHECK-NEXT: sunpklo z6.d, z0.s
246
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
246
247
; CHECK-NEXT: sunpklo z3.d, z3.s
247
248
; CHECK-NEXT: stp q4, q2, [x0]
248
- ; CHECK-NEXT: sunpklo z4.d, z7.s
249
- ; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
250
249
; CHECK-NEXT: sunpklo z0.d, z0.s
250
+ ; CHECK-NEXT: stp q7, q1, [x0, #32]
251
251
; CHECK-NEXT: stp q5, q3, [x0, #64]
252
- ; CHECK-NEXT: sunpklo z2.d, z6.s
253
- ; CHECK-NEXT: stp q1, q4, [x0, #32]
254
- ; CHECK-NEXT: stp q0, q2, [x0, #96]
252
+ ; CHECK-NEXT: stp q6, q0, [x0, #96]
255
253
; CHECK-NEXT: ret
256
254
%b = sext <16 x i8 > %a to <16 x i64 >
257
255
store <16 x i64 > %b , ptr %out
@@ -264,62 +262,60 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
264
262
; CHECK-NEXT: ldp q1, q0, [x0]
265
263
; CHECK-NEXT: add z0.b, z0.b, z0.b
266
264
; CHECK-NEXT: add z1.b, z1.b, z1.b
267
- ; CHECK-NEXT: mov z2.d, z0.d
265
+ ; CHECK-NEXT: sunpklo z2.h, z0.b
266
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
267
+ ; CHECK-NEXT: sunpklo z3.h, z1.b
268
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
268
269
; CHECK-NEXT: sunpklo z0.h, z0.b
269
- ; CHECK-NEXT: mov z3.d, z1.d
270
+ ; CHECK-NEXT: sunpklo z4.s, z2.h
270
271
; CHECK-NEXT: sunpklo z1.h, z1.b
272
+ ; CHECK-NEXT: sunpklo z5.s, z3.h
271
273
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
272
274
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
273
- ; CHECK-NEXT: sunpklo z4.s, z0.h
275
+ ; CHECK-NEXT: sunpklo z6.s, z0.h
274
276
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
275
- ; CHECK-NEXT: sunpklo z5.s, z1.h
276
- ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
277
- ; CHECK-NEXT: sunpklo z2.h, z2.b
278
- ; CHECK-NEXT: sunpklo z3.h, z3.b
279
- ; CHECK-NEXT: sunpklo z0.s, z0.h
280
- ; CHECK-NEXT: sunpklo z16.d, z4.s
277
+ ; CHECK-NEXT: sunpklo z7.d, z4.s
281
278
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
282
- ; CHECK-NEXT: sunpklo z1.s, z1.h
279
+ ; CHECK-NEXT: sunpklo z2.s, z2.h
280
+ ; CHECK-NEXT: sunpklo z3.s, z3.h
281
+ ; CHECK-NEXT: sunpklo z16.s, z1.h
283
282
; CHECK-NEXT: sunpklo z17.d, z5.s
284
283
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
285
- ; CHECK-NEXT: sunpklo z6.s, z2.h
286
- ; CHECK-NEXT: sunpklo z7.s, z3.h
287
- ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
284
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
285
+ ; CHECK-NEXT: sunpklo z0.s, z0.h
288
286
; CHECK-NEXT: sunpklo z4.d, z4.s
289
- ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
290
- ; CHECK-NEXT: sunpklo z19.d, z0.s
291
- ; CHECK-NEXT: sunpklo z5.d, z5.s
292
- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
293
- ; CHECK-NEXT: sunpklo z2.s, z2.h
294
287
; CHECK-NEXT: sunpklo z18.d, z6.s
295
288
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
296
- ; CHECK-NEXT: sunpklo z3.s, z3.h
297
- ; CHECK-NEXT: stp q16, q4, [x1, #128]
298
- ; CHECK-NEXT: mov z16.d, z7.d
299
- ; CHECK-NEXT: sunpklo z0.d, z0.s
300
- ; CHECK-NEXT: stp q17, q5, [x1]
301
- ; CHECK-NEXT: sunpklo z5.d, z7.s
302
- ; CHECK-NEXT: sunpklo z4.d, z6.s
303
- ; CHECK-NEXT: mov z6.d, z1.d
304
- ; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
289
+ ; CHECK-NEXT: sunpklo z5.d, z5.s
290
+ ; CHECK-NEXT: sunpklo z1.s, z1.h
291
+ ; CHECK-NEXT: sunpklo z19.d, z16.s
292
+ ; CHECK-NEXT: sunpklo z6.d, z6.s
293
+ ; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
294
+ ; CHECK-NEXT: stp q7, q4, [x1, #128]
305
295
; CHECK-NEXT: mov z7.d, z2.d
306
- ; CHECK-NEXT: stp q19, q0, [x1, #160]
307
- ; CHECK-NEXT: sunpklo z0.d, z2.s
308
- ; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
309
- ; CHECK-NEXT: sunpklo z1.d, z1.s
310
- ; CHECK-NEXT: stp q18, q4, [x1, #192]
311
296
; CHECK-NEXT: mov z4.d, z3.d
312
- ; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
297
+ ; CHECK-NEXT: stp q17, q5, [x1]
298
+ ; CHECK-NEXT: mov z5.d, z0.d
313
299
; CHECK-NEXT: sunpklo z16.d, z16.s
314
- ; CHECK-NEXT: sunpklo z6.d, z6.s
300
+ ; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
315
301
; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
316
- ; CHECK-NEXT: sunpklo z2.d, z7.s
302
+ ; CHECK-NEXT: stp q18, q6, [x1, #192]
303
+ ; CHECK-NEXT: mov z6.d, z1.d
304
+ ; CHECK-NEXT: sunpklo z2.d, z2.s
317
305
; CHECK-NEXT: sunpklo z3.d, z3.s
318
- ; CHECK-NEXT: stp q5, q16, [x1, #64]
319
- ; CHECK-NEXT: stp q1, q6, [x1, #32]
320
- ; CHECK-NEXT: sunpklo z1.d, z4.s
306
+ ; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
307
+ ; CHECK-NEXT: sunpklo z0.d, z0.s
308
+ ; CHECK-NEXT: sunpklo z7.d, z7.s
309
+ ; CHECK-NEXT: sunpklo z4.d, z4.s
310
+ ; CHECK-NEXT: stp q19, q16, [x1, #64]
311
+ ; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
312
+ ; CHECK-NEXT: sunpklo z1.d, z1.s
313
+ ; CHECK-NEXT: stp q3, q4, [x1, #32]
314
+ ; CHECK-NEXT: sunpklo z3.d, z6.s
315
+ ; CHECK-NEXT: stp q2, q7, [x1, #160]
316
+ ; CHECK-NEXT: sunpklo z2.d, z5.s
317
+ ; CHECK-NEXT: stp q1, q3, [x1, #96]
321
318
; CHECK-NEXT: stp q0, q2, [x1, #224]
322
- ; CHECK-NEXT: stp q3, q1, [x1, #96]
323
319
; CHECK-NEXT: ret
324
320
%a = load <32 x i8 >, ptr %in
325
321
%b = add <32 x i8 > %a , %a
@@ -661,22 +657,20 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
661
657
; CHECK-NEXT: uunpklo z4.d, z2.s
662
658
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
663
659
; CHECK-NEXT: uunpklo z0.s, z0.h
664
- ; CHECK-NEXT: mov z7.d, z1.d
665
- ; CHECK-NEXT: uunpklo z2.d, z2.s
660
+ ; CHECK-NEXT: uunpklo z7.d, z1.s
661
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
666
662
; CHECK-NEXT: uunpklo z5.d, z3.s
667
663
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
668
- ; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
664
+ ; CHECK-NEXT: uunpklo z2.d, z2.s
669
665
; CHECK-NEXT: uunpklo z1.d, z1.s
670
- ; CHECK-NEXT: mov z6.d, z0.d
666
+ ; CHECK-NEXT: uunpklo z6.d, z0.s
667
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
671
668
; CHECK-NEXT: uunpklo z3.d, z3.s
672
669
; CHECK-NEXT: stp q4, q2, [x0]
673
- ; CHECK-NEXT: uunpklo z4.d, z7.s
674
- ; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
675
670
; CHECK-NEXT: uunpklo z0.d, z0.s
671
+ ; CHECK-NEXT: stp q7, q1, [x0, #32]
676
672
; CHECK-NEXT: stp q5, q3, [x0, #64]
677
- ; CHECK-NEXT: uunpklo z2.d, z6.s
678
- ; CHECK-NEXT: stp q1, q4, [x0, #32]
679
- ; CHECK-NEXT: stp q0, q2, [x0, #96]
673
+ ; CHECK-NEXT: stp q6, q0, [x0, #96]
680
674
; CHECK-NEXT: ret
681
675
%b = zext <16 x i8 > %a to <16 x i64 >
682
676
store <16 x i64 > %b , ptr %out
@@ -689,62 +683,60 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
689
683
; CHECK-NEXT: ldp q1, q0, [x0]
690
684
; CHECK-NEXT: add z0.b, z0.b, z0.b
691
685
; CHECK-NEXT: add z1.b, z1.b, z1.b
692
- ; CHECK-NEXT: mov z2.d, z0.d
686
+ ; CHECK-NEXT: uunpklo z2.h, z0.b
687
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
688
+ ; CHECK-NEXT: uunpklo z3.h, z1.b
689
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
693
690
; CHECK-NEXT: uunpklo z0.h, z0.b
694
- ; CHECK-NEXT: mov z3.d, z1.d
691
+ ; CHECK-NEXT: uunpklo z4.s, z2.h
695
692
; CHECK-NEXT: uunpklo z1.h, z1.b
693
+ ; CHECK-NEXT: uunpklo z5.s, z3.h
696
694
; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
697
695
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
698
- ; CHECK-NEXT: uunpklo z4.s, z0.h
696
+ ; CHECK-NEXT: uunpklo z6.s, z0.h
699
697
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
700
- ; CHECK-NEXT: uunpklo z5.s, z1.h
701
- ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
702
- ; CHECK-NEXT: uunpklo z2.h, z2.b
703
- ; CHECK-NEXT: uunpklo z3.h, z3.b
704
- ; CHECK-NEXT: uunpklo z0.s, z0.h
705
- ; CHECK-NEXT: uunpklo z16.d, z4.s
698
+ ; CHECK-NEXT: uunpklo z7.d, z4.s
706
699
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
707
- ; CHECK-NEXT: uunpklo z1.s, z1.h
700
+ ; CHECK-NEXT: uunpklo z2.s, z2.h
701
+ ; CHECK-NEXT: uunpklo z3.s, z3.h
702
+ ; CHECK-NEXT: uunpklo z16.s, z1.h
708
703
; CHECK-NEXT: uunpklo z17.d, z5.s
709
704
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
710
- ; CHECK-NEXT: uunpklo z6.s, z2.h
711
- ; CHECK-NEXT: uunpklo z7.s, z3.h
712
- ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
705
+ ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
706
+ ; CHECK-NEXT: uunpklo z0.s, z0.h
713
707
; CHECK-NEXT: uunpklo z4.d, z4.s
714
- ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
715
- ; CHECK-NEXT: uunpklo z19.d, z0.s
716
- ; CHECK-NEXT: uunpklo z5.d, z5.s
717
- ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
718
- ; CHECK-NEXT: uunpklo z2.s, z2.h
719
708
; CHECK-NEXT: uunpklo z18.d, z6.s
720
709
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
721
- ; CHECK-NEXT: uunpklo z3.s, z3.h
722
- ; CHECK-NEXT: stp q16, q4, [x1, #128]
723
- ; CHECK-NEXT: mov z16.d, z7.d
724
- ; CHECK-NEXT: uunpklo z0.d, z0.s
725
- ; CHECK-NEXT: stp q17, q5, [x1]
726
- ; CHECK-NEXT: uunpklo z5.d, z7.s
727
- ; CHECK-NEXT: uunpklo z4.d, z6.s
728
- ; CHECK-NEXT: mov z6.d, z1.d
729
- ; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8
710
+ ; CHECK-NEXT: uunpklo z5.d, z5.s
711
+ ; CHECK-NEXT: uunpklo z1.s, z1.h
712
+ ; CHECK-NEXT: uunpklo z19.d, z16.s
713
+ ; CHECK-NEXT: uunpklo z6.d, z6.s
714
+ ; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
715
+ ; CHECK-NEXT: stp q7, q4, [x1, #128]
730
716
; CHECK-NEXT: mov z7.d, z2.d
731
- ; CHECK-NEXT: stp q19, q0, [x1, #160]
732
- ; CHECK-NEXT: uunpklo z0.d, z2.s
733
- ; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
734
- ; CHECK-NEXT: uunpklo z1.d, z1.s
735
- ; CHECK-NEXT: stp q18, q4, [x1, #192]
736
717
; CHECK-NEXT: mov z4.d, z3.d
737
- ; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
718
+ ; CHECK-NEXT: stp q17, q5, [x1]
719
+ ; CHECK-NEXT: mov z5.d, z0.d
738
720
; CHECK-NEXT: uunpklo z16.d, z16.s
739
- ; CHECK-NEXT: uunpklo z6.d, z6.s
721
+ ; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8
740
722
; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8
741
- ; CHECK-NEXT: uunpklo z2.d, z7.s
723
+ ; CHECK-NEXT: stp q18, q6, [x1, #192]
724
+ ; CHECK-NEXT: mov z6.d, z1.d
725
+ ; CHECK-NEXT: uunpklo z2.d, z2.s
742
726
; CHECK-NEXT: uunpklo z3.d, z3.s
743
- ; CHECK-NEXT: stp q5, q16, [x1, #64]
744
- ; CHECK-NEXT: stp q1, q6, [x1, #32]
745
- ; CHECK-NEXT: uunpklo z1.d, z4.s
727
+ ; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
728
+ ; CHECK-NEXT: uunpklo z0.d, z0.s
729
+ ; CHECK-NEXT: uunpklo z7.d, z7.s
730
+ ; CHECK-NEXT: uunpklo z4.d, z4.s
731
+ ; CHECK-NEXT: stp q19, q16, [x1, #64]
732
+ ; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
733
+ ; CHECK-NEXT: uunpklo z1.d, z1.s
734
+ ; CHECK-NEXT: stp q3, q4, [x1, #32]
735
+ ; CHECK-NEXT: uunpklo z3.d, z6.s
736
+ ; CHECK-NEXT: stp q2, q7, [x1, #160]
737
+ ; CHECK-NEXT: uunpklo z2.d, z5.s
738
+ ; CHECK-NEXT: stp q1, q3, [x1, #96]
746
739
; CHECK-NEXT: stp q0, q2, [x1, #224]
747
- ; CHECK-NEXT: stp q3, q1, [x1, #96]
748
740
; CHECK-NEXT: ret
749
741
%a = load <32 x i8 >, ptr %in
750
742
%b = add <32 x i8 > %a , %a
0 commit comments