@@ -493,41 +493,38 @@ define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef %
493
493
define void @transpose_s16_8x8_ (ptr nocapture noundef %0 ) {
494
494
; CHECK-LABEL: transpose_s16_8x8_:
495
495
; CHECK: // %bb.0:
496
- ; CHECK-NEXT: mov x8, x0
496
+ ; CHECK-NEXT: ldp q0, q1, [x0]
497
+ ; CHECK-NEXT: ldp q2, q3, [x0, #32]
497
498
; CHECK-NEXT: ldp q4, q5, [x0, #64]
498
- ; CHECK-NEXT: mov x9, x0
499
- ; CHECK-NEXT: ldr q0, [x8, #16]!
500
- ; CHECK-NEXT: mov x10, x0
501
- ; CHECK-NEXT: ldr q3, [x0]
502
499
; CHECK-NEXT: ldp q6, q7, [x0, #96]
500
+ ; CHECK-NEXT: trn1 v16.8h, v0.8h, v1.8h
501
+ ; CHECK-NEXT: trn2 v0.8h, v0.8h, v1.8h
502
+ ; CHECK-NEXT: trn1 v1.8h, v2.8h, v3.8h
503
+ ; CHECK-NEXT: trn2 v2.8h, v2.8h, v3.8h
503
504
; CHECK-NEXT: trn1 v17.8h, v4.8h, v5.8h
504
- ; CHECK-NEXT: ldr q1, [x9, #32]!
505
- ; CHECK-NEXT: trn1 v16.8h, v3.8h, v0.8h
506
- ; CHECK-NEXT: ldr q2, [x10, #48]!
507
- ; CHECK-NEXT: trn2 v4.8h, v4.8h, v5.8h
508
- ; CHECK-NEXT: trn1 v19.8h, v6.8h, v7.8h
509
- ; CHECK-NEXT: trn2 v0.8h, v3.8h, v0.8h
510
- ; CHECK-NEXT: trn2 v3.8h, v6.8h, v7.8h
511
- ; CHECK-NEXT: trn1 v18.8h, v1.8h, v2.8h
512
- ; CHECK-NEXT: trn2 v1.8h, v1.8h, v2.8h
505
+ ; CHECK-NEXT: trn2 v3.8h, v4.8h, v5.8h
506
+ ; CHECK-NEXT: trn1 v18.8h, v6.8h, v7.8h
507
+ ; CHECK-NEXT: trn2 v4.8h, v6.8h, v7.8h
513
508
; CHECK-NEXT: trn1 v5.4s, v16.4s, v17.4s
509
+ ; CHECK-NEXT: trn1 v7.4s, v0.4s, v3.4s
514
510
; CHECK-NEXT: trn2 v16.4s, v16.4s, v17.4s
515
- ; CHECK-NEXT: trn1 v20.4s, v0.4s, v4.4s
516
- ; CHECK-NEXT: trn1 v6.4s, v18.4s, v19.4s
517
- ; CHECK-NEXT: trn2 v17.4s, v18.4s, v19.4s
518
- ; CHECK-NEXT: trn2 v18.4s, v0.4s, v4.4s
519
- ; CHECK-NEXT: trn1 v21.4s, v1.4s, v3.4s
520
- ; CHECK-NEXT: trn2 v19.4s, v1.4s, v3.4s
521
- ; CHECK-NEXT: zip2 v0.4s, v5.4s, v6.4s
522
- ; CHECK-NEXT: zip2 v2.4s, v16.4s, v17.4s
523
- ; CHECK-NEXT: st2 { v5.2s, v6.2s }, [x0]
524
- ; CHECK-NEXT: zip2 v1.4s, v20.4s, v21.4s
525
- ; CHECK-NEXT: zip2 v3.4s, v18.4s, v19.4s
526
- ; CHECK-NEXT: st2 { v20.2s, v21.2s }, [x8]
527
- ; CHECK-NEXT: st2 { v16.2s, v17.2s }, [x9]
528
- ; CHECK-NEXT: st2 { v18.2s, v19.2s }, [x10]
529
- ; CHECK-NEXT: stp q0, q1, [x0, #64]
530
- ; CHECK-NEXT: stp q2, q3, [x0, #96]
511
+ ; CHECK-NEXT: trn1 v6.4s, v1.4s, v18.4s
512
+ ; CHECK-NEXT: trn1 v19.4s, v2.4s, v4.4s
513
+ ; CHECK-NEXT: trn2 v1.4s, v1.4s, v18.4s
514
+ ; CHECK-NEXT: trn2 v0.4s, v0.4s, v3.4s
515
+ ; CHECK-NEXT: trn2 v2.4s, v2.4s, v4.4s
516
+ ; CHECK-NEXT: zip1 v3.4s, v5.4s, v6.4s
517
+ ; CHECK-NEXT: zip1 v4.4s, v7.4s, v19.4s
518
+ ; CHECK-NEXT: zip1 v17.4s, v16.4s, v1.4s
519
+ ; CHECK-NEXT: zip1 v18.4s, v0.4s, v2.4s
520
+ ; CHECK-NEXT: zip2 v5.4s, v5.4s, v6.4s
521
+ ; CHECK-NEXT: zip2 v1.4s, v16.4s, v1.4s
522
+ ; CHECK-NEXT: zip2 v0.4s, v0.4s, v2.4s
523
+ ; CHECK-NEXT: stp q3, q4, [x0]
524
+ ; CHECK-NEXT: zip2 v3.4s, v7.4s, v19.4s
525
+ ; CHECK-NEXT: stp q17, q18, [x0, #32]
526
+ ; CHECK-NEXT: stp q1, q0, [x0, #96]
527
+ ; CHECK-NEXT: stp q5, q3, [x0, #64]
531
528
; CHECK-NEXT: ret
532
529
%2 = load <8 x i16 >, ptr %0 , align 16
533
530
%3 = getelementptr inbounds <8 x i16 >, ptr %0 , i64 1
0 commit comments