llvm/test/CodeGen/RISCV/rvv/scalable-vectors-interleaved-access.ll
107 lines changed: 107 additions & 0 deletions
@@ -507,6 +507,113 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
   ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3
 }
 
+; We should not transform this function because the expression is not a balanced tree.
+define {<vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32>} @not_balanced_load_tree(ptr %ptr, i32 %rvl) {
+; RV32-LABEL: not_balanced_load_tree:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT: vle32.v v12, (a0)
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32-NEXT: vnsrl.wx v8, v12, a0
+; RV32-NEXT: vnsrl.wi v16, v12, 0
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT: vnsrl.wi v10, v16, 0
+; RV32-NEXT: vnsrl.wx v11, v16, a0
+; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV32-NEXT: vnsrl.wx v12, v11, a0
+; RV32-NEXT: vnsrl.wi v11, v11, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: not_balanced_load_tree:
+; RV64: # %bb.0:
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vle32.v v12, (a0)
+; RV64-NEXT: li a0, 32
+; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV64-NEXT: vnsrl.wx v8, v12, a0
+; RV64-NEXT: vnsrl.wi v16, v12, 0
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT: vnsrl.wi v10, v16, 0
+; RV64-NEXT: vnsrl.wx v11, v16, a0
+; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV64-NEXT: vnsrl.wx v12, v11, a0
+; RV64-NEXT: vnsrl.wi v11, v11, 0
+; RV64-NEXT: ret
+  %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %rvl)
+  %d0 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
+  %d0.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
+  %t0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 1
+  %d1 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.0)
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 0
+  %d1.1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 1
+  %d2 = call { <vscale x 1 x i32>, <vscale x 1 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 2 x i32> %d1.1)
+  %t2 = extractvalue { <vscale x 1 x i32>, <vscale x 1 x i32> } %d2, 0
+  %t3 = extractvalue { <vscale x 1 x i32>, <vscale x 1 x i32> } %d2, 1
+
+  %res0 = insertvalue { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } undef, <vscale x 4 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } %res1, <vscale x 1 x i32> %t2, 2
+  %res3 = insertvalue { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } %res2, <vscale x 1 x i32> %t3, 3
+  ret { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } %res3
+}
+
+define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32> %v1, <vscale x 4 x i32> %v2, ptr %ptr, i32 %rvl) {
+; RV32-LABEL: not_balanced_store_tree:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
+; RV32-NEXT: vwaddu.vv v12, v8, v8
+; RV32-NEXT: li a2, -1
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: vwmaccu.vx v12, a2, v8
+; RV32-NEXT: srli a3, a3, 3
+; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vx v8, v12, a3
+; RV32-NEXT: add a4, a3, a3
+; RV32-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; RV32-NEXT: vslideup.vx v12, v8, a3
+; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; RV32-NEXT: vwaddu.vv v14, v12, v9
+; RV32-NEXT: vwmaccu.vx v14, a2, v9
+; RV32-NEXT: vsetvli a3, zero, e32, m2, ta, ma
+; RV32-NEXT: vwaddu.vv v16, v14, v10
+; RV32-NEXT: vwmaccu.vx v16, a2, v10
+; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT: vse32.v v16, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: not_balanced_store_tree:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
+; RV64-NEXT: vwaddu.vv v12, v8, v8
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: vwmaccu.vx v12, a2, v8
+; RV64-NEXT: srli a3, a3, 3
+; RV64-NEXT: vsetvli a4, zero, e32, m1, ta, ma
+; RV64-NEXT: vslidedown.vx v8, v12, a3
+; RV64-NEXT: add a4, a3, a3
+; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma
+; RV64-NEXT: vslideup.vx v12, v8, a3
+; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; RV64-NEXT: vwaddu.vv v14, v12, v9
+; RV64-NEXT: vwmaccu.vx v14, a2, v9
+; RV64-NEXT: vsetvli a3, zero, e32, m2, ta, ma
+; RV64-NEXT: vwaddu.vv v16, v14, v10
+; RV64-NEXT: vwmaccu.vx v16, a2, v10
+; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vse32.v v16, (a0)
+; RV64-NEXT: ret
+  %interleaved.vec0 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
+  %interleaved.vec1 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 2 x i32> %interleaved.vec0, <vscale x 2 x i32> %v1)
+  %interleaved.vec2 = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 4 x i32> %interleaved.vec1, <vscale x 4 x i32> %v2)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %interleaved.vec2, ptr %ptr, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %rvl)
+  ret void
+}
 
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
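For contrast with the two negative tests added above, the following is a minimal sketch, not part of the patch and not meant to be added to the test file, of the shape the pass does recognize: a balanced factor-2 tree where a single vp.load feeds one deinterleave2 and both halves are used directly. The function name and the expectation that this lowers to a segmented load (vlseg2e32) are illustrative assumptions, not checked output from this patch.

; Illustrative sketch only, assuming a balanced factor-2 tree:
; one vp.load, one deinterleave2, both results returned as-is,
; which the interleaved-access lowering can turn into a segmented load.
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @balanced_load_tree_sketch(ptr %ptr, i32 %rvl) {
  %wide = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 %rvl)
  %d = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide)
  %even = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 0
  %odd = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 1
  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, <vscale x 2 x i32> %even, 0
  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %odd, 1
  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
}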