@@ -564,3 +564,133 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
store <8 x i64> %d, ptr %x
ret void
}
+
+ ; The next batch of tests cover inserts into high LMUL vectors when the
+ ; exact VLEN is known.  FIXME: These can directly access the sub-registers
+
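+ ; An illustrative sketch of the FIXME above (hand-written, not generated
+ ; output): with vscale_range(2,2), VLEN is 128, so each m1 register holds
+ ; four i32 elements and a <16 x i32> value occupies the m4 group v8-v11.
+ ; An insert at index 12 (see insertelt_c12_v16xi32_exact below) could then
+ ; touch only the v11 sub-register instead of sliding across the whole m4
+ ; group, e.g. hypothetically:
+ ;   vsetivli zero, 1, e32, m1, tu, ma
+ ;   vmv.s.x v11, a0
+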
+ define <16 x i32> @insertelt_c0_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
+ ; CHECK-LABEL: insertelt_c0_v16xi32_exact:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 16, e32, m1, tu, ma
+ ; CHECK-NEXT: vmv.s.x v8, a0
+ ; CHECK-NEXT: ret
+ %v = insertelement <16 x i32> %vin, i32 %a, i32 0
+ ret <16 x i32> %v
+ }
+
+ define <16 x i32> @insertelt_c1_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
+ ; CHECK-LABEL: insertelt_c1_v16xi32_exact:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+ ; CHECK-NEXT: vmv.s.x v12, a0
+ ; CHECK-NEXT: vslideup.vi v8, v12, 1
+ ; CHECK-NEXT: ret
+ %v = insertelement <16 x i32> %vin, i32 %a, i32 1
+ ret <16 x i32> %v
+ }
+
+ define <16 x i32> @insertelt_c2_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
+ ; CHECK-LABEL: insertelt_c2_v16xi32_exact:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+ ; CHECK-NEXT: vmv.s.x v12, a0
+ ; CHECK-NEXT: vslideup.vi v8, v12, 2
+ ; CHECK-NEXT: ret
+ %v = insertelement <16 x i32> %vin, i32 %a, i32 2
+ ret <16 x i32> %v
+ }
+
+ define <16 x i32> @insertelt_c3_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
+ ; CHECK-LABEL: insertelt_c3_v16xi32_exact:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+ ; CHECK-NEXT: vmv.s.x v12, a0
+ ; CHECK-NEXT: vslideup.vi v8, v12, 3
+ ; CHECK-NEXT: ret
+ %v = insertelement <16 x i32> %vin, i32 %a, i32 3
+ ret <16 x i32> %v
+ }
+
+ define <16 x i32> @insertelt_c12_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
+ ; CHECK-LABEL: insertelt_c12_v16xi32_exact:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 13, e32, m4, tu, ma
+ ; CHECK-NEXT: vmv.s.x v12, a0
+ ; CHECK-NEXT: vslideup.vi v8, v12, 12
+ ; CHECK-NEXT: ret
+ %v = insertelement <16 x i32> %vin, i32 %a, i32 12
+ ret <16 x i32> %v
+ }
+
+ define <16 x i32> @insertelt_c13_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
+ ; CHECK-LABEL: insertelt_c13_v16xi32_exact:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 14, e32, m4, tu, ma
+ ; CHECK-NEXT: vmv.s.x v12, a0
+ ; CHECK-NEXT: vslideup.vi v8, v12, 13
+ ; CHECK-NEXT: ret
+ %v = insertelement <16 x i32> %vin, i32 %a, i32 13
+ ret <16 x i32> %v
+ }
+
+ define <16 x i32> @insertelt_c14_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
+ ; CHECK-LABEL: insertelt_c14_v16xi32_exact:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 15, e32, m4, tu, ma
+ ; CHECK-NEXT: vmv.s.x v12, a0
+ ; CHECK-NEXT: vslideup.vi v8, v12, 14
+ ; CHECK-NEXT: ret
+ %v = insertelement <16 x i32> %vin, i32 %a, i32 14
+ ret <16 x i32> %v
+ }
+
+ define <16 x i32> @insertelt_c15_v16xi32_exact(<16 x i32> %vin, i32 %a) vscale_range(2,2) {
+ ; CHECK-LABEL: insertelt_c15_v16xi32_exact:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+ ; CHECK-NEXT: vmv.s.x v12, a0
+ ; CHECK-NEXT: vslideup.vi v8, v12, 15
+ ; CHECK-NEXT: ret
+ %v = insertelement <16 x i32> %vin, i32 %a, i32 15
+ ret <16 x i32> %v
+ }
+
+ define <8 x i64> @insertelt_c4_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range(2,2) {
+ ; RV32-LABEL: insertelt_c4_v8xi64_exact:
+ ; RV32: # %bb.0:
+ ; RV32-NEXT: vsetivli zero, 2, e32, m4, ta, ma
+ ; RV32-NEXT: vslide1down.vx v12, v8, a0
+ ; RV32-NEXT: vslide1down.vx v12, v12, a1
+ ; RV32-NEXT: vsetivli zero, 5, e64, m4, tu, ma
+ ; RV32-NEXT: vslideup.vi v8, v12, 4
+ ; RV32-NEXT: ret
+ ;
+ ; RV64-LABEL: insertelt_c4_v8xi64_exact:
+ ; RV64: # %bb.0:
+ ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
+ ; RV64-NEXT: vmv.s.x v12, a0
+ ; RV64-NEXT: vslideup.vi v8, v12, 4
+ ; RV64-NEXT: ret
+ %v = insertelement <8 x i64> %vin, i64 %a, i32 4
+ ret <8 x i64> %v
+ }
+
+ define <8 x i64> @insertelt_c5_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range(2,2) {
+ ; RV32-LABEL: insertelt_c5_v8xi64_exact:
+ ; RV32: # %bb.0:
+ ; RV32-NEXT: vsetivli zero, 2, e32, m4, ta, ma
+ ; RV32-NEXT: vslide1down.vx v12, v8, a0
+ ; RV32-NEXT: vslide1down.vx v12, v12, a1
+ ; RV32-NEXT: vsetivli zero, 6, e64, m4, tu, ma
+ ; RV32-NEXT: vslideup.vi v8, v12, 5
+ ; RV32-NEXT: ret
+ ;
+ ; RV64-LABEL: insertelt_c5_v8xi64_exact:
+ ; RV64: # %bb.0:
+ ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
+ ; RV64-NEXT: vmv.s.x v12, a0
+ ; RV64-NEXT: vslideup.vi v8, v12, 5
+ ; RV64-NEXT: ret
+ %v = insertelement <8 x i64> %vin, i64 %a, i32 5
+ ret <8 x i64> %v
+ }