@@ -719,3 +719,100 @@ define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) {
   %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i32> %s
 }
+
+; FIXME: This could be expressed as a vrgather.vv
+define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
+; RV32-LABEL: shuffle_v64i8_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -128
+; RV32-NEXT:    .cfi_def_cfa_offset 128
+; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    addi s0, sp, 128
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    andi sp, sp, -64
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV32-NEXT:    vse8.v v8, (a1)
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 8
+; RV32-NEXT:    vmv.x.s a0, v10
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 16
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v8, 24
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v8, v10, a0
+; RV32-NEXT:    lbu a0, 32(sp)
+; RV32-NEXT:    lbu a1, 40(sp)
+; RV32-NEXT:    lbu a2, 48(sp)
+; RV32-NEXT:    lbu a3, 56(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    addi sp, s0, -128
+; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 128
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shuffle_v64i8_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -128
+; RV64-NEXT:    .cfi_def_cfa_offset 128
+; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    addi s0, sp, 128
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    andi sp, sp, -64
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    mv a1, sp
+; RV64-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV64-NEXT:    vse8.v v8, (a1)
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 8
+; RV64-NEXT:    vmv.x.s a0, v10
+; RV64-NEXT:    vmv.x.s a1, v8
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v8, 24
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v8, v10, a0
+; RV64-NEXT:    lbu a0, 32(sp)
+; RV64-NEXT:    lbu a1, 40(sp)
+; RV64-NEXT:    lbu a2, 48(sp)
+; RV64-NEXT:    lbu a3, 56(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    addi sp, s0, -128
+; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 128
+; RV64-NEXT:    ret
+  %s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
+  ret <8 x i8> %s
+}
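
A note on the FIXME above: the mask <0, 8, 16, 24, 32, 40, 48, 56> selects every eighth byte of the 64-byte source, so in principle a single indexed gather could replace the store-to-stack-and-reload sequence. A minimal sketch of what a vrgather.vv lowering might look like, assuming VLEN >= 128 so the whole source sits in the m4 register group v8-v11; the register choices and instruction selection here are illustrative, not taken from the patch:

    vsetivli    zero, 8, e8, m4, ta, ma   # only 8 result bytes are needed
    vid.v       v12                       # v12 = {0, 1, 2, ..., 7}
    vsll.vi     v12, v12, 3               # v12 = {0, 8, 16, ..., 56}
    vrgather.vv v16, v8, v12              # gather every eighth byte of the source
    vmv1r.v     v8, v16                   # return the 8-byte result in v8

This would avoid the frame setup and the scalar lbu reloads entirely; whether the backend actually emits it depends on how this shuffle gets matched, which is what the FIXME tracks.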