@@ -722,97 +722,25 @@ define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) {
 
 ; FIXME: This could be expressed as a vrgather.vv
 define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
-; RV32-LABEL: shuffle_v64i8_v8i8:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -128
-; RV32-NEXT:    .cfi_def_cfa_offset 128
-; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset s0, -8
-; RV32-NEXT:    addi s0, sp, 128
-; RV32-NEXT:    .cfi_def_cfa s0, 0
-; RV32-NEXT:    andi sp, sp, -64
-; RV32-NEXT:    li a0, 64
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV32-NEXT:    vse8.v v8, (a1)
-; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT:    vslidedown.vi v10, v8, 8
-; RV32-NEXT:    vmv.x.s a0, v10
-; RV32-NEXT:    vmv.x.s a1, v8
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v12, v8, 16
-; RV32-NEXT:    vmv.x.s a0, v12
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
-; RV32-NEXT:    vslidedown.vi v8, v8, 24
-; RV32-NEXT:    vmv.x.s a0, v8
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v10, a0
-; RV32-NEXT:    lbu a0, 32(sp)
-; RV32-NEXT:    lbu a1, 40(sp)
-; RV32-NEXT:    lbu a2, 48(sp)
-; RV32-NEXT:    lbu a3, 56(sp)
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    addi sp, s0, -128
-; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 128
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: shuffle_v64i8_v8i8:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -128
-; RV64-NEXT:    .cfi_def_cfa_offset 128
-; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
-; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 128
-; RV64-NEXT:    .cfi_def_cfa s0, 0
-; RV64-NEXT:    andi sp, sp, -64
-; RV64-NEXT:    li a0, 64
-; RV64-NEXT:    mv a1, sp
-; RV64-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; RV64-NEXT:    vse8.v v8, (a1)
-; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64-NEXT:    vslidedown.vi v10, v8, 8
-; RV64-NEXT:    vmv.x.s a0, v10
-; RV64-NEXT:    vmv.x.s a1, v8
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vmv.v.x v10, a1
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v12, v8, 16
-; RV64-NEXT:    vmv.x.s a0, v12
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
-; RV64-NEXT:    vslidedown.vi v8, v8, 24
-; RV64-NEXT:    vmv.x.s a0, v8
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v10, a0
-; RV64-NEXT:    lbu a0, 32(sp)
-; RV64-NEXT:    lbu a1, 40(sp)
-; RV64-NEXT:    lbu a2, 48(sp)
-; RV64-NEXT:    lbu a3, 56(sp)
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    addi sp, s0, -128
-; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 128
-; RV64-NEXT:    ret
+; CHECK-LABEL: shuffle_v64i8_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vsll.vi v14, v12, 3
+; CHECK-NEXT:    vrgather.vv v12, v8, v14
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    li a1, 240
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmv.s.x v0, a1
+; CHECK-NEXT:    lui a1, 98561
+; CHECK-NEXT:    addi a1, a1, -2048
+; CHECK-NEXT:    vmv.v.x v10, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vrgather.vv v12, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v12
+; CHECK-NEXT:    ret
   %s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
   ret <8 x i8> %s
 }