Skip to content

Commit c55a080

Browse files
committed
[RISCV] Add shuffle coverage for compress, decompress, and repeat idioms
compress is intended to match vcompress from the ISA manual. Note that deinterleave is a subset of this, and is already tested elsewhere. decompress is the synthetic pattern defined in the same manual - though we can often do better than the mentioned iota/vrgather. Note that some of these can also be expressed as an interleave with at least one undef source, and those are already tested elsewhere. repeat repeats each input element N times in the output. It can be described as an interleave operation, but we can sometimes do better lowering-wise.
1 parent b1a34b8 commit c55a080

File tree

1 file changed

+197
-0
lines changed

1 file changed

+197
-0
lines changed

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,3 +744,200 @@ define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
744744
%s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
745745
ret <8 x i8> %s
746746
}
747+
748+
; Single-source compress on <8 x i8>: source elements 0, 2, 4, 5, 7 are packed
; into result lanes 0-4 and lanes 5-7 are undef. The checks below expect the
; index vector to be loaded from a constant pool and applied with vrgather.vv.
define <8 x i8> @shuffle_compress_singlesrc_e8(<8 x i8> %v) {
749+
; CHECK-LABEL: shuffle_compress_singlesrc_e8:
750+
; CHECK: # %bb.0:
751+
; CHECK-NEXT: lui a0, %hi(.LCPI49_0)
752+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0)
753+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
754+
; CHECK-NEXT: vle8.v v10, (a0)
755+
; CHECK-NEXT: vrgather.vv v9, v8, v10
756+
; CHECK-NEXT: vmv1r.v v8, v9
757+
; CHECK-NEXT: ret
758+
%out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
759+
ret <8 x i8> %out
760+
}
761+
762+
; Single-source compress on <8 x i16>: source elements 0, 2, 4, 5, 7 are packed
; into result lanes 0-4 and lanes 5-7 are undef. The checks below expect a
; constant-pool index vector (vle16.v) consumed by vrgather.vv at e16/m1.
define <8 x i16> @shuffle_compress_singlesrc_e16(<8 x i16> %v) {
763+
; CHECK-LABEL: shuffle_compress_singlesrc_e16:
764+
; CHECK: # %bb.0:
765+
; CHECK-NEXT: lui a0, %hi(.LCPI50_0)
766+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0)
767+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
768+
; CHECK-NEXT: vle16.v v10, (a0)
769+
; CHECK-NEXT: vrgather.vv v9, v8, v10
770+
; CHECK-NEXT: vmv.v.v v8, v9
771+
; CHECK-NEXT: ret
772+
%out = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
773+
ret <8 x i16> %out
774+
}
775+
776+
; Single-source compress on <8 x i32>: source elements 0, 1, 4, 5, 6 are packed
; into result lanes 0-4 and lanes 5-7 are undef. The checks below expect 16-bit
; indices loaded from a constant pool and applied with vrgatherei16.vv at e32/m2.
; NOTE(review): the e8/e16/e64 variants of this test select 0, 2, 4, 5, 7 while
; this one selects 0, 1, 4, 5, 6 - confirm the different mask is intentional.
define <8 x i32> @shuffle_compress_singlesrc_e32(<8 x i32> %v) {
777+
; CHECK-LABEL: shuffle_compress_singlesrc_e32:
778+
; CHECK: # %bb.0:
779+
; CHECK-NEXT: lui a0, %hi(.LCPI51_0)
780+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0)
781+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
782+
; CHECK-NEXT: vle16.v v12, (a0)
783+
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
784+
; CHECK-NEXT: vmv.v.v v8, v10
785+
; CHECK-NEXT: ret
786+
%out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef>
787+
ret <8 x i32> %out
788+
}
789+
790+
; Single-source compress on <8 x i64>: source elements 0, 2, 4, 5, 7 are packed
; into result lanes 0-4 and lanes 5-7 are undef. The checks below expect 16-bit
; indices loaded from a constant pool and applied with vrgatherei16.vv at e64/m4.
define <8 x i64> @shuffle_compress_singlesrc_e64(<8 x i64> %v) {
791+
; CHECK-LABEL: shuffle_compress_singlesrc_e64:
792+
; CHECK: # %bb.0:
793+
; CHECK-NEXT: lui a0, %hi(.LCPI52_0)
794+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0)
795+
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
796+
; CHECK-NEXT: vle16.v v16, (a0)
797+
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
798+
; CHECK-NEXT: vmv.v.v v8, v12
799+
; CHECK-NEXT: ret
800+
%out = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
801+
ret <8 x i64> %out
802+
}
803+
804+
; Compress variant with an undef lane inside the packed prefix: result lanes
; 0, 2, 3, 4 take source elements 0, 4, 5, 7, while lane 1 and lanes 5-7 are
; undef. The checks below expect a constant-pool index vector and
; vrgatherei16.vv at e32/m2.
define <8 x i32> @shuffle_compress_singlesrc_gaps_e32(<8 x i32> %v) {
805+
; CHECK-LABEL: shuffle_compress_singlesrc_gaps_e32:
806+
; CHECK: # %bb.0:
807+
; CHECK-NEXT: lui a0, %hi(.LCPI53_0)
808+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0)
809+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
810+
; CHECK-NEXT: vle16.v v12, (a0)
811+
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
812+
; CHECK-NEXT: vmv.v.v v8, v10
813+
; CHECK-NEXT: ret
814+
%out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 4, i32 5, i32 7, i32 undef, i32 undef, i32 undef>
815+
ret <8 x i32> %out
816+
}
817+
818+
; Stride-2 decompress on <8 x i32>: source element i (i = 0..3) lands in result
; lane 2*i and the odd lanes are undef. The checks below expect no gather; the
; result is formed with a vwaddu.vv / vwmaccu.vx pair at e32/m1 with VL=4.
define <8 x i32> @shuffle_decompress2_singlesrc_e32(<8 x i32> %v) {
819+
; CHECK-LABEL: shuffle_decompress2_singlesrc_e32:
820+
; CHECK: # %bb.0:
821+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
822+
; CHECK-NEXT: vwaddu.vv v10, v8, v8
823+
; CHECK-NEXT: li a0, -1
824+
; CHECK-NEXT: vwmaccu.vx v10, a0, v8
825+
; CHECK-NEXT: vmv2r.v v8, v10
826+
; CHECK-NEXT: ret
827+
%out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
828+
ret <8 x i32> %out
829+
}
830+
831+
; Stride-3 decompress on <8 x i32>: source elements 0, 1, 2 land in result
; lanes 0, 3, 6 and every other lane is undef. The expected code differs per
; target, so this test uses the RV32 and RV64 check prefixes: on RV32 the index
; vector is loaded from a constant pool, while on RV64 it is built in a scalar
; register (lui + slli) and splatted as two e64 elements with vmv.v.x. Both
; paths finish with vrgatherei16.vv at e32/m2.
define <8 x i32> @shuffle_decompress3_singlesrc_e32(<8 x i32> %v) {
832+
; RV32-LABEL: shuffle_decompress3_singlesrc_e32:
833+
; RV32: # %bb.0:
834+
; RV32-NEXT: lui a0, %hi(.LCPI55_0)
835+
; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0)
836+
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
837+
; RV32-NEXT: vle16.v v12, (a0)
838+
; RV32-NEXT: vrgatherei16.vv v10, v8, v12
839+
; RV32-NEXT: vmv.v.v v8, v10
840+
; RV32-NEXT: ret
841+
;
842+
; RV64-LABEL: shuffle_decompress3_singlesrc_e32:
843+
; RV64: # %bb.0:
844+
; RV64-NEXT: lui a0, 32769
845+
; RV64-NEXT: slli a0, a0, 21
846+
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
847+
; RV64-NEXT: vmv.v.x v12, a0
848+
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
849+
; RV64-NEXT: vrgatherei16.vv v10, v8, v12
850+
; RV64-NEXT: vmv.v.v v8, v10
851+
; RV64-NEXT: ret
852+
%out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 2, i32 undef>
853+
ret <8 x i32> %out
854+
}
855+
856+
; Stride-4 decompress on <8 x i32>: source elements 0 and 1 land in result
; lanes 0 and 4 and the remaining lanes are undef. The checks below record the
; current lowering, which computes indices as vid.v >> 2 at e16 and then uses
; vrgatherei16.vv at e32/m2.
; TODO: This should be a single vslideup.vi
856+
define <8 x i32> @shuffle_decompress4_singlesrc_e32(<8 x i32> %v) {
857+
; CHECK-LABEL: shuffle_decompress4_singlesrc_e32:
858+
; CHECK: # %bb.0:
859+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
860+
; CHECK-NEXT: vid.v v10
861+
; CHECK-NEXT: vsrl.vi v12, v10, 2
862+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
863+
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
864+
; CHECK-NEXT: vmv.v.v v8, v10
865+
; CHECK-NEXT: ret
866+
%out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
867+
ret <8 x i32> %out
868+
}
870+
871+
; Stride-4 decompress on <8 x i8>: source elements 0 and 1 land in result lanes
; 0 and 4 and the remaining lanes are undef. The checks below record the
; current lowering, which computes indices as vid.v >> 2 and uses vrgather.vv
; at e8/mf2.
; TODO: This should be either a single vslideup.vi or two widening interleaves.
871+
define <8 x i8> @shuffle_decompress4_singlesrc_e8(<8 x i8> %v) {
872+
; CHECK-LABEL: shuffle_decompress4_singlesrc_e8:
873+
; CHECK: # %bb.0:
874+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
875+
; CHECK-NEXT: vid.v v9
876+
; CHECK-NEXT: vsrl.vi v10, v9, 2
877+
; CHECK-NEXT: vrgather.vv v9, v8, v10
878+
; CHECK-NEXT: vmv1r.v v8, v9
879+
; CHECK-NEXT: ret
880+
%out = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
881+
ret <8 x i8> %out
882+
}
884+
885+
; Irregular decompress on <8 x i32>: source elements 0, 1, 3, 4 land in result
; lanes 0, 2, 4, 7 (no fixed stride) and the other lanes are undef. The checks
; below expect a constant-pool index vector and vrgatherei16.vv at e32/m2.
define <8 x i32> @shuffle_decompress_singlesrc_e32(<8 x i32> %v) {
885+
; CHECK-LABEL: shuffle_decompress_singlesrc_e32:
886+
; CHECK: # %bb.0:
887+
; CHECK-NEXT: lui a0, %hi(.LCPI58_0)
888+
; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0)
889+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
890+
; CHECK-NEXT: vle16.v v12, (a0)
891+
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
892+
; CHECK-NEXT: vmv.v.v v8, v10
893+
; CHECK-NEXT: ret
894+
%out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 3, i32 undef, i32 undef, i32 4>
895+
ret <8 x i32> %out
896+
}
898+
899+
; Repeat-by-2 on <8 x i32>: each of source elements 0..3 appears twice in a row
; (0,0,1,1,2,2,3,3). The checks below expect no gather; the result is formed
; with a vwaddu.vv / vwmaccu.vx pair at e32/m1 with VL=4.
define <8 x i32> @shuffle_repeat2_singlesrc_e32(<8 x i32> %v) {
900+
; CHECK-LABEL: shuffle_repeat2_singlesrc_e32:
901+
; CHECK: # %bb.0:
902+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
903+
; CHECK-NEXT: vwaddu.vv v10, v8, v8
904+
; CHECK-NEXT: li a0, -1
905+
; CHECK-NEXT: vwmaccu.vx v10, a0, v8
906+
; CHECK-NEXT: vmv2r.v v8, v10
907+
; CHECK-NEXT: ret
908+
%out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
909+
ret <8 x i32> %out
910+
}
911+
912+
; Repeat-by-3 on <8 x i32>: source elements 0 and 1 appear three times each and
; element 2 fills the last two lanes (0,0,0,1,1,1,2,2). The checks below expect
; the index vector to be built in registers: a splat of 1, then vmerge.vim with
; mask 7 to write 0 into lanes 0-2 and with mask 192 to write 2 into lanes 6-7,
; followed by vrgatherei16.vv at e32/m2.
define <8 x i32> @shuffle_repeat3_singlesrc_e32(<8 x i32> %v) {
913+
; CHECK-LABEL: shuffle_repeat3_singlesrc_e32:
914+
; CHECK: # %bb.0:
915+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
916+
; CHECK-NEXT: vmv.v.i v0, 7
917+
; CHECK-NEXT: vmv.v.i v11, 1
918+
; CHECK-NEXT: li a0, 192
919+
; CHECK-NEXT: vmv.s.x v10, a0
920+
; CHECK-NEXT: vmerge.vim v11, v11, 0, v0
921+
; CHECK-NEXT: vmv.v.v v0, v10
922+
; CHECK-NEXT: vmerge.vim v12, v11, 2, v0
923+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
924+
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
925+
; CHECK-NEXT: vmv.v.v v8, v10
926+
; CHECK-NEXT: ret
927+
%out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2>
928+
ret <8 x i32> %out
929+
}
930+
931+
; Repeat-by-4 on <8 x i32>: source elements 0 and 1 appear four times each
; (0,0,0,0,1,1,1,1). The checks below expect indices computed as vid.v >> 2 at
; e16, followed by vrgatherei16.vv at e32/m2.
define <8 x i32> @shuffle_repeat4_singlesrc_e32(<8 x i32> %v) {
932+
; CHECK-LABEL: shuffle_repeat4_singlesrc_e32:
933+
; CHECK: # %bb.0:
934+
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
935+
; CHECK-NEXT: vid.v v10
936+
; CHECK-NEXT: vsrl.vi v12, v10, 2
937+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
938+
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
939+
; CHECK-NEXT: vmv.v.v v8, v10
940+
; CHECK-NEXT: ret
941+
%out = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
942+
ret <8 x i32> %out
943+
}

0 commit comments

Comments
 (0)