Commit bc569f6

[RISCV] Add test case for shufflevector that gets scalarized. NFC
This shufflevector gets scalarized into a build_vector of extract_vector_elts because the output type does not match the input vector type. Normally DAGCombine folds this back into a vector_shuffle, but that combine fails here because we do not consider an extract_subvector to be cheap, specifically because it is at an index greater than 31. This should be canonicalized back into a vector_shuffle at some point so we can lower it as a vrgather.vv.
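
For reference, the scalarized form that the combine fails to undo is, roughly, a build_vector of eight extract_vector_elts (an illustrative SelectionDAG-style sketch, not actual compiler output; the t* names are hypothetical):

    t0 = extract_vector_elt %wide.vec, 0
    t1 = extract_vector_elt %wide.vec, 8
    t2 = extract_vector_elt %wide.vec, 16
    t3 = extract_vector_elt %wide.vec, 24
    t4 = extract_vector_elt %wide.vec, 32
    t5 = extract_vector_elt %wide.vec, 40
    t6 = extract_vector_elt %wide.vec, 48
    t7 = extract_vector_elt %wide.vec, 56
    %s = build_vector t0, t1, t2, t3, t4, t5, t6, t7

Rebuilding a single vector_shuffle from these requires an extract_subvector of the upper part of %wide.vec, which is the extract the cost check currently rejects.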
1 parent 2f7d9ab

1 file changed, +97 −0:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -719,3 +719,100 @@ define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) {
   %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i32> %s
 }
+
+; FIXME: This could be expressed as a vrgather.vv
+define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
+; RV32-LABEL: shuffle_v64i8_v8i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -128
+; RV32-NEXT:    .cfi_def_cfa_offset 128
+; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    addi s0, sp, 128
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    andi sp, sp, -64
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV32-NEXT:    vse8.v v8, (a1)
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 8
+; RV32-NEXT:    vmv.x.s a0, v10
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a1
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 16
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v8, 24
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT:    vslide1down.vx v8, v10, a0
+; RV32-NEXT:    lbu a0, 32(sp)
+; RV32-NEXT:    lbu a1, 40(sp)
+; RV32-NEXT:    lbu a2, 48(sp)
+; RV32-NEXT:    lbu a3, 56(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    addi sp, s0, -128
+; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 128
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shuffle_v64i8_v8i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -128
+; RV64-NEXT:    .cfi_def_cfa_offset 128
+; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    addi s0, sp, 128
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    andi sp, sp, -64
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    mv a1, sp
+; RV64-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV64-NEXT:    vse8.v v8, (a1)
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 8
+; RV64-NEXT:    vmv.x.s a0, v10
+; RV64-NEXT:    vmv.x.s a1, v8
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v8, 24
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT:    vslide1down.vx v8, v10, a0
+; RV64-NEXT:    lbu a0, 32(sp)
+; RV64-NEXT:    lbu a1, 40(sp)
+; RV64-NEXT:    lbu a2, 48(sp)
+; RV64-NEXT:    lbu a3, 56(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    addi sp, s0, -128
+; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 128
+; RV64-NEXT:    ret
+  %s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
+  ret <8 x i8> %s
+}
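
For comparison, the vrgather.vv form the FIXME points at could look roughly like the sketch below. This is hand-written for illustration, not compiler output; it assumes VLEN >= 128, so the m4 register group v8-v11 holds all 64 source bytes and the byte-sized indices 0..56 stay in range, and the register choices are arbitrary:

    vsetivli zero, 8, e8, m4, ta, ma   # 8 result elements; source group is LMUL=4
    vid.v v16                          # v16 = {0, 1, 2, ...}
    vsll.vi v16, v16, 3                # v16 = {0, 8, 16, 24, 32, 40, 48, 56}
    vrgather.vv v12, v8, v16           # v12[i] = v8[v16[i]]; vd must not overlap the sources
    vmv.v.v v8, v12                    # copy the 8 gathered bytes back to v8

A handful of vector instructions like this is the kind of lowering the proposed canonicalization would unlock, versus the store-and-reload sequence above.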
