Skip to content

Commit e60a939

Browse files
authored
[RISCV] Use zext and shift for spread(4,8) when types allow (#118893)
For a spread whose element type is small enough, we can use a zext and shift to perform the shuffle. For e8, this covers spread(2,4,8), and for e16 it covers spread(2,4). Note that spread(2) is already handled by the existing interleave logic; it is listed in the previous sentence only for completeness.
1 parent 6410edc commit e60a939

File tree

2 files changed

+60
-24
lines changed

2 files changed

+60
-24
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4824,6 +4824,36 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
48244824
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
48254825
}
48264826

4827+
// Match a mask which "spreads" the leading elements of a vector evenly
4828+
// across the result. Factor is the spread amount, and Index is the
4829+
// offset applied. (on success, Index < Factor) This is the inverse
4830+
// of a deinterleave with the same Factor and Index. This is analogous
4831+
// to an interleave, except that all but one lane is undef.
4832+
static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4833+
SmallVector<bool> LaneIsUndef(Factor, true);
4834+
for (unsigned i = 0; i < Mask.size(); i++)
4835+
LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4836+
4837+
bool Found = false;
4838+
for (unsigned i = 0; i < Factor; i++) {
4839+
if (LaneIsUndef[i])
4840+
continue;
4841+
if (Found)
4842+
return false;
4843+
Index = i;
4844+
Found = true;
4845+
}
4846+
if (!Found)
4847+
return false;
4848+
4849+
for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4850+
unsigned j = i * Factor + Index;
4851+
if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4852+
return false;
4853+
}
4854+
return true;
4855+
}
4856+
48274857
// Given a vector a, b, c, d return a vector Factor times longer
48284858
// with Factor-1 undef's between elements. Ex:
48294859
// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
@@ -5417,6 +5447,23 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
54175447
DAG.getUNDEF(VT));
54185448
}
54195449

5450+
// Match a spread(4,8) which can be done via extend and shift. Spread(2)
5451+
// is fully covered in interleave(2) above, so it is ignored here.
5452+
if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5453+
unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5454+
assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5455+
for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5456+
unsigned Index;
5457+
if (isSpreadMask(Mask, Factor, Index)) {
5458+
MVT NarrowVT =
5459+
MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5460+
SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5461+
DAG.getVectorIdxConstant(0, DL));
5462+
return getWideningSpread(Src, Factor, Index, DL, DAG);
5463+
}
5464+
}
5465+
}
5466+
54205467
if (VT.getScalarSizeInBits() == 8 &&
54215468
any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
54225469
// On such a vector we're unable to use i8 as the index type.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -879,10 +879,8 @@ define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) {
879879
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) {
880880
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx0:
881881
; CHECK: # %bb.0:
882-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
883-
; CHECK-NEXT: vid.v v9
884-
; CHECK-NEXT: vsrl.vi v10, v9, 2
885-
; CHECK-NEXT: vrgather.vv v9, v8, v10
882+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
883+
; CHECK-NEXT: vzext.vf4 v9, v8
886884
; CHECK-NEXT: vmv.v.v v8, v9
887885
; CHECK-NEXT: ret
888886
%out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
@@ -892,11 +890,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) {
892890
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) {
893891
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx1:
894892
; CHECK: # %bb.0:
895-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
896-
; CHECK-NEXT: vid.v v9
897-
; CHECK-NEXT: vsrl.vi v10, v9, 2
898-
; CHECK-NEXT: vrgather.vv v9, v8, v10
899-
; CHECK-NEXT: vmv.v.v v8, v9
893+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
894+
; CHECK-NEXT: vzext.vf4 v9, v8
895+
; CHECK-NEXT: vsll.vi v8, v9, 8
900896
; CHECK-NEXT: ret
901897
%out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef>
902898
ret <16 x i8> %out
@@ -905,11 +901,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) {
905901
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) {
906902
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx2:
907903
; CHECK: # %bb.0:
908-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
909-
; CHECK-NEXT: vid.v v9
910-
; CHECK-NEXT: vsrl.vi v10, v9, 2
911-
; CHECK-NEXT: vrgather.vv v9, v8, v10
912-
; CHECK-NEXT: vmv.v.v v8, v9
904+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
905+
; CHECK-NEXT: vzext.vf4 v9, v8
906+
; CHECK-NEXT: vsll.vi v8, v9, 16
913907
; CHECK-NEXT: ret
914908
%out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef>
915909
ret <16 x i8> %out
@@ -918,11 +912,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) {
918912
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx3(<16 x i8> %v) {
919913
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx3:
920914
; CHECK: # %bb.0:
921-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
922-
; CHECK-NEXT: vid.v v9
923-
; CHECK-NEXT: vsrl.vi v10, v9, 2
924-
; CHECK-NEXT: vrgather.vv v9, v8, v10
925-
; CHECK-NEXT: vmv.v.v v8, v9
915+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
916+
; CHECK-NEXT: vzext.vf4 v9, v8
917+
; CHECK-NEXT: vsll.vi v8, v9, 24
926918
; CHECK-NEXT: ret
927919
%out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3>
928920
ret <16 x i8> %out
@@ -946,11 +938,8 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx4(<16 x i8> %v) {
946938
define <32 x i8> @shuffle_spread8_singlesrc_e8(<32 x i8> %v) {
947939
; CHECK-LABEL: shuffle_spread8_singlesrc_e8:
948940
; CHECK: # %bb.0:
949-
; CHECK-NEXT: li a0, 32
950-
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
951-
; CHECK-NEXT: vid.v v10
952-
; CHECK-NEXT: vsrl.vi v12, v10, 3
953-
; CHECK-NEXT: vrgather.vv v10, v8, v12
941+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
942+
; CHECK-NEXT: vzext.vf8 v10, v8
954943
; CHECK-NEXT: vmv.v.v v8, v10
955944
; CHECK-NEXT: ret
956945
%out = shufflevector <32 x i8> %v, <32 x i8> poison, <32 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>

0 commit comments

Comments
 (0)