
[RISCV] Use zext and shift for spread(4,8) when types allow #118893


Merged (2 commits) on Dec 6, 2024
47 changes: 47 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4824,6 +4824,36 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}

// Match a mask which "spreads" the leading elements of a vector evenly
// across the result. Factor is the spread amount, and Index is the
// offset applied (on success, Index < Factor). This is the inverse
// of a deinterleave with the same Factor and Index, and is analogous
// to an interleave except that all but one lane is undef.
static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
SmallVector<bool> LaneIsUndef(Factor, true);
for (unsigned i = 0; i < Mask.size(); i++)
LaneIsUndef[i % Factor] &= (Mask[i] == -1);

bool Found = false;
for (unsigned i = 0; i < Factor; i++) {
if (LaneIsUndef[i])
continue;
if (Found)
return false;
Index = i;
Found = true;
}
if (!Found)
return false;

for (unsigned i = 0; i < Mask.size() / Factor; i++) {
unsigned j = i * Factor + Index;
if (Mask[j] != -1 && (unsigned)Mask[j] != i)
return false;
}
return true;
}

// Given a vector a, b, c, d return a vector Factor times longer
// with Factor-1 undef's between elements. Ex:
// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
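
As a concrete illustration of what the spread-mask matcher (isSpreadMask) above accepts, here is a minimal, dependency-free C++ sketch of the same logic, run on the Factor=4, Index=1 mask from the shuffle_spread4_singlesrc_e8_idx1 test further down. The helper name matchesSpread and the driver are illustrative only and are not part of the patch.

#include <cstdio>
#include <vector>

// Standalone restatement of the matcher: -1 denotes an undef mask element.
static bool matchesSpread(const std::vector<int> &Mask, unsigned Factor,
                          unsigned &Index) {
  // A lane survives only if every mask element it covers is undef.
  std::vector<bool> LaneIsUndef(Factor, true);
  for (unsigned i = 0; i < Mask.size(); i++)
    LaneIsUndef[i % Factor] = LaneIsUndef[i % Factor] && Mask[i] == -1;

  // Exactly one lane may carry defined elements; that lane is Index.
  bool Found = false;
  for (unsigned i = 0; i < Factor; i++) {
    if (LaneIsUndef[i])
      continue;
    if (Found)
      return false;
    Index = i;
    Found = true;
  }
  if (!Found)
    return false;

  // The live lane must read 0, 1, 2, ... in order (undefs act as wildcards).
  for (unsigned i = 0; i < Mask.size() / Factor; i++) {
    unsigned j = i * Factor + Index;
    if (Mask[j] != -1 && (unsigned)Mask[j] != i)
      return false;
  }
  return true;
}

int main() {
  // The mask from shuffle_spread4_singlesrc_e8_idx1 below.
  std::vector<int> Mask = {-1, 0, -1, -1, -1, 1, -1, -1,
                           -1, 2, -1, -1, -1, 3, -1, -1};
  unsigned Index = 0;
  if (matchesSpread(Mask, /*Factor=*/4, Index))
    std::printf("spread(4), Index=%u\n", Index); // prints Index=1
  return 0;
}
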
@@ -5417,6 +5447,23 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
DAG.getUNDEF(VT));
}

// Match a spread(4,8) which can be done via extend and shift. Spread(2)
// is fully covered in interleave(2) above, so it is ignored here.
if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
unsigned Index;
if (isSpreadMask(Mask, Factor, Index)) {
MVT NarrowVT =
MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
DAG.getVectorIdxConstant(0, DL));
return getWideningSpread(Src, Factor, Index, DL, DAG);
}
}
}

if (VT.getScalarSizeInBits() == 8 &&
any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
// On such a vector we're unable to use i8 as the index type.
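
The spread(4,8) transform above works because, on a little-endian target like RISC-V, zero-extending each narrow element into a Factor-times-wider element and then shifting left by Index * narrow-element-width bits places the original value at offset Index within its group and zeroes the remaining positions, which is acceptable since those positions are undef in the spread mask. Below is a minimal scalar model of that reasoning for Factor=4 with 8-bit elements; it is a sketch of the semantics under those assumptions, not the actual SelectionDAG lowering, and every name in it is illustrative.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Narrow source elements (the defined lanes of the shuffle).
  const uint8_t Src[4] = {0xA0, 0xB1, 0xC2, 0xD3};
  const unsigned Factor = 4; // spread(4): e8 -> e32
  const unsigned Index = 1;  // lane offset within each group

  uint8_t Out[16] = {};
  for (unsigned i = 0; i < 4; i++) {
    // Per-element model of vzext.vf4 followed by vsll.vi with 8 * Index.
    uint32_t Wide = static_cast<uint32_t>(Src[i]) << (8 * Index);
    // Little-endian store: byte Index of the group holds Src[i], rest are 0.
    std::memcpy(&Out[i * Factor], &Wide, sizeof(Wide));
  }

  // Expected: 00 a0 00 00 | 00 b1 00 00 | 00 c2 00 00 | 00 d3 00 00,
  // i.e. element i lands at position i * Factor + Index, matching the mask.
  for (unsigned i = 0; i < 16; i++)
    std::printf("%02x%s", (unsigned)Out[i],
                (i % Factor == Factor - 1) ? " | " : " ");
  std::printf("\n");
  return 0;
}

With Index == 0 no shift is needed, which matches the idx0 test below, where only a vzext.vf4 is emitted.
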
37 changes: 13 additions & 24 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -879,10 +879,8 @@ define <8 x i32> @shuffle_spread4_singlesrc_e32(<8 x i32> %v) {
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vsrl.vi v10, v9, 2
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vzext.vf4 v9, v8
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
@@ -892,11 +890,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx0(<16 x i8> %v) {
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vsrl.vi v10, v9, 2
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vzext.vf4 v9, v8
; CHECK-NEXT: vsll.vi v8, v9, 8
; CHECK-NEXT: ret
%out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef>
ret <16 x i8> %out
@@ -905,11 +901,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx1(<16 x i8> %v) {
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vsrl.vi v10, v9, 2
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vzext.vf4 v9, v8
; CHECK-NEXT: vsll.vi v8, v9, 16
; CHECK-NEXT: ret
%out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef>
ret <16 x i8> %out
@@ -918,11 +912,9 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx2(<16 x i8> %v) {
define <16 x i8> @shuffle_spread4_singlesrc_e8_idx3(<16 x i8> %v) {
; CHECK-LABEL: shuffle_spread4_singlesrc_e8_idx3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vsrl.vi v10, v9, 2
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vzext.vf4 v9, v8
; CHECK-NEXT: vsll.vi v8, v9, 24
; CHECK-NEXT: ret
%out = shufflevector <16 x i8> %v, <16 x i8> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3>
ret <16 x i8> %out
@@ -946,11 +938,8 @@ define <16 x i8> @shuffle_spread4_singlesrc_e8_idx4(<16 x i8> %v) {
define <32 x i8> @shuffle_spread8_singlesrc_e8(<32 x i8> %v) {
; CHECK-LABEL: shuffle_spread8_singlesrc_e8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vsrl.vi v12, v10, 3
; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vzext.vf8 v10, v8
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%out = shufflevector <32 x i8> %v, <32 x i8> poison, <32 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>