[RISCV] Recognize VLA shift pairs from shuffle masks #127710

Merged: 6 commits, Feb 20, 2025
113 changes: 113 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4563,6 +4563,50 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
}

/// Does this mask represent a masked combination of two slides?
static bool isMaskedSlidePair(ArrayRef<int> Mask,
std::pair<int, int> SrcInfo[2]) {
int NumElts = Mask.size();
int SignalValue = NumElts * 2;
SrcInfo[0] = {-1, SignalValue};
SrcInfo[1] = {-1, SignalValue};
for (unsigned i = 0; i != Mask.size(); ++i) {
int M = Mask[i];
if (M < 0)
continue;
int Src = M >= (int)NumElts;
int Diff = (int)i - (M % NumElts);
bool Match = false;
for (int j = 0; j < 2; j++) {
if (SrcInfo[j].first == -1) {
assert(SrcInfo[j].second == SignalValue);
SrcInfo[j].first = Src;
SrcInfo[j].second = Diff;
}
if (SrcInfo[j].first == Src && SrcInfo[j].second == Diff) {
Match = true;
break;
}
}
if (!Match)
return false;
}

// Avoid matching unconditional slides for now. This is reasonably
// covered by existing matchers.
if (SrcInfo[0].first == -1 || SrcInfo[1].first == -1)
return false;
// Avoid matching vselect idioms
if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
return false;
// Prefer vslideup as the second instruction, and identity
// only as the initial instruction.
if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
SrcInfo[1].second == 0)
std::swap(SrcInfo[0], SrcInfo[1]);
return true;
}

/// Match shuffles that concatenate two vectors, rotate the concatenation,
/// and then extract the original number of elements from the rotated result.
/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
@@ -5651,6 +5695,75 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
}

// Recognize a pattern which can be handled via a pair of vslideup/vslidedown
// instructions (in any combination) with masking on the second instruction.
// Avoid matching bit rotates as slide pairs. This is a performance
// heuristic, not a functional check.
// TODO: Generalize this slightly to allow single instruction cases, and
// prune the logic above which is mostly covered by this already.
std::pair<int, int> SrcInfo[2];
unsigned RotateAmt;
MVT RotateVT;
if (isMaskedSlidePair(Mask, SrcInfo) &&
!isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt)) {
SDValue Sources[2];
auto GetSourceFor = [&](const std::pair<int, int> &Info) {
int SrcIdx = Info.first;
assert(SrcIdx == 0 || SrcIdx == 1);
SDValue &Src = Sources[SrcIdx];
if (!Src) {
SDValue SrcV = SrcIdx == 0 ? V1 : V2;
Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
}
return Src;
};
auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
SDValue Passthru) {
SDValue SrcV = GetSourceFor(Src);
int SlideAmt = Src.second;
if (SlideAmt == 0) {
// Should never be the second operation
assert(Mask == TrueMask);
return SrcV;
}
if (SlideAmt < 0)
return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
RISCVVType::TAIL_AGNOSTIC);
return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
RISCVVType::TAIL_AGNOSTIC);
};

// Build the mask. Note that vslideup unconditionally preserves elements
// below the slide amount in the destination, and thus those elements are
// undefined in the mask. If the mask ends up all true (or undef), it
// will be folded away by general logic.
SmallVector<SDValue> MaskVals;
for (unsigned i = 0; i != Mask.size(); ++i) {
int M = Mask[i];
if (M < 0 || (SrcInfo[1].second > 0 && i < (unsigned)SrcInfo[1].second)) {
MaskVals.push_back(DAG.getUNDEF(XLenVT));
continue;
}
int Src = M >= (int)NumElts;
int Diff = (int)i - (M % NumElts);
bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
"Must match exactly one of the two slides");
MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
}
assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue SelectMask = convertToScalableVector(
ContainerVT.changeVectorElementType(MVT::i1),
DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);

SDValue Res = DAG.getUNDEF(ContainerVT);
Res = GetSlide(SrcInfo[0], TrueMask, Res);
Res = GetSlide(SrcInfo[1], SelectMask, Res);
return convertFromScalableVector(VT, Res, DAG, Subtarget);
}

// Handle any remaining single source shuffles
assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
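
To make the (source, offset) decomposition concrete, here is a minimal standalone sketch of the matching rule from isMaskedSlidePair, independent of LLVM. The example mask is taken from the vrgather_permute_shuffle_vu_v4f64 test updated below; the profitability checks at the end of the real function (rejecting unconditional slides and vselect idioms, and reordering the pair) are omitted, and the name matchSlidePair is illustrative only:

#include <cstdio>
#include <utility>
#include <vector>

// Every defined mask element must belong to one of at most two slides,
// each identified by a (source operand, slide offset) pair, where
// offset = i - (M % NumElts): positive offsets slide up, negative down.
static bool matchSlidePair(const std::vector<int> &Mask,
                           std::pair<int, int> SrcInfo[2]) {
  int NumElts = Mask.size();
  int SignalValue = NumElts * 2; // marks a slot whose offset is unset
  SrcInfo[0] = {-1, SignalValue};
  SrcInfo[1] = {-1, SignalValue};
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M < 0) // undef element, compatible with any slide
      continue;
    int Src = M >= NumElts;       // 0 = first operand, 1 = second
    int Diff = i - (M % NumElts); // slide offset this lane requires
    bool Match = false;
    for (int j = 0; j < 2; j++) {
      if (SrcInfo[j].first == -1) // claim the first free slot
        SrcInfo[j] = {Src, Diff};
      if (SrcInfo[j].first == Src && SrcInfo[j].second == Diff) {
        Match = true;
        break;
      }
    }
    if (!Match) // a third distinct slide would be required
      return false;
  }
  // As in the real code, require both slots to be used.
  return SrcInfo[0].first != -1 && SrcInfo[1].first != -1;
}

int main() {
  std::pair<int, int> SrcInfo[2];
  // Mask <1, 2, 0, 1> decomposes into (src 0, diff -1), a vslidedown
  // by 1, plus (src 0, diff 2), a vslideup by 2; this matches the
  // updated CHECK lines in the tests below.
  if (matchSlidePair({1, 2, 0, 1}, SrcInfo))
    std::printf("slides: (src %d, diff %d) and (src %d, diff %d)\n",
                SrcInfo[0].first, SrcInfo[0].second,
                SrcInfo[1].first, SrcInfo[1].second);
}
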
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -39,18 +39,18 @@ define void @buildvec_no_vid_v4f32(ptr %x) {
define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, <8 x float> %y) optsize {
; CHECK-LABEL: hang_when_merging_stores_after_legalization:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v12, -14
; CHECK-NEXT: vid.v v14
; CHECK-NEXT: li a0, 7
; CHECK-NEXT: vmadd.vx v14, a0, v12
; CHECK-NEXT: li a0, 129
; CHECK-NEXT: vmv.s.x v15, a0
; CHECK-NEXT: vmv.v.i v0, 12
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vcompress.vm v12, v8, v15
; CHECK-NEXT: vrgatherei16.vv v12, v10, v14, v0.t
; CHECK-NEXT: vmv1r.v v8, v12
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 4
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vslidedown.vi v12, v10, 4
; CHECK-NEXT: vslideup.vi v12, v10, 2, v0.t
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 2
; CHECK-NEXT: vmv.v.i v10, 12
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT: ret
%z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> <i32 0, i32 7, i32 8, i32 15>
ret <4 x float> %z
51 changes: 17 additions & 34 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -38,44 +38,27 @@ define <4 x float> @interleave_v2f32(<2 x float> %x, <2 x float> %y) {
define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; V128-LABEL: interleave_v2f64:
; V128: # %bb.0:
; V128-NEXT: csrr a0, vlenb
; V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; V128-NEXT: vid.v v10
; V128-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; V128-NEXT: vmv1r.v v10, v9
; V128-NEXT: vmv.v.i v0, 10
; V128-NEXT: srli a0, a0, 3
; V128-NEXT: vsrl.vi v10, v10, 1
; V128-NEXT: vslidedown.vx v11, v10, a0
; V128-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; V128-NEXT: vrgatherei16.vv v13, v9, v11
; V128-NEXT: vrgatherei16.vv v12, v9, v10
; V128-NEXT: vrgatherei16.vv v15, v8, v11
; V128-NEXT: vrgatherei16.vv v14, v8, v10
; V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; V128-NEXT: vmerge.vvm v8, v14, v12, v0
; V128-NEXT: vslideup.vi v12, v10, 1
; V128-NEXT: vslideup.vi v12, v10, 2
; V128-NEXT: vmv2r.v v10, v8
; V128-NEXT: vslideup.vi v10, v8, 1
; V128-NEXT: vmerge.vvm v8, v10, v12, v0
; V128-NEXT: ret
;
; RV32-V512-LABEL: interleave_v2f64:
; RV32-V512: # %bb.0:
; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
;
; RV64-V512-LABEL: interleave_v2f64:
; RV64-V512: # %bb.0:
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
; V512-LABEL: interleave_v2f64:
; V512: # %bb.0:
; V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
; V512-NEXT: vslideup.vi v10, v9, 1
; V512-NEXT: vmv1r.v v11, v8
; V512-NEXT: vslideup.vi v10, v9, 2
; V512-NEXT: vmv.v.i v0, 10
; V512-NEXT: vslideup.vi v11, v8, 1
; V512-NEXT: vmerge.vvm v8, v11, v10, v0
; V512-NEXT: ret
%a = shufflevector <2 x double> %x, <2 x double> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x double> %a
}
93 changes: 36 additions & 57 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -69,14 +69,9 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: addi a0, a0, 513
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v12, v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vslideup.vi v10, v8, 2
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
@@ -86,14 +81,9 @@ define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_uv_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: addi a0, a0, 513
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v12, v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vslideup.vi v10, v8, 2
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%s = shufflevector <4 x double> poison, <4 x double> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
@@ -103,13 +93,12 @@ define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v12, v8, 1
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 8
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vslideup.vi v12, v8, 2
; CHECK-NEXT: vrgather.vi v12, v10, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
@@ -120,16 +109,18 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y)
define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
; CHECK-LABEL: vrgather_shuffle_xv_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 8
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI8_0)(a0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vrsub.vi v12, v10, 4
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vslideup.vi v10, v8, 2, v0.t
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 12
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfmv.v.f v10, fa5
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa5
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT: ret
%s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
ret <4 x double> %s
@@ -138,17 +129,17 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
; CHECK-LABEL: vrgather_shuffle_vx_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 2
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0)
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v10, 9
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vcompress.vm v12, v8, v10
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v0, 3
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa5
; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT: vfmv.v.f v10, fa5
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
; CHECK-NEXT: ret
%s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
ret <4 x double> %s
@@ -311,13 +302,9 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: addi a0, a0, 513
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v10, v9
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vslideup.vi v9, v8, 2
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
@@ -327,12 +314,10 @@ define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) {
define <4 x bfloat> @vrgather_shuffle_vv_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI25_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI25_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v11, (a0)
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vmv.v.i v0, 8
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vslideup.vi v10, v8, 2
; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -355,13 +340,9 @@ define <4 x bfloat> @vrgather_shuffle_vx_v4bf16_load(ptr %p) {
define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: addi a0, a0, 513
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v10, v9
; CHECK-NEXT: vrgather.vv v9, v8, v10
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vslideup.vi v9, v8, 2
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
@@ -371,12 +352,10 @@ define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) {
; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI28_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v11, (a0)
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vmv.v.i v0, 8
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vslideup.vi v10, v8, 2
; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
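
As a usage note, the new lowering can be observed in isolation by running a small test in the style of the files above through llc. The IR body below is copied from vrgather_permute_shuffle_vu_v4f64, and the CHECK lines mirror its updated output in this diff; the RUN line is an assumption based on the usual rvv test configuration, not something taken from this patch:

; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

define <4 x double> @slide_pair_demo(<4 x double> %x) {
; CHECK-LABEL: slide_pair_demo:
; CHECK:      vslidedown.vi v10, v8, 1
; CHECK-NEXT: vslideup.vi v10, v8, 2
  %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
  ret <4 x double> %s
}
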