
Commit 43f2968

[RISCV] Recognize VLA shift pairs from shuffle masks (#127710)
If we have a shuffle mask which can be represented as two slides + some conditional masking, we can emit a VLA sequence which is at most O(2*LMUL). This is essentially a generalization of the existing isElementRotate, but is staged to only introduce the new match for the moment. A follow-up change will start consolidating code - see the notes below.

A couple of notes:
1) I'm excluding bit rotates mostly to keep the diffs manageable.
2) The existing isElementRotate logic is nearly redundant after this change. However, we have some intersection between the bit rotate and element rotate matching. To keep things simple, I left that in place for now, and will merge/clean up in a separate change.
3) The individual asVSlideup and asVSlidedown matchers are closely related, but the former looks through extracts and the latter changes VL. I'm leaving these in place for now, but hope to common them up a bit as well.
1 parent 04b5c63 commit 43f2968

15 files changed: +1093 -1122 lines
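To make the decomposition described in the commit message concrete, here is a small standalone sketch (not part of the patch). It models, on plain arrays, how the single-source permute <1, 2, 0, 1> (the mask of vrgather_permute_shuffle_vu_v4f64 in the test diff below) is reproduced by an unmasked slide down of 1 followed by a slide up of 2 applied only to the lanes that want it; the scalar model ignores VL and tail behavior of the real instructions.

// Standalone sketch (not part of the patch): reproduce the shuffle
//   <4 x i32> <1, 2, 0, 1>
// as "slide down by 1, then slide up by 2 into the lanes that asked for it".
#include <array>
#include <cassert>
#include <cstdio>

int main() {
  const std::array<int, 4> Src = {10, 11, 12, 13};

  // Reference: what the shufflevector asks for.
  const int Mask[4] = {1, 2, 0, 1};
  std::array<int, 4> Want{};
  for (int i = 0; i < 4; ++i)
    Want[i] = Src[Mask[i]];

  // First slide: slide the source down by 1 (lane i reads source lane i + 1).
  // The last lane is tail-agnostic in the real sequence; model it as -1.
  std::array<int, 4> Res = {-1, -1, -1, -1};
  for (int i = 0; i + 1 < 4; ++i)
    Res[i] = Src[i + 1];

  // Second slide: slide the source up by 2, writing only the lanes whose mask
  // element selected this slide (lanes 2 and 3). Lanes below the slide amount
  // are don't-care in the real select mask; here they keep the first result.
  const bool SelectMask[4] = {false, false, true, true};
  for (int i = 2; i < 4; ++i)
    if (SelectMask[i])
      Res[i] = Src[i - 2];

  for (int i = 0; i < 4; ++i)
    std::printf("lane %d: got %d, want %d\n", i, Res[i], Want[i]);
  assert(Res == Want);
  return 0;
}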

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 113 additions & 0 deletions
@@ -4560,6 +4560,50 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
   return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
 }
 
+/// Is this mask representing a masked combination of two slides?
+static bool isMaskedSlidePair(ArrayRef<int> Mask,
+                              std::pair<int, int> SrcInfo[2]) {
+  int NumElts = Mask.size();
+  int SignalValue = NumElts * 2;
+  SrcInfo[0] = {-1, SignalValue};
+  SrcInfo[1] = {-1, SignalValue};
+  for (unsigned i = 0; i != Mask.size(); ++i) {
+    int M = Mask[i];
+    if (M < 0)
+      continue;
+    int Src = M >= (int)NumElts;
+    int Diff = (int)i - (M % NumElts);
+    bool Match = false;
+    for (int j = 0; j < 2; j++) {
+      if (SrcInfo[j].first == -1) {
+        assert(SrcInfo[j].second == SignalValue);
+        SrcInfo[j].first = Src;
+        SrcInfo[j].second = Diff;
+      }
+      if (SrcInfo[j].first == Src && SrcInfo[j].second == Diff) {
+        Match = true;
+        break;
+      }
+    }
+    if (!Match)
+      return false;
+  }
+
+  // Avoid matching unconditional slides for now. This is reasonably
+  // covered by existing matchers.
+  if (SrcInfo[0].first == -1 || SrcInfo[1].first == -1)
+    return false;
+  // Avoid matching vselect idioms
+  if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
+    return false;
+  // Prefer vslideup as the second instruction, and identity
+  // only as the initial instruction.
+  if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
+      SrcInfo[1].second == 0)
+    std::swap(SrcInfo[0], SrcInfo[1]);
+  return true;
+}
+
 /// Match shuffles that concatenate two vectors, rotate the concatenation,
 /// and then extract the original number of elements from the rotated result.
 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
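For readers who want to exercise the matcher outside SelectionDAG, here is a standalone mirror of the isMaskedSlidePair logic above with ArrayRef<int> replaced by std::vector<int>; the two harness masks (<4, 5, 0, 1> and <0, 2, 1, 3>) are illustrative choices, not taken from the patch.

// Standalone mirror of the matcher in the hunk above, using only the
// standard library so it can be compiled and poked at outside LLVM.
#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

static bool isMaskedSlidePairRef(const std::vector<int> &Mask,
                                 std::pair<int, int> SrcInfo[2]) {
  int NumElts = Mask.size();
  int SignalValue = NumElts * 2;        // sentinel: slot not claimed yet
  SrcInfo[0] = {-1, SignalValue};
  SrcInfo[1] = {-1, SignalValue};
  for (unsigned i = 0; i != Mask.size(); ++i) {
    int M = Mask[i];
    if (M < 0)                          // undef lane matches anything
      continue;
    int Src = M >= NumElts;             // which input vector this lane reads
    int Diff = (int)i - (M % NumElts);  // slide amount implied by this lane
    bool Match = false;
    for (int j = 0; j < 2; j++) {
      if (SrcInfo[j].first == -1)
        SrcInfo[j] = {Src, Diff};       // claim an empty slot
      if (SrcInfo[j].first == Src && SrcInfo[j].second == Diff) {
        Match = true;
        break;
      }
    }
    if (!Match)
      return false;                     // would need a third distinct slide
  }
  if (SrcInfo[0].first == -1 || SrcInfo[1].first == -1)
    return false;                       // unconditional slide, handled elsewhere
  if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
    return false;                       // plain vselect idiom
  if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
      SrcInfo[1].second == 0)
    std::swap(SrcInfo[0], SrcInfo[1]);
  return true;
}

int main() {
  std::pair<int, int> SrcInfo[2];

  // <4, 5, 0, 1>: low half is an identity of V2, high half is V1 slid up by 2.
  bool Ok = isMaskedSlidePairRef({4, 5, 0, 1}, SrcInfo);
  assert(Ok);
  (void)Ok;
  std::printf("matched: src%d slide %d, then src%d slide %d under a mask\n",
              SrcInfo[0].first, SrcInfo[0].second,
              SrcInfo[1].first, SrcInfo[1].second);

  // <0, 2, 1, 3> needs more than two distinct slides, so it is rejected.
  assert(!isMaskedSlidePairRef({0, 2, 1, 3}, SrcInfo));
  return 0;
}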
@@ -5648,6 +5692,75 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
   }
 
+  // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
+  // instructions (in any combination) with masking on the second instruction.
+  // Avoid matching bit rotates as slide pairs. This is a performance
+  // heuristic, not a functional check.
+  // TODO: Generalize this slightly to allow single instruction cases, and
+  // prune the logic above which is mostly covered by this already.
+  std::pair<int, int> SrcInfo[2];
+  unsigned RotateAmt;
+  MVT RotateVT;
+  if (isMaskedSlidePair(Mask, SrcInfo) &&
+      !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt)) {
+    SDValue Sources[2];
+    auto GetSourceFor = [&](const std::pair<int, int> &Info) {
+      int SrcIdx = Info.first;
+      assert(SrcIdx == 0 || SrcIdx == 1);
+      SDValue &Src = Sources[SrcIdx];
+      if (!Src) {
+        SDValue SrcV = SrcIdx == 0 ? V1 : V2;
+        Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
+      }
+      return Src;
+    };
+    auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
+                        SDValue Passthru) {
+      SDValue SrcV = GetSourceFor(Src);
+      int SlideAmt = Src.second;
+      if (SlideAmt == 0) {
+        // Should never be second operation
+        assert(Mask == TrueMask);
+        return SrcV;
+      }
+      if (SlideAmt < 0)
+        return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
+                             DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
+                             RISCVVType::TAIL_AGNOSTIC);
+      return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
+                         DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
+                         RISCVVType::TAIL_AGNOSTIC);
+    };
+
+    // Build the mask. Note that vslideup unconditionally preserves elements
+    // below the slide amount in the destination, and thus those elements are
+    // undefined in the mask. If the mask ends up all true (or undef), it
+    // will be folded away by general logic.
+    SmallVector<SDValue> MaskVals;
+    for (unsigned i = 0; i != Mask.size(); ++i) {
+      int M = Mask[i];
+      if (M < 0 || (SrcInfo[1].second > 0 && i < (unsigned)SrcInfo[1].second)) {
+        MaskVals.push_back(DAG.getUNDEF(XLenVT));
+        continue;
+      }
+      int Src = M >= (int)NumElts;
+      int Diff = (int)i - (M % NumElts);
+      bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
+      assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
+             "Must match exactly one of the two slides");
+      MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
+    }
+    assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
+    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+    SDValue SelectMask = convertToScalableVector(
+        ContainerVT.changeVectorElementType(MVT::i1),
+        DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
+
+    SDValue Res = DAG.getUNDEF(ContainerVT);
+    Res = GetSlide(SrcInfo[0], TrueMask, Res);
+    Res = GetSlide(SrcInfo[1], SelectMask, Res);
+    return convertFromScalableVector(VT, Res, DAG, Subtarget);
+  }
 
   // Handle any remaining single source shuffles
   assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
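As a worked example of the select-mask rule in the hunk above (again not taken from the patch): for the hypothetical two-source mask <2, 3, 6, 7> with NumElts = 4, the matcher ends up with an identity of V2 as the first, unmasked operation and a slide down of V1 by 2 as the masked second operation, and the loop below reproduces the 1, 1, 0, 0 mask the lowering would build.

// Standalone sketch of the select-mask construction for mask <2, 3, 6, 7>.
// SrcInfo below is what the matcher would produce for this mask (after its
// final swap): identity of V2 first, slide down of V1 by 2 second.
#include <cstdio>
#include <utility>

int main() {
  const int NumElts = 4;
  const int Mask[4] = {2, 3, 6, 7};
  const std::pair<int, int> SrcInfo[2] = {{1, 0}, {0, -2}};

  for (int i = 0; i < NumElts; ++i) {
    int M = Mask[i];
    // Undef lanes, and lanes a positive vslideup would preserve anyway
    // (i below the slide amount), stay undefined in the mask.
    if (M < 0 || (SrcInfo[1].second > 0 && i < SrcInfo[1].second)) {
      std::printf("lane %d: undef\n", i);
      continue;
    }
    int Src = M >= NumElts;
    int Diff = i - (M % NumElts);
    bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
    std::printf("lane %d: %d\n", i, (int)C);  // prints 1, 1, 0, 0
  }
  return 0;
}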

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 12 additions & 12 deletions
@@ -39,18 +39,18 @@ define void @buildvec_no_vid_v4f32(ptr %x) {
 define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, <8 x float> %y) optsize {
 ; CHECK-LABEL: hang_when_merging_stores_after_legalization:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v12, -14
-; CHECK-NEXT: vid.v v14
-; CHECK-NEXT: li a0, 7
-; CHECK-NEXT: vmadd.vx v14, a0, v12
-; CHECK-NEXT: li a0, 129
-; CHECK-NEXT: vmv.s.x v15, a0
-; CHECK-NEXT: vmv.v.i v0, 12
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vcompress.vm v12, v8, v15
-; CHECK-NEXT: vrgatherei16.vv v12, v10, v14, v0.t
-; CHECK-NEXT: vmv1r.v v8, v12
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v12, v10, 4
+; CHECK-NEXT: vslideup.vi v12, v10, 2, v0.t
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 2
+; CHECK-NEXT: vmv.v.i v10, 12
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
 ; CHECK-NEXT: ret
   %z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> <i32 0, i32 7, i32 8, i32 15>
   ret <4 x float> %z

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll

Lines changed: 17 additions & 34 deletions
@@ -38,44 +38,27 @@ define <4 x float> @interleave_v2f32(<2 x float> %x, <2 x float> %y) {
 define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
 ; V128-LABEL: interleave_v2f64:
 ; V128: # %bb.0:
-; V128-NEXT: csrr a0, vlenb
-; V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; V128-NEXT: vid.v v10
+; V128-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; V128-NEXT: vmv1r.v v10, v9
 ; V128-NEXT: vmv.v.i v0, 10
-; V128-NEXT: srli a0, a0, 3
-; V128-NEXT: vsrl.vi v10, v10, 1
-; V128-NEXT: vslidedown.vx v11, v10, a0
-; V128-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; V128-NEXT: vrgatherei16.vv v13, v9, v11
-; V128-NEXT: vrgatherei16.vv v12, v9, v10
-; V128-NEXT: vrgatherei16.vv v15, v8, v11
-; V128-NEXT: vrgatherei16.vv v14, v8, v10
 ; V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; V128-NEXT: vmerge.vvm v8, v14, v12, v0
+; V128-NEXT: vslideup.vi v12, v10, 1
+; V128-NEXT: vslideup.vi v12, v10, 2
+; V128-NEXT: vmv2r.v v10, v8
+; V128-NEXT: vslideup.vi v10, v8, 1
+; V128-NEXT: vmerge.vvm v8, v10, v12, v0
 ; V128-NEXT: ret
 ;
-; RV32-V512-LABEL: interleave_v2f64:
-; RV32-V512: # %bb.0:
-; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; RV32-V512-NEXT: vid.v v10
-; RV32-V512-NEXT: vsrl.vi v11, v10, 1
-; RV32-V512-NEXT: vmv.v.i v0, 10
-; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
-; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
-; RV32-V512-NEXT: vmv.v.v v8, v10
-; RV32-V512-NEXT: ret
-;
-; RV64-V512-LABEL: interleave_v2f64:
-; RV64-V512: # %bb.0:
-; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
-; RV64-V512-NEXT: vid.v v10
-; RV64-V512-NEXT: vsrl.vi v11, v10, 1
-; RV64-V512-NEXT: vmv.v.i v0, 10
-; RV64-V512-NEXT: vrgather.vv v10, v8, v11
-; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
-; RV64-V512-NEXT: vmv.v.v v8, v10
-; RV64-V512-NEXT: ret
+; V512-LABEL: interleave_v2f64:
+; V512: # %bb.0:
+; V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; V512-NEXT: vslideup.vi v10, v9, 1
+; V512-NEXT: vmv1r.v v11, v8
+; V512-NEXT: vslideup.vi v10, v9, 2
+; V512-NEXT: vmv.v.i v0, 10
+; V512-NEXT: vslideup.vi v11, v8, 1
+; V512-NEXT: vmerge.vvm v8, v11, v10, v0
+; V512-NEXT: ret
   %a = shufflevector <2 x double> %x, <2 x double> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
   ret <4 x double> %a
 }

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll

Lines changed: 36 additions & 57 deletions
@@ -69,14 +69,9 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
 define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 4096
-; CHECK-NEXT: addi a0, a0, 513
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v10
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vslidedown.vi v10, v8, 1
+; CHECK-NEXT: vslideup.vi v10, v8, 2
 ; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
@@ -86,14 +81,9 @@ define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
 define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: vrgather_permute_shuffle_uv_v4f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 4096
-; CHECK-NEXT: addi a0, a0, 513
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vsext.vf2 v12, v10
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vslidedown.vi v10, v8, 1
+; CHECK-NEXT: vslideup.vi v10, v8, 2
 ; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x double> poison, <4 x double> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
@@ -103,13 +93,12 @@ define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
 define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
 ; CHECK-LABEL: vrgather_shuffle_vv_v4f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI7_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0)
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v14, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vslidedown.vi v12, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.i v0, 8
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; CHECK-NEXT: vslideup.vi v12, v8, 2
 ; CHECK-NEXT: vrgather.vi v12, v10, 1, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v12
 ; CHECK-NEXT: ret
@@ -120,16 +109,18 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y)
 define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: vrgather_shuffle_xv_v4f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 8
 ; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI8_0)(a0)
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vi v12, v10, 4
+; CHECK-NEXT: vmv2r.v v10, v8
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; CHECK-NEXT: vslideup.vi v10, v8, 2, v0.t
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.i v0, 12
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT: vfmv.v.f v10, fa5
-; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t
-; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa5
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x double> %x, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
   ret <4 x double> %s
@@ -138,17 +129,17 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
 define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: vrgather_shuffle_vx_v4f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 2
 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 9
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vcompress.vm v12, v8, v10
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t
 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.i v0, 3
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfmv.v.f v8, fa5
-; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: vfmv.v.f v10, fa5
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
   ret <4 x double> %s
@@ -311,13 +302,9 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
 define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) {
 ; CHECK-LABEL: vrgather_permute_shuffle_vu_v4bf16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 4096
-; CHECK-NEXT: addi a0, a0, 513
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v9
-; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vslideup.vi v9, v8, 2
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
@@ -327,12 +314,10 @@ define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) {
 define <4 x bfloat> @vrgather_shuffle_vv_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
 ; CHECK-LABEL: vrgather_shuffle_vv_v4bf16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI25_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI25_0)
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT: vle16.v v11, (a0)
+; CHECK-NEXT: vslidedown.vi v10, v8, 1
 ; CHECK-NEXT: vmv.v.i v0, 8
-; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vslideup.vi v10, v8, 2
 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
@@ -355,13 +340,9 @@ define <4 x bfloat> @vrgather_shuffle_vx_v4bf16_load(ptr %p) {
 define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
 ; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 4096
-; CHECK-NEXT: addi a0, a0, 513
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vsext.vf2 v10, v9
-; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vslideup.vi v9, v8, 2
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: ret
   %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
@@ -371,12 +352,10 @@ define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
 define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI28_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0)
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT: vle16.v v11, (a0)
+; CHECK-NEXT: vslidedown.vi v10, v8, 1
 ; CHECK-NEXT: vmv.v.i v0, 8
-; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vslideup.vi v10, v8, 2
 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
