Commit 657dbc3
[RISCV] Reorder shuffle operands if one side is an identity (#98534)
Doing so allows one side to fold entirely into the mask applied to the other recursive call (or a vmerge.vv at worst). This is a generalization of the existing IsSelect case (where both operands are selects), so I removed that code in the process. This actually started as an attempt to remove the IsSelect bit, as I'd thought it was fully redundant with the recursive formulation, but digging into the test deltas revealed that we depended on it to catch the majority of the identity cases, and that in turn we were missing cases where only the RHS was an identity.
1 parent 6f04f46 commit 657dbc3
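To make the reordering concrete, here is a minimal standalone sketch (plain C++, independent of the LLVM tree; the isIdentityMask helper below is a simplified stand-in for ShuffleVectorInst::isIdentityMask) that splits the two-source mask from the merge_slidedown test below into its per-operand halves and checks which half is an identity:

#include <cstdio>
#include <vector>

// An index vector is an identity if every defined index selects its own lane.
static bool isIdentityMask(const std::vector<int> &Mask) {
  for (size_t I = 0; I != Mask.size(); ++I)
    if (Mask[I] >= 0 && Mask[I] != (int)I)
      return false;
  return true;
}

int main() {
  const int NumElts = 8;
  // Shuffle mask from the merge_slidedown test below (-1 would mean undef).
  std::vector<int> Mask = {8, 9, 3, 4, 5, 6, 14, 15};
  std::vector<int> LHS, RHS; // single-source half-masks
  for (int M : Mask) {
    bool FromLHS = M < NumElts;
    LHS.push_back(FromLHS && M >= 0 ? M : -1);
    RHS.push_back(FromLHS ? -1 : M - NumElts);
  }
  // LHS = {-1,-1,3,4,5,6,-1,-1} (a slide down by 1);
  // RHS = {0,1,-1,-1,-1,-1,6,7} (an identity).
  std::printf("LHS identity: %d, RHS identity: %d\n",
              isIdentityMask(LHS), isIdentityMask(RHS));
  return 0;
}

Because the RHS half is an identity, swapping the operand order lets that half fold into the final select mask, which is what replaces the old vslidedown.vi + vmerge.vvm pairs with a single masked vslidedown.vi ..., v0.t in the updated tests.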

File tree

5 files changed: +90 additions, -115 deletions


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 17 additions & 42 deletions
@@ -5321,67 +5321,42 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     return convertFromScalableVector(VT, Gather, DAG, Subtarget);
   }
 
-  // By default we preserve the original operand order, and use a mask to
-  // select LHS as true and RHS as false. However, since RVV vector selects may
-  // feature splats but only on the LHS, we may choose to invert our mask and
-  // instead select between RHS and LHS.
-  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
-
-  // Detect shuffles which can be re-expressed as vector selects; these are
-  // shuffles in which each element in the destination is taken from an element
-  // at the corresponding index in either source vectors.
-  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
-    int MaskIndex = MaskIdx.value();
-    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
-  });
-  if (IsSelect) {
-    // Now construct the mask that will be used by the vselect operation.
-    SmallVector<SDValue> MaskVals;
-    for (int MaskIndex : Mask) {
-      bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
-      MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
-    }
-
-    if (SwapOps)
-      std::swap(V1, V2);
-
-    assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
-    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
-    SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
-    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
-  }
-
   // As a backup, shuffles can be lowered via a vrgather instruction, possibly
   // merged with a second vrgather.
   SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
-  SmallVector<SDValue> MaskVals;
 
   // Now construct the mask that will be used by the blended vrgather operation.
-  // Cconstruct the appropriate indices into each vector.
+  // Construct the appropriate indices into each vector.
   for (int MaskIndex : Mask) {
-    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
-    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
     bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
     ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
                                  ? MaskIndex : -1);
     ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
   }
 
-  if (SwapOps) {
-    std::swap(V1, V2);
-    std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
-  }
-
-  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
-  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
-  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
+  // Try to pick a profitable operand order.
+  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
+  SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
 
   // Recursively invoke lowering for each operand if we had two
   // independent single source shuffles, and then combine the result via a
   // vselect. Note that the vselect will likely be folded back into the
   // second permute (vrgather, or other) by the post-isel combine.
   V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
   V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
+
+  SmallVector<SDValue> MaskVals;
+  for (int MaskIndex : Mask) {
+    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
+    MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
+  }
+
+  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
+  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
+
+  if (SwapOps)
+    return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
   return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
 }

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll

Lines changed: 4 additions & 4 deletions
@@ -663,11 +663,11 @@ define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) {
 define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: merge_slidedown:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 195
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    li a0, 60
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15>
   ret <8 x i8> %res

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll

Lines changed: 49 additions & 49 deletions
@@ -19,11 +19,11 @@ define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
 define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
 ; CHECK-LABEL: trn2.v8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 170
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i8> %tmp0
@@ -46,13 +46,13 @@ define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
 define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
 ; CHECK-LABEL: trn2.v16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 11
-; CHECK-NEXT:    addi a0, a0, -1366
+; CHECK-NEXT:    lui a0, 5
+; CHECK-NEXT:    addi a0, a0, 1365
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
   ret <16 x i8> %tmp0
@@ -72,10 +72,10 @@ define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
 define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
 ; CHECK-LABEL: trn2.v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 5
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i16> %tmp0
@@ -96,11 +96,11 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
 define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
 ; CHECK-LABEL: trn2.v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 170
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x i16> %tmp0
@@ -119,10 +119,10 @@ define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
 define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
 ; CHECK-LABEL: trn2.v2i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 2
-; CHECK-NEXT:    vrgather.vi v10, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v10, v9, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 1
+; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
   ret <2 x i32> %tmp0
@@ -142,10 +142,10 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
 define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: trn2.v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 5
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i32> %tmp0
@@ -164,10 +164,10 @@ define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
 define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
 ; CHECK-LABEL: trn2.v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 2
-; CHECK-NEXT:    vrgather.vi v10, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v10, v9, v0
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 1
+; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
   ret <2 x i64> %tmp0
@@ -186,10 +186,10 @@ define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
 define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
 ; CHECK-LABEL: trn2.v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 2
-; CHECK-NEXT:    vrgather.vi v10, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v10, v9, v0
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 1
+; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
   ret <2 x float> %tmp0
@@ -209,10 +209,10 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
 define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
 ; CHECK-LABEL: trn2.v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 5
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x float> %tmp0
@@ -231,10 +231,10 @@ define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) {
 define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {
 ; CHECK-LABEL: trn2.v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 2
-; CHECK-NEXT:    vrgather.vi v10, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v10, v9, v0
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 1
+; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
   ret <2 x double> %tmp0
@@ -254,10 +254,10 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) {
 define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) {
 ; CHECK-LABEL: trn2.v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 10
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 5
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x half> %tmp0
@@ -278,11 +278,11 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) {
 define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
 ; CHECK-LABEL: trn2.v8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 170
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    li a0, 85
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv.s.x v0, a0
-; CHECK-NEXT:    vslidedown.vi v8, v8, 1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ret <8 x half> %tmp0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

Lines changed: 12 additions & 12 deletions
@@ -179,10 +179,10 @@ define void @vnsrl_32_i32(ptr %in, ptr %out) {
 ; ZVE32F:       # %bb.0: # %entry
 ; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; ZVE32F-NEXT:    vle32.v v8, (a0)
-; ZVE32F-NEXT:    vmv.v.i v0, 2
+; ZVE32F-NEXT:    vmv.v.i v0, 1
 ; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
-; ZVE32F-NEXT:    vrgather.vi v9, v8, 1
-; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2, v0.t
+; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
+; ZVE32F-NEXT:    vrgather.vi v9, v8, 1, v0.t
 ; ZVE32F-NEXT:    vse32.v v9, (a1)
 ; ZVE32F-NEXT:    ret
 entry:
@@ -233,10 +233,10 @@ define void @vnsrl_32_float(ptr %in, ptr %out) {
 ; ZVE32F:       # %bb.0: # %entry
 ; ZVE32F-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; ZVE32F-NEXT:    vle32.v v8, (a0)
-; ZVE32F-NEXT:    vmv.v.i v0, 2
+; ZVE32F-NEXT:    vmv.v.i v0, 1
 ; ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, mu
-; ZVE32F-NEXT:    vrgather.vi v9, v8, 1
-; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2, v0.t
+; ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
+; ZVE32F-NEXT:    vrgather.vi v9, v8, 1, v0.t
 ; ZVE32F-NEXT:    vse32.v v9, (a1)
 ; ZVE32F-NEXT:    ret
 entry:
@@ -276,10 +276,10 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) {
 ; V:       # %bb.0: # %entry
 ; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
 ; V-NEXT:    vle64.v v8, (a0)
-; V-NEXT:    vmv.v.i v0, 2
+; V-NEXT:    vmv.v.i v0, 1
 ; V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; V-NEXT:    vrgather.vi v9, v8, 1
-; V-NEXT:    vslidedown.vi v9, v8, 2, v0.t
+; V-NEXT:    vslidedown.vi v9, v8, 2
+; V-NEXT:    vrgather.vi v9, v8, 1, v0.t
 ; V-NEXT:    vse64.v v9, (a1)
 ; V-NEXT:    ret
 ;
@@ -327,10 +327,10 @@ define void @vnsrl_64_double(ptr %in, ptr %out) {
 ; V:       # %bb.0: # %entry
 ; V-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
 ; V-NEXT:    vle64.v v8, (a0)
-; V-NEXT:    vmv.v.i v0, 2
+; V-NEXT:    vmv.v.i v0, 1
 ; V-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; V-NEXT:    vrgather.vi v9, v8, 1
-; V-NEXT:    vslidedown.vi v9, v8, 2, v0.t
+; V-NEXT:    vslidedown.vi v9, v8, 2
+; V-NEXT:    vrgather.vi v9, v8, 1, v0.t
 ; V-NEXT:    vse64.v v9, (a1)
 ; V-NEXT:    ret
 ;

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll

Lines changed: 8 additions & 8 deletions
@@ -80,10 +80,10 @@ define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v10, v8, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 2
-; CHECK-NEXT:    vrgather.vi v9, v8, 1
-; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 1
+; CHECK-NEXT:    vmv1r.v v9, v10
+; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
 ; CHECK-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-NEXT:    ret
   %retval = call {<2 x i64>, <2 x i64>} @llvm.vector.deinterleave2.v4i64(<4 x i64> %vec)
@@ -166,10 +166,10 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v10, v8, 2
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v0, 2
-; CHECK-NEXT:    vrgather.vi v9, v8, 1
-; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vmv.v.i v0, 1
+; CHECK-NEXT:    vmv1r.v v9, v10
+; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
 ; CHECK-NEXT:    vslideup.vi v8, v10, 1
 ; CHECK-NEXT:    ret
   %retval = call {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double> %vec)
