Skip to content

Commit bab7920

Browse files
[RISCV][CG]Use processShuffleMasks for per-register shuffles
This patch adds usage of processShuffleMasks in codegen, in lowerShuffleViaVRegSplitting. This function is already used for X86 shuffle cost estimation and in the DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE function; using it here unifies the code. Reviewers: topperc, wangpc-pp, lukel97, preames Reviewed By: preames Pull Request: #121765
1 parent 61e2841 commit bab7920

File tree

6 files changed

+144
-102
lines changed

6 files changed

+144
-102
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,8 @@ void processShuffleMasks(
270270
ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
271271
unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
272272
function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
273-
function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);
273+
function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
274+
ManyInputsAction);
274275

275276
/// Compute the demanded elements mask of horizontal binary operations. A
276277
/// horizontal operation combines two adjacent elements in a vector operand.

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,8 @@ void llvm::processShuffleMasks(
557557
ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
558558
unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
559559
function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
560-
function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
560+
function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
561+
ManyInputsAction) {
561562
SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
562563
// Try to perform better estimation of the permutation.
563564
// 1. Split the source/destination vectors into real registers.
@@ -628,6 +629,7 @@ void llvm::processShuffleMasks(
628629
}
629630
};
630631
int SecondIdx;
632+
bool NewReg = true;
631633
do {
632634
int FirstIdx = -1;
633635
SecondIdx = -1;
@@ -645,15 +647,17 @@ void llvm::processShuffleMasks(
645647
SecondIdx = I;
646648
SecondMask = RegMask;
647649
CombineMasks(FirstMask, SecondMask);
648-
ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
650+
ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);
651+
NewReg = false;
649652
NormalizeMask(FirstMask);
650653
RegMask.clear();
651654
SecondMask = FirstMask;
652655
SecondIdx = FirstIdx;
653656
}
654657
if (FirstIdx != SecondIdx && SecondIdx >= 0) {
655658
CombineMasks(SecondMask, FirstMask);
656-
ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
659+
ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);
660+
NewReg = false;
657661
Dest[FirstIdx].clear();
658662
NormalizeMask(SecondMask);
659663
}

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3059,8 +3059,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
30593059
Inputs[Idx] = Output;
30603060
},
30613061
[&AccumulateResults, &Output, &DAG = DAG, NewVT, &DL, &Inputs,
3062-
&TmpInputs,
3063-
&BuildVector](ArrayRef<int> Mask, unsigned Idx1, unsigned Idx2) {
3062+
&TmpInputs, &BuildVector](ArrayRef<int> Mask, unsigned Idx1,
3063+
unsigned Idx2, bool /*Unused*/) {
30643064
if (AccumulateResults(Idx1)) {
30653065
if (Inputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
30663066
Inputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 82 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -5104,7 +5104,6 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
51045104
SDValue V1 = SVN->getOperand(0);
51055105
SDValue V2 = SVN->getOperand(1);
51065106
ArrayRef<int> Mask = SVN->getMask();
5107-
unsigned NumElts = VT.getVectorNumElements();
51085107

51095108
// If we don't know exact data layout, not much we can do. If this
51105109
// is already m1 or smaller, no point in splitting further.
@@ -5121,58 +5120,102 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
51215120

51225121
MVT ElemVT = VT.getVectorElementType();
51235122
unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5124-
unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
5125-
5126-
SmallVector<std::pair<int, SmallVector<int>>>
5127-
OutMasks(VRegsPerSrc, {-1, {}});
5128-
5129-
// Check if our mask can be done as a 1-to-1 mapping from source
5130-
// to destination registers in the group without needing to
5131-
// write each destination more than once.
5132-
for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
5133-
int DstVecIdx = DstIdx / ElemsPerVReg;
5134-
int DstSubIdx = DstIdx % ElemsPerVReg;
5135-
int SrcIdx = Mask[DstIdx];
5136-
if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
5137-
continue;
5138-
int SrcVecIdx = SrcIdx / ElemsPerVReg;
5139-
int SrcSubIdx = SrcIdx % ElemsPerVReg;
5140-
if (OutMasks[DstVecIdx].first == -1)
5141-
OutMasks[DstVecIdx].first = SrcVecIdx;
5142-
if (OutMasks[DstVecIdx].first != SrcVecIdx)
5143-
// Note: This case could easily be handled by keeping track of a chain
5144-
// of source values and generating two element shuffles below. This is
5145-
// less an implementation question, and more a profitability one.
5146-
return SDValue();
5147-
5148-
OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
5149-
OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
5150-
}
51515123

51525124
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
51535125
MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
51545126
MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
51555127
assert(M1VT == getLMUL1VT(M1VT));
51565128
unsigned NumOpElts = M1VT.getVectorMinNumElements();
5157-
SDValue Vec = DAG.getUNDEF(ContainerVT);
5129+
unsigned NumElts = ContainerVT.getVectorMinNumElements();
5130+
unsigned NumOfSrcRegs = NumElts / NumOpElts;
5131+
unsigned NumOfDestRegs = NumElts / NumOpElts;
51585132
// The following semantically builds up a fixed length concat_vector
51595133
// of the component shuffle_vectors. We eagerly lower to scalable here
51605134
// to avoid DAG combining it back to a large shuffle_vector again.
51615135
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
51625136
V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5163-
for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
5164-
auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
5165-
if (SrcVecIdx == -1)
5166-
continue;
5167-
unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
5168-
SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
5137+
SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5138+
Operands;
5139+
processShuffleMasks(
5140+
Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5141+
[&]() { Operands.emplace_back(); },
5142+
[&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5143+
Operands.emplace_back().emplace_back(
5144+
SrcVecIdx, UINT_MAX,
5145+
SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5146+
},
5147+
[&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5148+
if (NewReg)
5149+
Operands.emplace_back();
5150+
Operands.back().emplace_back(
5151+
Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5152+
});
5153+
assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5154+
// Note: check that we do not emit too many shuffles here to prevent code
5155+
// size explosion.
5156+
// TODO: investigate, if it can be improved by extra analysis of the masks to
5157+
// check if the code is more profitable.
5158+
unsigned NumShuffles = std::accumulate(
5159+
Operands.begin(), Operands.end(), 0u,
5160+
[&](unsigned N,
5161+
ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5162+
if (Data.empty())
5163+
return N;
5164+
N += Data.size();
5165+
for (const auto &P : Data) {
5166+
unsigned Idx2 = std::get<1>(P);
5167+
ArrayRef<int> Mask = std::get<2>(P);
5168+
if (Idx2 != UINT_MAX)
5169+
++N;
5170+
else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5171+
--N;
5172+
}
5173+
return N;
5174+
});
5175+
if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5176+
(NumOfDestRegs <= 2 && NumShuffles >= 4))
5177+
return SDValue();
5178+
auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
51695179
SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
51705180
DAG.getVectorIdxConstant(ExtractIdx, DL));
51715181
SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5172-
SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
5173-
SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
5174-
unsigned InsertIdx = DstVecIdx * NumOpElts;
5175-
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
5182+
return SubVec;
5183+
};
5184+
auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5185+
ArrayRef<int> Mask) {
5186+
SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5187+
return SubVec;
5188+
};
5189+
SDValue Vec = DAG.getUNDEF(ContainerVT);
5190+
for (auto [I, Data] : enumerate(Operands)) {
5191+
if (Data.empty())
5192+
continue;
5193+
SmallDenseMap<unsigned, SDValue, 4> Values;
5194+
for (unsigned I : seq<unsigned>(Data.size())) {
5195+
const auto &[Idx1, Idx2, _] = Data[I];
5196+
if (Values.contains(Idx1)) {
5197+
assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
5198+
"Expected both indices to be extracted already.");
5199+
break;
5200+
}
5201+
SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5202+
(Idx1 % NumOfSrcRegs) * NumOpElts);
5203+
Values[Idx1] = V;
5204+
if (Idx2 != UINT_MAX)
5205+
Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5206+
(Idx2 % NumOfSrcRegs) * NumOpElts);
5207+
}
5208+
SDValue V;
5209+
for (const auto &[Idx1, Idx2, Mask] : Data) {
5210+
SDValue V1 = Values.at(Idx1);
5211+
SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5212+
V = PerformShuffle(V1, V2, Mask);
5213+
Values[Idx1] = V;
5214+
}
5215+
5216+
unsigned InsertIdx = I * NumOpElts;
5217+
V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5218+
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
51765219
DAG.getVectorIdxConstant(InsertIdx, DL));
51775220
}
51785221
return convertFromScalableVector(VT, Vec, DAG, Subtarget);

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,9 +1774,9 @@ InstructionCost X86TTIImpl::getShuffleCost(
17741774
PrevSrcReg = SrcReg;
17751775
PrevRegMask = RegMask;
17761776
},
1777-
[this, SingleOpTy, CostKind, &Cost](ArrayRef<int> RegMask,
1778-
unsigned /*Unused*/,
1779-
unsigned /*Unused*/) {
1777+
[this, SingleOpTy, CostKind,
1778+
&Cost](ArrayRef<int> RegMask, unsigned /*Unused*/,
1779+
unsigned /*Unused*/, bool /*Unused*/) {
17801780
Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, RegMask,
17811781
CostKind, 0, nullptr);
17821782
});

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll

Lines changed: 48 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -168,12 +168,11 @@ define <4 x i64> @m2_splat_into_slide_two_source_v2_lo(<4 x i64> %v1, <4 x i64>
168168
define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) vscale_range(2,2) {
169169
; CHECK-LABEL: m2_splat_into_slide_two_source:
170170
; CHECK: # %bb.0:
171-
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
172-
; CHECK-NEXT: vmv.v.i v0, 12
173-
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
171+
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
172+
; CHECK-NEXT: vslidedown.vi v13, v10, 1
173+
; CHECK-NEXT: vslideup.vi v13, v11, 1
174174
; CHECK-NEXT: vrgather.vi v12, v8, 0
175-
; CHECK-NEXT: vslideup.vi v12, v10, 1, v0.t
176-
; CHECK-NEXT: vmv.v.v v8, v12
175+
; CHECK-NEXT: vmv2r.v v8, v12
177176
; CHECK-NEXT: ret
178177
%res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 5, i32 6>
179178
ret <4 x i64> %res
@@ -183,18 +182,17 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
183182
; CHECK-LABEL: shuffle1:
184183
; CHECK: # %bb.0:
185184
; CHECK-NEXT: addi a0, a0, 252
185+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
186+
; CHECK-NEXT: vmv.v.i v8, 0
186187
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
187-
; CHECK-NEXT: vid.v v8
188+
; CHECK-NEXT: vid.v v10
188189
; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
189-
; CHECK-NEXT: vle32.v v9, (a0)
190-
; CHECK-NEXT: li a0, 175
191-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
192-
; CHECK-NEXT: vsrl.vi v8, v8, 1
193-
; CHECK-NEXT: vmv.s.x v0, a0
194-
; CHECK-NEXT: vadd.vi v8, v8, 1
195-
; CHECK-NEXT: vrgather.vv v11, v9, v8
196-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
197-
; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
190+
; CHECK-NEXT: vle32.v v11, (a0)
191+
; CHECK-NEXT: vmv.v.i v0, 5
192+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
193+
; CHECK-NEXT: vsrl.vi v10, v10, 1
194+
; CHECK-NEXT: vadd.vi v10, v10, 1
195+
; CHECK-NEXT: vrgather.vv v9, v11, v10, v0.t
198196
; CHECK-NEXT: addi a0, a1, 672
199197
; CHECK-NEXT: vs2r.v v8, (a0)
200198
; CHECK-NEXT: ret
@@ -211,15 +209,15 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
211209
define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
212210
; CHECK-LABEL: shuffle2:
213211
; CHECK: # %bb.0:
214-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
215-
; CHECK-NEXT: vid.v v9
216-
; CHECK-NEXT: li a0, -97
217-
; CHECK-NEXT: vadd.vv v9, v9, v9
218-
; CHECK-NEXT: vrsub.vi v9, v9, 4
219-
; CHECK-NEXT: vmv.s.x v0, a0
220-
; CHECK-NEXT: vrgather.vv v13, v8, v9
221212
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
222-
; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
213+
; CHECK-NEXT: vmv1r.v v12, v8
214+
; CHECK-NEXT: vmv.v.i v8, 0
215+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
216+
; CHECK-NEXT: vid.v v13
217+
; CHECK-NEXT: vadd.vv v13, v13, v13
218+
; CHECK-NEXT: vmv.v.i v0, 6
219+
; CHECK-NEXT: vrsub.vi v13, v13, 4
220+
; CHECK-NEXT: vrgather.vv v9, v12, v13, v0.t
223221
; CHECK-NEXT: ret
224222
%b = extractelement <4 x float> %a, i32 2
225223
%c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
@@ -231,16 +229,15 @@ define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
231229
define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vscale_range(2,2) {
232230
; RV32-LABEL: extract_any_extend_vector_inreg_v16i64:
233231
; RV32: # %bb.0:
234-
; RV32-NEXT: li a1, 16
235-
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
232+
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
236233
; RV32-NEXT: vmv.v.i v16, 0
237-
; RV32-NEXT: vmv.s.x v0, a1
234+
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
235+
; RV32-NEXT: vmv.v.i v0, 1
238236
; RV32-NEXT: li a1, 32
239-
; RV32-NEXT: vrgather.vi v16, v8, 15, v0.t
240-
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
237+
; RV32-NEXT: vrgather.vi v18, v15, 1, v0.t
238+
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
241239
; RV32-NEXT: vslidedown.vx v8, v16, a0
242240
; RV32-NEXT: vmv.x.s a0, v8
243-
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
244241
; RV32-NEXT: vsrl.vx v8, v8, a1
245242
; RV32-NEXT: vmv.x.s a1, v8
246243
; RV32-NEXT: ret
@@ -258,13 +255,14 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
258255
; RV64-NEXT: addi s0, sp, 256
259256
; RV64-NEXT: .cfi_def_cfa s0, 0
260257
; RV64-NEXT: andi sp, sp, -128
261-
; RV64-NEXT: li a1, -17
258+
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
259+
; RV64-NEXT: vmv.v.i v0, 1
262260
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
263-
; RV64-NEXT: vmv.s.x v0, a1
264-
; RV64-NEXT: vrgather.vi v16, v8, 15
265-
; RV64-NEXT: vmerge.vim v8, v16, 0, v0
261+
; RV64-NEXT: vmv.v.i v16, 0
262+
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
263+
; RV64-NEXT: vrgather.vi v18, v15, 1, v0.t
266264
; RV64-NEXT: mv s2, sp
267-
; RV64-NEXT: vs8r.v v8, (s2)
265+
; RV64-NEXT: vs8r.v v16, (s2)
268266
; RV64-NEXT: andi a0, a0, 15
269267
; RV64-NEXT: li a1, 8
270268
; RV64-NEXT: call __muldi3
@@ -290,21 +288,16 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca
290288
define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range(2,2) {
291289
; CHECK-LABEL: shuffles_add:
292290
; CHECK: # %bb.0:
291+
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
292+
; CHECK-NEXT: vmv1r.v v13, v10
293+
; CHECK-NEXT: vslideup.vi v13, v11, 1
294+
; CHECK-NEXT: vmv1r.v v8, v9
295+
; CHECK-NEXT: vmv.v.i v0, 1
296+
; CHECK-NEXT: vrgather.vi v12, v9, 0
297+
; CHECK-NEXT: vmv1r.v v9, v11
298+
; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t
293299
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
294-
; CHECK-NEXT: vrgather.vi v12, v8, 2
295-
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
296-
; CHECK-NEXT: vid.v v14
297-
; CHECK-NEXT: vmv.v.i v0, 12
298-
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
299-
; CHECK-NEXT: vrgather.vi v16, v8, 3
300-
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
301-
; CHECK-NEXT: vadd.vv v8, v14, v14
302-
; CHECK-NEXT: vadd.vi v9, v8, -4
303-
; CHECK-NEXT: vadd.vi v8, v8, -3
304-
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
305-
; CHECK-NEXT: vrgatherei16.vv v12, v10, v9, v0.t
306-
; CHECK-NEXT: vrgatherei16.vv v16, v10, v8, v0.t
307-
; CHECK-NEXT: vfadd.vv v8, v12, v16
300+
; CHECK-NEXT: vfadd.vv v8, v12, v8
308301
; CHECK-NEXT: ret
309302
%3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
310303
%4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
@@ -332,12 +325,13 @@ entry:
332325
define <16 x i32> @m4_linear_num_of_shuffles_in_chunks(<16 x i32> %0) vscale_range(2,2) {
333326
; CHECK-LABEL: m4_linear_num_of_shuffles_in_chunks:
334327
; CHECK: # %bb.0: # %entry
335-
; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
336-
; CHECK-NEXT: addi a0, a0, %lo(.LCPI18_0)
337-
; CHECK-NEXT: vl2re16.v v16, (a0)
338-
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
339-
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
340-
; CHECK-NEXT: vmv.v.v v8, v12
328+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
329+
; CHECK-NEXT: vmv.v.i v0, 8
330+
; CHECK-NEXT: vrgather.vi v12, v10, 0
331+
; CHECK-NEXT: vrgather.vi v12, v11, 0, v0.t
332+
; CHECK-NEXT: vrgather.vi v14, v8, 2
333+
; CHECK-NEXT: vrgather.vi v15, v10, 3
334+
; CHECK-NEXT: vmv4r.v v8, v12
341335
; CHECK-NEXT: ret
342336
entry:
343337
%1 = shufflevector <16 x i32> %0, <16 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 8, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 11, i32 poison>

0 commit comments

Comments
 (0)