Skip to content

Commit c74ba57

Browse files
authored
[RISCV] Convert AVLs with vlenb to VLMAX where possible (#97800)
Given an AVL that's computed from vlenb, if it's equal to VLMAX then we can replace it with the VLMAX sentinel value. The main motivation is to be able to express an EVL of VLMAX in VP intrinsics whilst emitting vsetvli a0, zero, so that we can replace llvm.riscv.masked.strided.{load,store} with their VP counterparts. This is done in RISCVVectorPeephole (previously RISCVFoldMasks, renamed to account for the fact that it no longer just folds masks) instead of SelectionDAG since there are multiple places where VP nodes are lowered that would have needed to be handled. This also avoids doing it in RISCVInsertVSETVLI as it's much harder to lookup the value of the AVL, and in RISCVVectorPeephole we can take advantage of DeadMachineInstrElim to remove any leftover PseudoReadVLENBs.
1 parent 7eae9bb commit c74ba57

File tree

12 files changed

+170
-68
lines changed

12 files changed

+170
-68
lines changed

llvm/lib/Target/RISCV/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ add_llvm_target(RISCVCodeGen
3434
RISCVMakeCompressible.cpp
3535
RISCVExpandAtomicPseudoInsts.cpp
3636
RISCVExpandPseudoInsts.cpp
37-
RISCVFoldMasks.cpp
3837
RISCVFrameLowering.cpp
3938
RISCVGatherScatterLowering.cpp
4039
RISCVInsertVSETVLI.cpp
@@ -55,6 +54,7 @@ add_llvm_target(RISCVCodeGen
5554
RISCVTargetMachine.cpp
5655
RISCVTargetObjectFile.cpp
5756
RISCVTargetTransformInfo.cpp
57+
RISCVVectorPeephole.cpp
5858
GISel/RISCVCallLowering.cpp
5959
GISel/RISCVInstructionSelector.cpp
6060
GISel/RISCVLegalizerInfo.cpp

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
4040
FunctionPass *createRISCVGatherScatterLoweringPass();
4141
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
4242

43-
FunctionPass *createRISCVFoldMasksPass();
44-
void initializeRISCVFoldMasksPass(PassRegistry &);
43+
FunctionPass *createRISCVVectorPeepholePass();
44+
void initializeRISCVVectorPeepholePass(PassRegistry &);
4545

4646
FunctionPass *createRISCVOptWInstrsPass();
4747
void initializeRISCVOptWInstrsPass(PassRegistry &);

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
121121
initializeRISCVOptWInstrsPass(*PR);
122122
initializeRISCVPreRAExpandPseudoPass(*PR);
123123
initializeRISCVExpandPseudoPass(*PR);
124-
initializeRISCVFoldMasksPass(*PR);
124+
initializeRISCVVectorPeepholePass(*PR);
125125
initializeRISCVInsertVSETVLIPass(*PR);
126126
initializeRISCVInsertReadWriteCSRPass(*PR);
127127
initializeRISCVInsertWriteVXRMPass(*PR);
@@ -532,7 +532,7 @@ void RISCVPassConfig::addPreEmitPass2() {
532532
}
533533

534534
void RISCVPassConfig::addMachineSSAOptimization() {
535-
addPass(createRISCVFoldMasksPass());
535+
addPass(createRISCVVectorPeepholePass());
536536

537537
TargetPassConfig::addMachineSSAOptimization();
538538

llvm/lib/Target/RISCV/RISCVFoldMasks.cpp renamed to llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp

Lines changed: 85 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,31 @@
1-
//===- RISCVFoldMasks.cpp - MI Vector Pseudo Mask Peepholes ---------------===//
1+
//===- RISCVVectorPeephole.cpp - MI Vector Pseudo Peepholes ---------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
7-
//===---------------------------------------------------------------------===//
7+
//===----------------------------------------------------------------------===//
88
//
9-
// This pass performs various peephole optimisations that fold masks into vector
10-
// pseudo instructions after instruction selection.
9+
// This pass performs various vector pseudo peephole optimisations after
10+
// instruction selection.
1111
//
12-
// Currently it converts
12+
// Currently it converts vmerge.vvm to vmv.v.v
1313
// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
1414
// ->
1515
// PseudoVMV_V_V %false, %true, %vl, %sew
1616
//
17-
//===---------------------------------------------------------------------===//
17+
// And masked pseudos to unmasked pseudos
18+
// PseudoVADD_V_V_MASK %passthru, %a, %b, %allonesmask, %vl, sew, policy
19+
// ->
20+
// PseudoVADD_V_V %passthru %a, %b, %vl, sew, policy
21+
//
22+
// It also converts AVLs to VLMAX where possible
23+
// %vl = VLENB * something
24+
// PseudoVADD_V_V %passthru, %a, %b, %vl, sew, policy
25+
// ->
26+
// PseudoVADD_V_V %passthru, %a, %b, -1, sew, policy
27+
//
28+
//===----------------------------------------------------------------------===//
1829

1930
#include "RISCV.h"
2031
#include "RISCVISelDAGToDAG.h"
@@ -26,17 +37,17 @@
2637

2738
using namespace llvm;
2839

29-
#define DEBUG_TYPE "riscv-fold-masks"
40+
#define DEBUG_TYPE "riscv-vector-peephole"
3041

3142
namespace {
3243

33-
class RISCVFoldMasks : public MachineFunctionPass {
44+
class RISCVVectorPeephole : public MachineFunctionPass {
3445
public:
3546
static char ID;
3647
const TargetInstrInfo *TII;
3748
MachineRegisterInfo *MRI;
3849
const TargetRegisterInfo *TRI;
39-
RISCVFoldMasks() : MachineFunctionPass(ID) {}
50+
RISCVVectorPeephole() : MachineFunctionPass(ID) {}
4051

4152
bool runOnMachineFunction(MachineFunction &MF) override;
4253
MachineFunctionProperties getRequiredProperties() const override {
@@ -47,6 +58,7 @@ class RISCVFoldMasks : public MachineFunctionPass {
4758
StringRef getPassName() const override { return "RISC-V Fold Masks"; }
4859

4960
private:
61+
bool convertToVLMAX(MachineInstr &MI) const;
5062
bool convertToUnmasked(MachineInstr &MI) const;
5163
bool convertVMergeToVMv(MachineInstr &MI) const;
5264

@@ -58,11 +70,65 @@ class RISCVFoldMasks : public MachineFunctionPass {
5870

5971
} // namespace
6072

61-
char RISCVFoldMasks::ID = 0;
73+
char RISCVVectorPeephole::ID = 0;
74+
75+
INITIALIZE_PASS(RISCVVectorPeephole, DEBUG_TYPE, "RISC-V Fold Masks", false,
76+
false)
77+
78+
// If an AVL is a VLENB that's possibly scaled to be equal to VLMAX, convert it
79+
// to the VLMAX sentinel value.
80+
bool RISCVVectorPeephole::convertToVLMAX(MachineInstr &MI) const {
81+
if (!RISCVII::hasVLOp(MI.getDesc().TSFlags) ||
82+
!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
83+
return false;
84+
MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
85+
if (!VL.isReg())
86+
return false;
87+
MachineInstr *Def = MRI->getVRegDef(VL.getReg());
88+
if (!Def)
89+
return false;
90+
91+
// Fixed-point value, denominator=8
92+
uint64_t ScaleFixed = 8;
93+
// Check if the VLENB was potentially scaled with slli/srli
94+
if (Def->getOpcode() == RISCV::SLLI) {
95+
assert(Def->getOperand(2).getImm() < 64);
96+
ScaleFixed <<= Def->getOperand(2).getImm();
97+
Def = MRI->getVRegDef(Def->getOperand(1).getReg());
98+
} else if (Def->getOpcode() == RISCV::SRLI) {
99+
assert(Def->getOperand(2).getImm() < 64);
100+
ScaleFixed >>= Def->getOperand(2).getImm();
101+
Def = MRI->getVRegDef(Def->getOperand(1).getReg());
102+
}
103+
104+
if (!Def || Def->getOpcode() != RISCV::PseudoReadVLENB)
105+
return false;
106+
107+
auto LMUL = RISCVVType::decodeVLMUL(RISCVII::getLMul(MI.getDesc().TSFlags));
108+
// Fixed-point value, denominator=8
109+
unsigned LMULFixed = LMUL.second ? (8 / LMUL.first) : 8 * LMUL.first;
110+
unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
111+
// A Log2SEW of 0 is an operation on mask registers only
112+
unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
113+
assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
114+
assert(8 * LMULFixed / SEW > 0);
62115

63-
INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
116+
// AVL = (VLENB * Scale)
117+
//
118+
// VLMAX = (VLENB * 8 * LMUL) / SEW
119+
//
120+
// AVL == VLMAX
121+
// -> VLENB * Scale == (VLENB * 8 * LMUL) / SEW
122+
// -> Scale == (8 * LMUL) / SEW
123+
if (ScaleFixed != 8 * LMULFixed / SEW)
124+
return false;
64125

65-
bool RISCVFoldMasks::isAllOnesMask(const MachineInstr *MaskDef) const {
126+
VL.ChangeToImmediate(RISCV::VLMaxSentinel);
127+
128+
return true;
129+
}
130+
131+
bool RISCVVectorPeephole::isAllOnesMask(const MachineInstr *MaskDef) const {
66132
assert(MaskDef && MaskDef->isCopy() &&
67133
MaskDef->getOperand(0).getReg() == RISCV::V0);
68134
Register SrcReg = TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
@@ -91,7 +157,7 @@ bool RISCVFoldMasks::isAllOnesMask(const MachineInstr *MaskDef) const {
91157

92158
// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
93159
// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
94-
bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI) const {
160+
bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
95161
#define CASE_VMERGE_TO_VMV(lmul) \
96162
case RISCV::PseudoVMERGE_VVM_##lmul: \
97163
NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
@@ -134,7 +200,7 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI) const {
134200
return true;
135201
}
136202

137-
bool RISCVFoldMasks::convertToUnmasked(MachineInstr &MI) const {
203+
bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
138204
const RISCV::RISCVMaskedPseudoInfo *I =
139205
RISCV::getMaskedPseudoInfo(MI.getOpcode());
140206
if (!I)
@@ -178,7 +244,7 @@ bool RISCVFoldMasks::convertToUnmasked(MachineInstr &MI) const {
178244
return true;
179245
}
180246

181-
bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
247+
bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
182248
if (skipFunction(MF.getFunction()))
183249
return false;
184250

@@ -213,6 +279,7 @@ bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
213279

214280
for (MachineBasicBlock &MBB : MF) {
215281
for (MachineInstr &MI : MBB) {
282+
Changed |= convertToVLMAX(MI);
216283
Changed |= convertToUnmasked(MI);
217284
Changed |= convertVMergeToVMv(MI);
218285
}
@@ -221,4 +288,6 @@ bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
221288
return Changed;
222289
}
223290

224-
FunctionPass *llvm::createRISCVFoldMasksPass() { return new RISCVFoldMasks(); }
291+
FunctionPass *llvm::createRISCVVectorPeepholePass() {
292+
return new RISCVVectorPeephole();
293+
}

llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -305,9 +305,9 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_7(<vscale x 16 x i8> %vec, <vsc
305305
; CHECK: # %bb.0:
306306
; CHECK-NEXT: csrr a0, vlenb
307307
; CHECK-NEXT: srli a1, a0, 3
308-
; CHECK-NEXT: sub a1, a0, a1
309-
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
310-
; CHECK-NEXT: vslideup.vx v8, v10, a1
308+
; CHECK-NEXT: sub a0, a0, a1
309+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
310+
; CHECK-NEXT: vslideup.vx v8, v10, a0
311311
; CHECK-NEXT: ret
312312
%v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 7)
313313
ret <vscale x 16 x i8> %v
@@ -318,9 +318,9 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_15(<vscale x 16 x i8> %vec, <vs
318318
; CHECK: # %bb.0:
319319
; CHECK-NEXT: csrr a0, vlenb
320320
; CHECK-NEXT: srli a1, a0, 3
321-
; CHECK-NEXT: sub a1, a0, a1
322-
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
323-
; CHECK-NEXT: vslideup.vx v9, v10, a1
321+
; CHECK-NEXT: sub a0, a0, a1
322+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
323+
; CHECK-NEXT: vslideup.vx v9, v10, a0
324324
; CHECK-NEXT: ret
325325
%v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 15)
326326
ret <vscale x 16 x i8> %v

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2-
# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-fold-masks \
2+
# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-vector-peephole \
33
# RUN: -verify-machineinstrs | FileCheck %s
44

55
---

llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1436,20 +1436,17 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <v
14361436
define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx16(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) {
14371437
; RV32-LABEL: vadd_vi_nxv32i32_evl_nx16:
14381438
; RV32: # %bb.0:
1439-
; RV32-NEXT: csrr a0, vlenb
1440-
; RV32-NEXT: slli a0, a0, 1
1441-
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1439+
; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma
14421440
; RV32-NEXT: vadd.vi v8, v8, -1, v0.t
14431441
; RV32-NEXT: ret
14441442
;
14451443
; RV64-LABEL: vadd_vi_nxv32i32_evl_nx16:
14461444
; RV64: # %bb.0:
14471445
; RV64-NEXT: csrr a0, vlenb
1448-
; RV64-NEXT: srli a1, a0, 2
1449-
; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
1450-
; RV64-NEXT: vslidedown.vx v24, v0, a1
1451-
; RV64-NEXT: slli a0, a0, 1
1452-
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1446+
; RV64-NEXT: srli a0, a0, 2
1447+
; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
1448+
; RV64-NEXT: vslidedown.vx v24, v0, a0
1449+
; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma
14531450
; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
14541451
; RV64-NEXT: vmv1r.v v0, v24
14551452
; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
3+
4+
define <vscale x 1 x i1> @sew1_srli(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b) {
5+
; CHECK-LABEL: sew1_srli:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
8+
; CHECK-NEXT: vmand.mm v0, v0, v8
9+
; CHECK-NEXT: ret
10+
%vlmax = call i32 @llvm.vscale()
11+
%x = call <vscale x 1 x i1> @llvm.vp.and.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> splat (i1 true), i32 %vlmax)
12+
ret <vscale x 1 x i1> %x
13+
}
14+
15+
define <vscale x 1 x i64> @sew64_srli(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b) {
16+
; CHECK-LABEL: sew64_srli:
17+
; CHECK: # %bb.0:
18+
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
19+
; CHECK-NEXT: vadd.vv v8, v8, v9
20+
; CHECK-NEXT: ret
21+
%vlmax = call i32 @llvm.vscale()
22+
%x = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 true), i32 %vlmax)
23+
ret <vscale x 1 x i64> %x
24+
}
25+
26+
define <vscale x 8 x i64> @sew64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
27+
; CHECK-LABEL: sew64:
28+
; CHECK: # %bb.0:
29+
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
30+
; CHECK-NEXT: vadd.vv v8, v8, v16
31+
; CHECK-NEXT: ret
32+
%vscale = call i32 @llvm.vscale()
33+
%vlmax = shl i32 %vscale, 3
34+
%x = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 true), i32 %vlmax)
35+
ret <vscale x 8 x i64> %x
36+
}
37+
38+
define <vscale x 16 x i32> @sew32_sll(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b) {
39+
; CHECK-LABEL: sew32_sll:
40+
; CHECK: # %bb.0:
41+
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
42+
; CHECK-NEXT: vadd.vv v8, v8, v16
43+
; CHECK-NEXT: ret
44+
%vscale = call i32 @llvm.vscale()
45+
%vlmax = shl i32 %vscale, 4
46+
%x = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 true), i32 %vlmax)
47+
ret <vscale x 16 x i32> %x
48+
}

llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,20 +1073,17 @@ define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i3
10731073
define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx16(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
10741074
; RV32-LABEL: vmax_vx_nxv32i32_evl_nx16:
10751075
; RV32: # %bb.0:
1076-
; RV32-NEXT: csrr a1, vlenb
1077-
; RV32-NEXT: slli a1, a1, 1
1078-
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1076+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
10791077
; RV32-NEXT: vmax.vx v8, v8, a0, v0.t
10801078
; RV32-NEXT: ret
10811079
;
10821080
; RV64-LABEL: vmax_vx_nxv32i32_evl_nx16:
10831081
; RV64: # %bb.0:
10841082
; RV64-NEXT: csrr a1, vlenb
1085-
; RV64-NEXT: srli a2, a1, 2
1086-
; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
1087-
; RV64-NEXT: vslidedown.vx v24, v0, a2
1088-
; RV64-NEXT: slli a1, a1, 1
1089-
; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1083+
; RV64-NEXT: srli a1, a1, 2
1084+
; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
1085+
; RV64-NEXT: vslidedown.vx v24, v0, a1
1086+
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
10901087
; RV64-NEXT: vmax.vx v8, v8, a0, v0.t
10911088
; RV64-NEXT: vmv1r.v v0, v24
10921089
; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma

llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,20 +1072,17 @@ define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i
10721072
define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx16(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
10731073
; RV32-LABEL: vmaxu_vx_nxv32i32_evl_nx16:
10741074
; RV32: # %bb.0:
1075-
; RV32-NEXT: csrr a1, vlenb
1076-
; RV32-NEXT: slli a1, a1, 1
1077-
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1075+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
10781076
; RV32-NEXT: vmaxu.vx v8, v8, a0, v0.t
10791077
; RV32-NEXT: ret
10801078
;
10811079
; RV64-LABEL: vmaxu_vx_nxv32i32_evl_nx16:
10821080
; RV64: # %bb.0:
10831081
; RV64-NEXT: csrr a1, vlenb
1084-
; RV64-NEXT: srli a2, a1, 2
1085-
; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
1086-
; RV64-NEXT: vslidedown.vx v24, v0, a2
1087-
; RV64-NEXT: slli a1, a1, 1
1088-
; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1082+
; RV64-NEXT: srli a1, a1, 2
1083+
; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
1084+
; RV64-NEXT: vslidedown.vx v24, v0, a1
1085+
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
10891086
; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t
10901087
; RV64-NEXT: vmv1r.v v0, v24
10911088
; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma

0 commit comments

Comments
 (0)