[RegAlloc] Scale the spill weight by target factor #113675

Merged: 1 commit, Mar 13, 2025
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -958,6 +958,9 @@ class TargetRegisterInfo : public MCRegisterInfo {
  /// Returns a -1 terminated array of pressure set IDs.
  virtual const int *getRegUnitPressureSets(unsigned RegUnit) const = 0;

  /// Get the scale factor of spill weight for this register class.
  virtual float getSpillWeightScaleFactor(const TargetRegisterClass *RC) const;

  /// Get a list of 'hint' registers that the register allocator should try
  /// first when allocating a physical register for the virtual register
  /// VirtReg. These registers are effectively moved to the front of the
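
The hook defaults to 1.0 (see the TargetRegisterInfo.cpp hunk below), so targets that do not override it are unaffected. As a rough illustration, a hypothetical out-of-tree backend could override it the same way the RISC-V change further down does; every "MyTarget" name here is illustrative, not part of this patch:

```cpp
// Sketch only: assumes an LLVM backend named MyTarget whose TableGen'd
// base class is MyTargetGenRegisterInfo and which defines a hypothetical
// 8-register tuple class GPRx8RegClass.
struct MyTargetRegisterInfo : public MyTargetGenRegisterInfo {
  float getSpillWeightScaleFactor(
      const TargetRegisterClass *RC) const override {
    // A tuple class occupies several register units, so keeping it in a
    // register frees proportionally more of the register file when the
    // allocator decides to spill something else instead.
    return RC == &MyTarget::GPRx8RegClass ? 8.0F : 1.0F;
  }
};
```
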
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -335,6 +335,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
  if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
    TotalWeight *= 0.5F;

  // Finally, we scale the weight by the scale factor of the register class.
  const TargetRegisterClass *RC = MRI.getRegClass(LI.reg());
  TotalWeight *= TRI.getSpillWeightScaleFactor(RC);

  if (IsLocalSplitArtifact)
    return normalize(TotalWeight, Start->distance(*End), NumInstr);
  return normalize(TotalWeight, LI.getSize(), NumInstr);
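
To make the arithmetic concrete, here is a minimal standalone sketch (not LLVM code) of how the factor shifts the balance between two competing intervals. The 0.5 rematerialization discount mirrors the code above; the factor of 8.0 assumes a register class whose weight is 8, as with RISC-V's LMUL=8 class below:

```cpp
#include <cstdio>

// Mirrors the tail of weightCalcHelper: optional remat discount, then the
// new register-class scale factor (the final normalization is omitted).
static float scaledWeight(float TotalWeight, bool Rematerializable,
                          float RCFactor) {
  if (Rematerializable)
    TotalWeight *= 0.5F;
  return TotalWeight * RCFactor;
}

int main() {
  float M1 = scaledWeight(10.0F, false, 1.0F); // e.g. an LMUL=1 interval
  float M8 = scaledWeight(10.0F, false, 8.0F); // e.g. an LMUL=8 interval
  // The LMUL=8 interval now weighs 8x more, so with otherwise equal use
  // counts the allocator prefers to spill or split the LMUL=1 interval.
  std::printf("m1 weight = %.1f, m8 weight = %.1f\n", M1, M8);
  return 0;
}
```
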
5 changes: 5 additions & 0 deletions llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -456,6 +456,11 @@ bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
  return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg);
}

float TargetRegisterInfo::getSpillWeightScaleFactor(
    const TargetRegisterClass *RC) const {
  return 1.0;
}

// Compute target-independent register allocator hints to help eliminate copies.
bool TargetRegisterInfo::getRegAllocationHints(
    Register VirtReg, ArrayRef<MCPhysReg> Order,
5 changes: 5 additions & 0 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -810,6 +810,11 @@ RISCVRegisterInfo::getRegisterCostTableIndex(const MachineFunction &MF) const {
             : 0;
}

float RISCVRegisterInfo::getSpillWeightScaleFactor(
    const TargetRegisterClass *RC) const {
  return getRegClassWeight(RC).RegWeight;
}

// Add two address hints to improve chances of being able to use a compressed
// instruction.
bool RISCVRegisterInfo::getRegAllocationHints(
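
For the RVV register classes, getRegClassWeight returns the TableGen-computed RegWeight, which (assuming one register unit per vector register) tracks LMUL: roughly 1 for VR, 2 for VRM2, 4 for VRM4, and 8 for VRM8. The test updates below show the intended effect: whole-register m8 spills around the masked vector ops disappear in favor of keeping values live in registers, at the cost of a few extra vlenb reads, as the inline review comment in bitreverse-vp.ll notes.
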
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -130,6 +130,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {

  unsigned getRegisterCostTableIndex(const MachineFunction &MF) const override;

  float getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override;

  bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
                             SmallVectorImpl<MCPhysReg> &Hints,
                             const MachineFunction &MF, const VirtRegMap *VRM,
37 changes: 6 additions & 31 deletions llvm/test/CodeGen/RISCV/rvv/abs-vp.ll
@@ -561,19 +561,8 @@ declare <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64>, i1 immarg
define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_abs_nxv16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: sub a3, a0, a1
@@ -582,30 +571,16 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64(<vscale x 16 x i64> %va, <vscale x 1
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vrsub.vi v8, v16, 0, v0.t
; CHECK-NEXT: vmax.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vrsub.vi v24, v16, 0, v0.t
; CHECK-NEXT: vmax.vv v16, v16, v24, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB46_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB46_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t
; CHECK-NEXT: vmax.vv v8, v8, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: vrsub.vi v24, v8, 0, v0.t
; CHECK-NEXT: vmax.vv v8, v8, v24, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x i64> @llvm.vp.abs.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x i64> %v
136 changes: 57 additions & 79 deletions llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -2302,33 +2302,35 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v16, (a5), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: lui a3, 4080
; RV32-NEXT: vand.vx v24, v8, a3, v0.t
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: vand.vx v16, v8, a3, v0.t
; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
[Inline review comment from the PR author] This can be considered a small regression because we have more reads of vscale (vlenb).

; RV32-NEXT: slli a5, a5, 3
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: slli a5, a5, 3
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
@@ -2342,7 +2344,7 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: vand.vx v24, v24, a3, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2360,7 +2362,7 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2687,33 +2689,35 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v16, (a5), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: lui a3, 4080
; RV32-NEXT: vand.vx v24, v8, a3, v0.t
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: vand.vx v16, v8, a3, v0.t
; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 3
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: slli a5, a5, 3
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
@@ -2727,7 +2731,7 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: vand.vx v24, v24, a3, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2745,7 +2749,7 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -3052,19 +3056,8 @@ declare <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16>, <v
define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_bitreverse_nxv64i16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: lui a2, 3
@@ -3080,63 +3073,48 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16(<vscale x 64 x i16> %va, <vsc
; CHECK-NEXT: addi a2, a2, 819
; CHECK-NEXT: addi a1, a6, 1365
; CHECK-NEXT: vsetvli zero, a5, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t
; CHECK-NEXT: vsrl.vi v24, v16, 8, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT: vor.vv v16, v16, v24, v0.t
; CHECK-NEXT: vsrl.vi v24, v16, 4, v0.t
; CHECK-NEXT: vand.vx v24, v24, a4, v0.t
; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t
; CHECK-NEXT: vor.vv v16, v24, v16, v0.t
; CHECK-NEXT: vsrl.vi v24, v16, 2, v0.t
; CHECK-NEXT: vand.vx v24, v24, a2, v0.t
; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t
; CHECK-NEXT: vor.vv v16, v24, v16, v0.t
; CHECK-NEXT: vsrl.vi v24, v16, 1, v0.t
; CHECK-NEXT: vand.vx v24, v24, a1, v0.t
; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a5, sp, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t
; CHECK-NEXT: vor.vv v16, v24, v16, v0.t
; CHECK-NEXT: bltu a0, a3, .LBB46_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a3
; CHECK-NEXT: .LBB46_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t
; CHECK-NEXT: vsrl.vi v24, v8, 8, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t
; CHECK-NEXT: vand.vx v16, v16, a4, v0.t
; CHECK-NEXT: vor.vv v8, v8, v24, v0.t
; CHECK-NEXT: vsrl.vi v24, v8, 4, v0.t
; CHECK-NEXT: vand.vx v24, v24, a4, v0.t
; CHECK-NEXT: vand.vx v8, v8, a4, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t
; CHECK-NEXT: vand.vx v16, v16, a2, v0.t
; CHECK-NEXT: vor.vv v8, v24, v8, v0.t
; CHECK-NEXT: vsrl.vi v24, v8, 2, v0.t
; CHECK-NEXT: vand.vx v24, v24, a2, v0.t
; CHECK-NEXT: vand.vx v8, v8, a2, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t
; CHECK-NEXT: vand.vx v16, v16, a1, v0.t
; CHECK-NEXT: vor.vv v8, v24, v8, v0.t
; CHECK-NEXT: vsrl.vi v24, v8, 1, v0.t
; CHECK-NEXT: vand.vx v24, v24, a1, v0.t
; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: vor.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16: