Skip to content

Commit 7149518

Browse files
committed
[RISCV] Use vsetvli instead of vlenb in Prologue/Epilogue
Currently, we use `csrr` with `vlenb` to obtain the `VLEN`, but this is not the only option. We can also use `vsetvli` with `e8`/`m1` to get `VLENMAX`, which is equal to the VLEN. This method is preferable on some microarchitectures and makes it easier to obtain values like `VLEN * 2`, `VLEN * 4`, or `VLEN * 8`, reducing the number of instructions needed to calculate VLEN multiples. However, this approach is *NOT* always interchangeable, as it changes the state of `VTYPE` and `VL`, which can alter the behavior of vector instructions, potentially causing incorrect code generation if applied after a vsetvli insertion. Therefore, we limit its use to the prologue/epilogue for now, as there are no vector operations within the prologue/epilogue sequence. With further analysis, we may extend this approach beyond the prologue/epilogue in the future, but starting here should be a good first step. This behavior is guarded by the `+prefer-vsetvli-over-read-vlenb` feature, which is disabled by default for now.
1 parent 369c0a7 commit 7149518

13 files changed

+2082
-27
lines changed

llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
5656
MachineBasicBlock::iterator MBBI);
5757
bool expandRV32ZdinxLoad(MachineBasicBlock &MBB,
5858
MachineBasicBlock::iterator MBBI);
59+
bool expandPseudoReadMulVLENB(MachineBasicBlock &MBB,
60+
MachineBasicBlock::iterator MBBI);
5961
#ifndef NDEBUG
6062
unsigned getInstSizeInBytes(const MachineFunction &MF) const {
6163
unsigned Size = 0;
@@ -164,6 +166,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
164166
case RISCV::PseudoVMSET_M_B64:
165167
// vmset.m vd => vmxnor.mm vd, vd, vd
166168
return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXNOR_MM);
169+
case RISCV::PseudoReadMulVLENB:
170+
return expandPseudoReadMulVLENB(MBB, MBBI);
167171
}
168172

169173
return false;
@@ -415,6 +419,39 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB,
415419
return true;
416420
}
417421

422+
bool RISCVExpandPseudo::expandPseudoReadMulVLENB(
423+
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
424+
DebugLoc DL = MBBI->getDebugLoc();
425+
Register Dst = MBBI->getOperand(0).getReg();
426+
unsigned Mul = MBBI->getOperand(1).getImm();
427+
RISCVVType::VLMUL VLMUL = RISCVVType::VLMUL::LMUL_1;
428+
switch (Mul) {
429+
case 1:
430+
VLMUL = RISCVVType::VLMUL::LMUL_1;
431+
break;
432+
case 2:
433+
VLMUL = RISCVVType::VLMUL::LMUL_2;
434+
break;
435+
case 4:
436+
VLMUL = RISCVVType::VLMUL::LMUL_4;
437+
break;
438+
case 8:
439+
VLMUL = RISCVVType::VLMUL::LMUL_8;
440+
break;
441+
default:
442+
llvm_unreachable("Unexpected VLENB value");
443+
}
444+
unsigned VTypeImm = RISCVVType::encodeVTYPE(
445+
VLMUL, /*SEW*/ 8, /*TailAgnostic*/ true, /*MaskAgnostic*/ true);
446+
447+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::VSETVLI), Dst)
448+
.addReg(RISCV::X0)
449+
.addImm(VTypeImm);
450+
451+
MBBI->eraseFromParent();
452+
return true;
453+
}
454+
418455
class RISCVPreRAExpandPseudo : public MachineFunctionPass {
419456
public:
420457
const RISCVSubtarget *STI;

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,6 +1534,12 @@ def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "V
15341534
def TuneVXRMPipelineFlush : SubtargetFeature<"vxrm-pipeline-flush", "HasVXRMPipelineFlush",
15351535
"true", "VXRM writes causes pipeline flush">;
15361536

1537+
def TunePreferVsetvliOverReadVLENB
1538+
: SubtargetFeature<"prefer-vsetvli-over-read-vlenb",
1539+
"PreferVsetvliOverReadVLENB",
1540+
"true",
1541+
"Prefer vsetvli over read vlenb CSR when calculate VLEN">;
1542+
15371543
// Assume that lock-free native-width atomics are available, even if the target
15381544
// and operating system combination would not usually provide them. The user
15391545
// is responsible for providing any necessary __sync implementations. Code

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,8 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
669669
// Simply allocate the stack if it's not big enough to require a probe.
670670
if (!NeedProbe || Offset <= ProbeSize) {
671671
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset),
672-
MachineInstr::FrameSetup, getStackAlign());
672+
MachineInstr::FrameSetup, getStackAlign(),
673+
/*IsPrologueOrEpilogue*/ true);
673674

674675
if (EmitCFI) {
675676
// Emit ".cfi_def_cfa_offset RealStackSize"
@@ -698,7 +699,7 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
698699
while (CurrentOffset + ProbeSize <= Offset) {
699700
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
700701
StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
701-
getStackAlign());
702+
getStackAlign(), /*IsPrologueOrEpilogue*/ true);
702703
// s[d|w] zero, 0(sp)
703704
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
704705
.addReg(RISCV::X0)
@@ -721,7 +722,7 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
721722
if (Residual) {
722723
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
723724
StackOffset::getFixed(-Residual), MachineInstr::FrameSetup,
724-
getStackAlign());
725+
getStackAlign(), /*IsPrologueOrEpilogue*/ true);
725726
if (EmitCFI) {
726727
// Emit ".cfi_def_cfa_offset Offset"
727728
unsigned CFIIndex =
@@ -752,7 +753,7 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
752753
// SUB TargetReg, SP, RoundedSize
753754
RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
754755
StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup,
755-
getStackAlign());
756+
getStackAlign(), /*IsPrologueOrEpilogue*/ true);
756757

757758
if (EmitCFI) {
758759
// Set the CFA register to TargetReg.
@@ -781,7 +782,8 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
781782

782783
if (Residual) {
783784
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
784-
MachineInstr::FrameSetup, getStackAlign());
785+
MachineInstr::FrameSetup, getStackAlign(),
786+
/*IsPrologueOrEpilogue*/ true);
785787
if (DynAllocation) {
786788
// s[d|w] zero, 0(sp)
787789
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
@@ -1014,7 +1016,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
10141016
RI->adjustReg(
10151017
MBB, MBBI, DL, FPReg, SPReg,
10161018
StackOffset::getFixed(RealStackSize - RVFI->getVarArgsSaveSize()),
1017-
MachineInstr::FrameSetup, getStackAlign());
1019+
MachineInstr::FrameSetup, getStackAlign(),
1020+
/*IsPrologueOrEpilogue*/ true);
10181021
}
10191022

10201023
// Emit ".cfi_def_cfa $fp, RVFI->getVarArgsSaveSize()"
@@ -1047,7 +1050,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
10471050
// updates.
10481051
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
10491052
StackOffset::getScalable(-RVVStackSize),
1050-
MachineInstr::FrameSetup, getStackAlign());
1053+
MachineInstr::FrameSetup, getStackAlign(),
1054+
/*IsPrologueOrEpilogue*/ true);
10511055
}
10521056

10531057
if (!hasFP(MF)) {
@@ -1125,7 +1129,8 @@ void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
11251129
const RISCVInstrInfo *TII = STI.getInstrInfo();
11261130

11271131
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
1128-
MachineInstr::FrameDestroy, getStackAlign());
1132+
MachineInstr::FrameDestroy, getStackAlign(),
1133+
/*IsPrologueOrEpilogue*/ true);
11291134
StackSize = 0;
11301135

11311136
unsigned CFIIndex =
@@ -1189,7 +1194,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
11891194
if (!RestoreSPFromFP)
11901195
RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, SPReg,
11911196
StackOffset::getScalable(RVVStackSize),
1192-
MachineInstr::FrameDestroy, getStackAlign());
1197+
MachineInstr::FrameDestroy, getStackAlign(),
1198+
/*IsPrologueOrEpilogue*/ true);
11931199

11941200
if (!hasFP(MF)) {
11951201
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
@@ -1214,7 +1220,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
12141220
if (!RestoreSPFromFP)
12151221
RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, SPReg,
12161222
StackOffset::getFixed(SecondSPAdjustAmount),
1217-
MachineInstr::FrameDestroy, getStackAlign());
1223+
MachineInstr::FrameDestroy, getStackAlign(),
1224+
/*IsPrologueOrEpilogue*/ true);
12181225

12191226
if (!hasFP(MF)) {
12201227
unsigned CFIIndex = MF.addFrameInst(
@@ -1240,7 +1247,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
12401247
assert(hasFP(MF) && "frame pointer should not have been eliminated");
12411248
RI->adjustReg(MBB, FirstScalarCSRRestoreInsn, DL, SPReg, FPReg,
12421249
StackOffset::getFixed(-FPOffset), MachineInstr::FrameDestroy,
1243-
getStackAlign());
1250+
getStackAlign(), /*IsPrologueOrEpilogue*/ true);
12441251
}
12451252

12461253
if (hasFP(MF)) {
@@ -1771,7 +1778,8 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
17711778

17721779
const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
17731780
RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
1774-
MachineInstr::NoFlags, getStackAlign());
1781+
MachineInstr::NoFlags, getStackAlign(),
1782+
/*IsPrologueOrEpilogue*/ true);
17751783
}
17761784
}
17771785

@@ -2195,6 +2203,17 @@ bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
21952203
const MachineFunction *MF = MBB.getParent();
21962204
const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
21972205

2206+
// Make sure VTYPE and VL are not live-in since we will use vsetvli in the
2207+
// prologue to get the VLEN, and that will clobber these registers.
2208+
//
2209+
// We could also check whether the stack contains any object with a
2210+
// scalable vector type, but that would require iterating over all the stack
2211+
// objects, which may not be worthwhile since the situation is rare. We can
2212+
// add that check in the future if we find it is necessary.
2213+
if (STI.preferVsetvliOverReadVLENB() &&
2214+
(MBB.isLiveIn(RISCV::VTYPE) || MBB.isLiveIn(RISCV::VL)))
2215+
return false;
2216+
21982217
if (!RVFI->useSaveRestoreLibCalls(*MF))
21992218
return true;
22002219

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6049,6 +6049,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
60496049
[(set GPR:$rd, (riscv_read_vlenb))]>,
60506050
PseudoInstExpansion<(CSRRS GPR:$rd, SysRegVLENB.Encoding, X0)>,
60516051
Sched<[WriteRdVLENB]>;
6052+
let Defs = [VL, VTYPE] in {
6053+
def PseudoReadMulVLENB : Pseudo<(outs GPR:$rd), (ins uimm5:$shamt),
6054+
[]>,
6055+
Sched<[WriteVSETVLI, ReadVSETVLI]>;
6056+
}
60526057
}
60536058

60546059
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1,

llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,8 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
179179
const DebugLoc &DL, Register DestReg,
180180
Register SrcReg, StackOffset Offset,
181181
MachineInstr::MIFlag Flag,
182-
MaybeAlign RequiredAlign) const {
182+
MaybeAlign RequiredAlign,
183+
bool IsPrologueOrEpilogue) const {
183184

184185
if (DestReg == SrcReg && !Offset.getFixed() && !Offset.getScalable())
185186
return;
@@ -226,21 +227,44 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
226227
assert(isInt<32>(ScalableValue / (RISCV::RVVBitsPerBlock / 8)) &&
227228
"Expect the number of vector registers within 32-bits.");
228229
uint32_t NumOfVReg = ScalableValue / (RISCV::RVVBitsPerBlock / 8);
229-
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg)
230-
.setMIFlag(Flag);
231-
232-
if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
233-
(NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
234-
unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD :
235-
(NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
236-
BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
237-
.addReg(ScratchReg, RegState::Kill).addReg(SrcReg)
230+
// Only use vsetvli rather than vlenb if adjusting in the prologue or
231+
// epilogue, otherwise it may disturb the VTYPE and VL status.
232+
bool UseVsetvliRatherThanVlenb =
233+
IsPrologueOrEpilogue && ST.preferVsetvliOverReadVLENB();
234+
if (UseVsetvliRatherThanVlenb && (NumOfVReg == 1 || NumOfVReg == 2 ||
235+
NumOfVReg == 4 || NumOfVReg == 8)) {
236+
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadMulVLENB), ScratchReg)
237+
.addImm(NumOfVReg)
238238
.setMIFlag(Flag);
239-
} else {
240-
TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
241239
BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
242-
.addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
240+
.addReg(SrcReg)
241+
.addReg(ScratchReg, RegState::Kill)
243242
.setMIFlag(Flag);
243+
} else {
244+
if (UseVsetvliRatherThanVlenb)
245+
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadMulVLENB), ScratchReg)
246+
.addImm(1)
247+
.setMIFlag(Flag);
248+
else
249+
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg)
250+
.setMIFlag(Flag);
251+
252+
if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
253+
(NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
254+
unsigned Opc = NumOfVReg == 2
255+
? RISCV::SH1ADD
256+
: (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
257+
BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
258+
.addReg(ScratchReg, RegState::Kill)
259+
.addReg(SrcReg)
260+
.setMIFlag(Flag);
261+
} else {
262+
TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
263+
BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
264+
.addReg(SrcReg)
265+
.addReg(ScratchReg, RegState::Kill)
266+
.setMIFlag(Flag);
267+
}
244268
}
245269
SrcReg = DestReg;
246270
KillSrcReg = true;
@@ -533,7 +557,8 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
533557
else
534558
DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
535559
adjustReg(*II->getParent(), II, DL, DestReg, FrameReg, Offset,
536-
MachineInstr::NoFlags, std::nullopt);
560+
MachineInstr::NoFlags, std::nullopt,
561+
/*IsPrologueOrEpilogue*/ false);
537562
MI.getOperand(FIOperandNum).ChangeToRegister(DestReg, /*IsDef*/false,
538563
/*IsImp*/false,
539564
/*IsKill*/true);

llvm/lib/Target/RISCV/RISCVRegisterInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,12 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
7575
// used during frame layout, and we may need to ensure that if we
7676
// split the offset internally that the DestReg is always aligned,
7777
// assuming that source reg was.
78+
// If IsPrologueOrEpilogue is true, the function is called during prologue
79+
// or epilogue generation.
7880
void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator II,
7981
const DebugLoc &DL, Register DestReg, Register SrcReg,
8082
StackOffset Offset, MachineInstr::MIFlag Flag,
81-
MaybeAlign RequiredAlign) const;
83+
MaybeAlign RequiredAlign, bool IsPrologueOrEpilogue) const;
8284

8385
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
8486
unsigned FIOperandNum,

0 commit comments

Comments
 (0)