Skip to content

[RISCV] Add stack clash vector support #119458

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 121 additions & 23 deletions llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,54 @@ getPushOrLibCallsSavedInfo(const MachineFunction &MF,
return PushOrLibCallsCSI;
}

/// Allocate the scalable-vector (RVV) area of the frame with stack probing.
///
/// The byte size of the area is computed at run time in t1 (X6) as
/// VLENB * (Amount / (RVVBitsPerBlock / 8)). A PROBED_STACKALLOC_RVV pseudo is
/// then inserted (it is expanded into a probe loop by `inlineStackProbe`),
/// followed by a final `SUB SP, SP, t1`.
///
/// \param MF      Function being lowered.
/// \param MBB     Block that receives the new instructions.
/// \param MBBI    Insertion point inside \p MBB.
/// \param DL      Debug location attached to the emitted instructions.
/// \param Amount  Scalable stack size; must be non-zero.
/// \param Flag    Flag set on the size computation and on the final SUB.
/// \param EmitCFI Whether to emit CFI that defines the CFA in terms of t1
///                before the pseudo and switches it back to SP after it.
void RISCVFrameLowering::allocateAndProbeStackForRVV(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount,
    MachineInstr::MIFlag Flag, bool EmitCFI) const {
  assert(Amount != 0 && "Did not need to adjust stack pointer for RVV.");

  const RISCVInstrInfo *InstrInfo = STI.getInstrInfo();
  Register SizeReg = RISCV::X6;

  // DWARF number of a physical register (EH flavour).
  auto DwarfNumberOf = [&](Register PhysReg) -> unsigned {
    return STI.getRegisterInfo()->getDwarfRegNum(PhysReg, true);
  };

  // Register a CFI directive with the function and insert the matching
  // CFI_INSTRUCTION at the current insertion point.
  auto InsertCFI = [&](const MCCFIInstruction &Directive) {
    unsigned Index = MF.addFrameInst(Directive);
    BuildMI(MBB, MBBI, DL, InstrInfo->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(Index)
        .setMIFlags(MachineInstr::FrameSetup);
  };

  // SizeReg = VLENB * <number of vector registers covered by Amount>.
  uint32_t VRegCount = Amount / (RISCV::RVVBitsPerBlock / 8);
  BuildMI(MBB, MBBI, DL, InstrInfo->get(RISCV::PseudoReadVLENB), SizeReg)
      .setMIFlag(Flag);
  InstrInfo->mulImm(MF, MBB, MBBI, DL, SizeReg, VRegCount, Flag);

  // While the probe loop runs, describe the CFA relative to SizeReg.
  if (EmitCFI)
    InsertCFI(
        MCCFIInstruction::cfiDefCfa(nullptr, DwarfNumberOf(SizeReg), -Amount));

  // Placeholder for the variable-length probing loop; `inlineStackProbe`
  // replaces it with the real loop.
  BuildMI(MBB, MBBI, DL, InstrInfo->get(RISCV::PROBED_STACKALLOC_RVV))
      .addReg(SPReg)
      .addReg(SizeReg);

  // After the loop, the CFA register is SP again.
  if (EmitCFI)
    InsertCFI(
        MCCFIInstruction::createDefCfaRegister(nullptr, DwarfNumberOf(SPReg)));

  // SUB SP, SP, T1
  BuildMI(MBB, MBBI, DL, InstrInfo->get(RISCV::SUB), SPReg)
      .addReg(SPReg)
      .addReg(SizeReg)
      .setMIFlag(Flag);
}

static void appendScalableVectorExpression(const TargetRegisterInfo &TRI,
SmallVectorImpl<char> &Expr,
int FixedOffset, int ScalableOffset,
Expand Down Expand Up @@ -857,10 +905,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}

uint64_t SecondSPAdjustAmount = 0;
// Emit the second SP adjustment after saving callee saved registers.
if (FirstSPAdjustAmount) {
uint64_t SecondSPAdjustAmount =
getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
SecondSPAdjustAmount = getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
assert(SecondSPAdjustAmount > 0 &&
"SecondSPAdjustAmount should be greater than zero");

Expand All @@ -870,11 +918,16 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
}

if (RVVStackSize) {
// We must keep the stack pointer aligned through any intermediate
// updates.
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
StackOffset::getScalable(-RVVStackSize),
MachineInstr::FrameSetup, getStackAlign());
if (NeedProbe) {
allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
MachineInstr::FrameSetup, !hasFP(MF));
} else {
// We must keep the stack pointer aligned through any intermediate
// updates.
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
StackOffset::getScalable(-RVVStackSize),
MachineInstr::FrameSetup, getStackAlign());
}

if (!hasFP(MF)) {
// Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
Expand Down Expand Up @@ -914,6 +967,19 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
.addImm(ShiftAmount)
.setMIFlag(MachineInstr::FrameSetup);
}
if (NeedProbe && RVVStackSize == 0) {
// Do a probe if the align + size allocated just passed the probe size
// and was not yet probed.
if (SecondSPAdjustAmount < ProbeSize &&
SecondSPAdjustAmount + MaxAlignment.value() >= ProbeSize) {
bool IsRV64 = STI.is64Bit();
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
.addReg(RISCV::X0)
.addReg(SPReg)
.addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
}
}
// FP will be used to restore the frame in the epilogue, so we need
// another base register BP to record SP after re-alignment. SP will
// track the current stack after allocating variable sized objects.
Expand Down Expand Up @@ -2017,8 +2083,9 @@ TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {

// Synthesize the probe loop.
static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL) {
MachineBasicBlock::iterator MBBI, DebugLoc DL,
Register TargetReg, bool IsRVV) {
assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");

auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
Expand All @@ -2034,7 +2101,6 @@ static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, ExitMBB);
MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
Register TargetReg = RISCV::X6;
Register ScratchReg = RISCV::X7;

// ScratchReg = ProbeSize
Expand All @@ -2055,12 +2121,29 @@ static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
.addImm(0)
.setMIFlags(Flags);

// BNE SP, TargetReg, LoopTest
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
.addReg(SPReg)
.addReg(TargetReg)
.addMBB(LoopTestMBB)
.setMIFlags(Flags);
if (IsRVV) {
// SUB TargetReg, TargetReg, ProbeSize
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB),
TargetReg)
.addReg(TargetReg)
.addReg(ScratchReg)
.setMIFlags(Flags);

// BGE TargetReg, ProbeSize, LoopTest
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BGE))
.addReg(TargetReg)
.addReg(ScratchReg)
.addMBB(LoopTestMBB)
.setMIFlags(Flags);

} else {
// BNE SP, TargetReg, LoopTest
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
.addReg(SPReg)
.addReg(TargetReg)
.addMBB(LoopTestMBB)
.setMIFlags(Flags);
}

ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());

Expand All @@ -2073,12 +2156,27 @@ static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,

/// Replace every stack-probe pseudo in \p MBB with an inline probe loop.
///
/// Both PROBED_STACKALLOC and PROBED_STACKALLOC_RVV are expanded. The pseudos
/// are collected before any expansion because `emitStackProbeInline` creates
/// new basic blocks and moves the instructions that follow the pseudo into
/// them, which would otherwise disturb the traversal of \p MBB.
///
/// \param MF  Function being lowered.
/// \param MBB Block to scan for probe pseudos.
void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
                                          MachineBasicBlock &MBB) const {
  // Get the instructions that need to be replaced. We emit at most two of
  // these. Remember them in order to avoid complications coming from the need
  // to traverse the block while potentially creating more blocks.
  SmallVector<MachineInstr *, 4> ToReplace;
  for (MachineInstr &MI : MBB) {
    unsigned Opc = MI.getOpcode();
    if (Opc == RISCV::PROBED_STACKALLOC ||
        Opc == RISCV::PROBED_STACKALLOC_RVV)
      ToReplace.push_back(&MI);
  }

  for (MachineInstr *MI : ToReplace) {
    // Read everything we need from the pseudo before it is erased.
    const bool IsRVV = MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV;
    // Operand 1 is the register the probe loop works towards / counts down.
    Register TargetReg = MI->getOperand(1).getReg();

    // A previous expansion splices the instructions following its pseudo
    // into a newly created exit block, so a later pseudo may no longer live
    // in MBB. Always expand relative to the block that currently owns it.
    MachineBasicBlock &ParentMBB = *MI->getParent();
    MachineBasicBlock::iterator MBBI = MI->getIterator();
    DebugLoc DL = ParentMBB.findDebugLoc(MBBI);

    emitStackProbeInline(MF, ParentMBB, MBBI, DL, TargetReg, IsRVV);
    MBBI->eraseFromParent();
  }
}
5 changes: 5 additions & 0 deletions llvm/lib/Target/RISCV/RISCVFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ class RISCVFrameLowering : public TargetFrameLowering {
// Replace the stack probe pseudo-instructions (PROBED_STACKALLOC and
// PROBED_STACKALLOC_RVV, if any) found in PrologueMBB with the actual probe
// loop code inline.
void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologueMBB) const override;
// Allocate the scalable-vector (RVV) part of the stack with probing.
// Computes VLENB * (Amount / (RVVBitsPerBlock / 8)) into t1, inserts a
// PROBED_STACKALLOC_RVV pseudo (expanded later by inlineStackProbe) and then
// subtracts t1 from SP. Amount must be non-zero. Flag is set on the size
// computation and the final SUB. When EmitCFI is true, CFI is emitted that
// defines the CFA in terms of t1 before the pseudo and switches the CFA
// register back to SP after it.
void allocateAndProbeStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, int64_t Amount,
MachineInstr::MIFlag Flag,
bool EmitCFI) const;
};
} // namespace llvm
#endif
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1382,6 +1382,10 @@ def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
(ins GPR:$scratch),
[]>,
Sched<[]>;
// Placeholder for a probed allocation of the scalable-vector (RVV) stack area.
// Emitted by RISCVFrameLowering::allocateAndProbeStackForRVV and expanded into
// an inline probe loop by RISCVFrameLowering::inlineStackProbe, which reads
// operand 1 as the register holding the size to allocate.
def PROBED_STACKALLOC_RVV : Pseudo<(outs GPR:$sp),
(ins GPR:$scratch),
[]>,
Sched<[]>;
}

/// HI and ADD_LO address nodes.
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,49 @@ define <vscale x 1 x i64> @access_fixed_and_vector_objects(ptr %val) {

ret <vscale x 1 x i64> %a
}

; Inline stack probing ("probe-stack"="inline-asm") on a frame that holds both
; fixed-size objects and a scalable vector object. The CHECK lines expect the
; fixed part to be allocated with a plain addi and the scalable part through a
; probe loop that stores zero to 0(sp) after each sp decrement.
define <vscale x 1 x i64> @probe_fixed_and_vector_objects(ptr %val, <vscale x 1 x i64> %dummy) "probe-stack"="inline-asm" {
; RV64IV-LABEL: probe_fixed_and_vector_objects:
; RV64IV: # %bb.0:
; RV64IV-NEXT: addi sp, sp, -528
; RV64IV-NEXT: .cfi_def_cfa_offset 528
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: .cfi_def_cfa t1, -8
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB2_1
; RV64IV-NEXT: # %bb.2:
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 1 * vlenb
; RV64IV-NEXT: addi a0, sp, 8
; RV64IV-NEXT: vl1re64.v v9, (a0)
; RV64IV-NEXT: addi a0, sp, 528
; RV64IV-NEXT: vl1re64.v v10, (a0)
; RV64IV-NEXT: ld a0, 520(sp)
; RV64IV-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; RV64IV-NEXT: vadd.vv v8, v9, v10
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 528
; RV64IV-NEXT: addi sp, sp, 528
; RV64IV-NEXT: .cfi_def_cfa_offset 0
; RV64IV-NEXT: ret
%local = alloca i64
%vector = alloca <vscale x 1 x i64>
%array = alloca [64 x i64]
%v1 = load <vscale x 1 x i64>, ptr %array
%v2 = load <vscale x 1 x i64>, ptr %vector
%len = load i64, ptr %local

%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(
<vscale x 1 x i64> %dummy,
<vscale x 1 x i64> %v1,
<vscale x 1 x i64> %v2,
i64 %len)

ret <vscale x 1 x i64> %a
}
Loading
Loading