Skip to content

Commit 6f53886

Browse files
authored
[RISCV] Add stack clash vector support (#119458)
Use the probe loop structure to allocate stack space for RVV (scalable vector) objects as well. We add the pseudo instruction RISCV::PROBED_STACKALLOC_RVV to distinguish this allocation from the normal (fixed-size) probe loop.
1 parent 681c83a commit 6f53886

File tree

6 files changed

+644
-23
lines changed

6 files changed

+644
-23
lines changed

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 121 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,54 @@ getPushOrLibCallsSavedInfo(const MachineFunction &MF,
499499
return PushOrLibCallsCSI;
500500
}
501501

502+
// Allocate the RVV part of the frame while probing it for stack clash
// protection.
//
// Emitted at MBBI, in order: a VLENB read into X6 that is then scaled by
// Amount / (RISCV::RVVBitsPerBlock / 8), a PROBED_STACKALLOC_RVV pseudo taking
// SP and that register, and a final SUB of that register from SP. When EmitCFI
// is set, the pseudo is bracketed by CFI that first names the scratch register
// as the CFA register and afterwards switches the CFA register back to SP.
//
// Amount must be non-zero (asserted). Flag is applied to the VLENB read, is
// passed to mulImm, and is applied to the final SUB; the CFI instructions are
// always tagged FrameSetup and the pseudo itself gets no flag here.
void RISCVFrameLowering::allocateAndProbeStackForRVV(
503+
MachineFunction &MF, MachineBasicBlock &MBB,
504+
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount,
505+
MachineInstr::MIFlag Flag, bool EmitCFI) const {
506+
assert(Amount != 0 && "Did not need to adjust stack pointer for RVV.");
507+
508+
// Emit a variable-length allocation probing loop.
509+
510+
// Compute the allocation size in TargetReg: read VLENB (not VLEN), then scale
// it by the number of (RVVBitsPerBlock / 8)-byte units contained in Amount.
// NOTE(review): mulImm presumably multiplies TargetReg in place by NumOfVReg
// -- confirm against RISCVInstrInfo.
511+
const RISCVInstrInfo *TII = STI.getInstrInfo();
512+
Register TargetReg = RISCV::X6;
513+
uint32_t NumOfVReg = Amount / (RISCV::RVVBitsPerBlock / 8);
514+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::PseudoReadVLENB), TargetReg)
515+
.setMIFlag(Flag);
516+
TII->mulImm(MF, MBB, MBBI, DL, TargetReg, NumOfVReg, Flag);
517+
518+
if (EmitCFI) {
519+
// Set the CFA register to TargetReg (with offset -Amount) for the duration
// of the probe pseudo.
520+
unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
521+
unsigned CFIIndex =
522+
MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, -Amount));
523+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
524+
.addCFIIndex(CFIIndex)
525+
.setMIFlags(MachineInstr::FrameSetup);
526+
}
527+
528+
// It will be expanded to a probe loop in `inlineStackProbe`, which reads
// operand 1 (TargetReg here) back as the register driving the loop.
529+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC_RVV))
530+
.addReg(SPReg)
531+
.addReg(TargetReg);
532+
533+
if (EmitCFI) {
534+
// Set the CFA register back to SP.
535+
unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
536+
unsigned CFIIndex =
537+
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
538+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
539+
.addCFIIndex(CFIIndex)
540+
.setMIFlags(MachineInstr::FrameSetup);
541+
}
542+
543+
// SUB SP, SP, TargetReg (X6, printed as t1 in the test expectations).
// The RVV probe loop subtracts the probe size from TargetReg on every
// iteration, so this applies whatever is left in TargetReg to SP.
544+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SUB), SPReg)
545+
.addReg(SPReg)
546+
.addReg(TargetReg)
547+
.setMIFlag(Flag);
548+
}
549+
502550
static void appendScalableVectorExpression(const TargetRegisterInfo &TRI,
503551
SmallVectorImpl<char> &Expr,
504552
int FixedOffset, int ScalableOffset,
@@ -857,10 +905,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
857905
.setMIFlag(MachineInstr::FrameSetup);
858906
}
859907

908+
uint64_t SecondSPAdjustAmount = 0;
860909
// Emit the second SP adjustment after saving callee saved registers.
861910
if (FirstSPAdjustAmount) {
862-
uint64_t SecondSPAdjustAmount =
863-
getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
911+
SecondSPAdjustAmount = getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
864912
assert(SecondSPAdjustAmount > 0 &&
865913
"SecondSPAdjustAmount should be greater than zero");
866914

@@ -870,11 +918,16 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
870918
}
871919

872920
if (RVVStackSize) {
873-
// We must keep the stack pointer aligned through any intermediate
874-
// updates.
875-
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
876-
StackOffset::getScalable(-RVVStackSize),
877-
MachineInstr::FrameSetup, getStackAlign());
921+
if (NeedProbe) {
922+
allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
923+
MachineInstr::FrameSetup, !hasFP(MF));
924+
} else {
925+
// We must keep the stack pointer aligned through any intermediate
926+
// updates.
927+
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
928+
StackOffset::getScalable(-RVVStackSize),
929+
MachineInstr::FrameSetup, getStackAlign());
930+
}
878931

879932
if (!hasFP(MF)) {
880933
// Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
@@ -914,6 +967,19 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
914967
.addImm(ShiftAmount)
915968
.setMIFlag(MachineInstr::FrameSetup);
916969
}
970+
if (NeedProbe && RVVStackSize == 0) {
971+
// Do a probe if the align + size allocated just passed the probe size
972+
// and was not yet probed.
973+
if (SecondSPAdjustAmount < ProbeSize &&
974+
SecondSPAdjustAmount + MaxAlignment.value() >= ProbeSize) {
975+
bool IsRV64 = STI.is64Bit();
976+
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
977+
.addReg(RISCV::X0)
978+
.addReg(SPReg)
979+
.addImm(0)
980+
.setMIFlags(MachineInstr::FrameSetup);
981+
}
982+
}
917983
// FP will be used to restore the frame in the epilogue, so we need
918984
// another base register BP to record SP after re-alignment. SP will
919985
// track the current stack after allocating variable sized objects.
@@ -2019,8 +2085,9 @@ TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
20192085

20202086
// Synthesize the probe loop.
20212087
static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
2022-
MachineBasicBlock::iterator MBBI,
2023-
DebugLoc DL) {
2088+
MachineBasicBlock::iterator MBBI, DebugLoc DL,
2089+
Register TargetReg, bool IsRVV) {
2090+
assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");
20242091

20252092
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
20262093
const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
@@ -2036,7 +2103,6 @@ static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
20362103
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
20372104
MF.insert(MBBInsertPoint, ExitMBB);
20382105
MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2039-
Register TargetReg = RISCV::X6;
20402106
Register ScratchReg = RISCV::X7;
20412107

20422108
// ScratchReg = ProbeSize
@@ -2057,12 +2123,29 @@ static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
20572123
.addImm(0)
20582124
.setMIFlags(Flags);
20592125

2060-
// BNE SP, TargetReg, LoopTest
2061-
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
2062-
.addReg(SPReg)
2063-
.addReg(TargetReg)
2064-
.addMBB(LoopTestMBB)
2065-
.setMIFlags(Flags);
2126+
if (IsRVV) {
2127+
// SUB TargetReg, TargetReg, ProbeSize
2128+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB),
2129+
TargetReg)
2130+
.addReg(TargetReg)
2131+
.addReg(ScratchReg)
2132+
.setMIFlags(Flags);
2133+
2134+
// BGE TargetReg, ProbeSize, LoopTest
2135+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BGE))
2136+
.addReg(TargetReg)
2137+
.addReg(ScratchReg)
2138+
.addMBB(LoopTestMBB)
2139+
.setMIFlags(Flags);
2140+
2141+
} else {
2142+
// BNE SP, TargetReg, LoopTest
2143+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
2144+
.addReg(SPReg)
2145+
.addReg(TargetReg)
2146+
.addMBB(LoopTestMBB)
2147+
.setMIFlags(Flags);
2148+
}
20662149

20672150
ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
20682151

@@ -2075,12 +2158,27 @@ static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
20752158

20762159
void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
20772160
MachineBasicBlock &MBB) const {
2078-
auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
2079-
return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
2080-
});
2081-
if (Where != MBB.end()) {
2082-
DebugLoc DL = MBB.findDebugLoc(Where);
2083-
emitStackProbeInline(MF, MBB, Where, DL);
2084-
Where->eraseFromParent();
2161+
// Get the instructions that need to be replaced. We emit at most two of
2162+
// these. Remember them in order to avoid complications coming from the need
2163+
// to traverse the block while potentially creating more blocks.
2164+
SmallVector<MachineInstr *, 4> ToReplace;
2165+
for (MachineInstr &MI : MBB) {
2166+
unsigned Opc = MI.getOpcode();
2167+
if (Opc == RISCV::PROBED_STACKALLOC ||
2168+
Opc == RISCV::PROBED_STACKALLOC_RVV) {
2169+
ToReplace.push_back(&MI);
2170+
}
2171+
}
2172+
2173+
for (MachineInstr *MI : ToReplace) {
2174+
if (MI->getOpcode() == RISCV::PROBED_STACKALLOC ||
2175+
MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV) {
2176+
MachineBasicBlock::iterator MBBI = MI->getIterator();
2177+
DebugLoc DL = MBB.findDebugLoc(MBBI);
2178+
Register TargetReg = MI->getOperand(1).getReg();
2179+
emitStackProbeInline(MF, MBB, MBBI, DL, TargetReg,
2180+
(MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV));
2181+
MBBI->eraseFromParent();
2182+
}
20852183
}
20862184
}

llvm/lib/Target/RISCV/RISCVFrameLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,11 @@ class RISCVFrameLowering : public TargetFrameLowering {
107107
// Replace a StackProbe stub (if any) with the actual probe code inline
108108
void inlineStackProbe(MachineFunction &MF,
109109
MachineBasicBlock &PrologueMBB) const override;
110+
void allocateAndProbeStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB,
111+
MachineBasicBlock::iterator MBBI,
112+
const DebugLoc &DL, int64_t Amount,
113+
MachineInstr::MIFlag Flag,
114+
bool EmitCFI) const;
110115
};
111116
} // namespace llvm
112117
#endif

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1424,6 +1424,10 @@ def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
14241424
(ins GPR:$scratch),
14251425
[]>,
14261426
Sched<[]>;
1427+
def PROBED_STACKALLOC_RVV : Pseudo<(outs GPR:$sp),
1428+
(ins GPR:$scratch),
1429+
[]>,
1430+
Sched<[]>;
14271431
}
14281432

14291433
/// HI and ADD_LO address nodes.

llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,49 @@ define <vscale x 1 x i64> @access_fixed_and_vector_objects(ptr %val) {
6464

6565
ret <vscale x 1 x i64> %a
6666
}
67+
68+
define <vscale x 1 x i64> @probe_fixed_and_vector_objects(ptr %val, <vscale x 1 x i64> %dummy) "probe-stack"="inline-asm" {
69+
; RV64IV-LABEL: probe_fixed_and_vector_objects:
70+
; RV64IV: # %bb.0:
71+
; RV64IV-NEXT: addi sp, sp, -528
72+
; RV64IV-NEXT: .cfi_def_cfa_offset 528
73+
; RV64IV-NEXT: csrr t1, vlenb
74+
; RV64IV-NEXT: .cfi_def_cfa t1, -8
75+
; RV64IV-NEXT: lui t2, 1
76+
; RV64IV-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
77+
; RV64IV-NEXT: sub sp, sp, t2
78+
; RV64IV-NEXT: sd zero, 0(sp)
79+
; RV64IV-NEXT: sub t1, t1, t2
80+
; RV64IV-NEXT: bge t1, t2, .LBB2_1
81+
; RV64IV-NEXT: # %bb.2:
82+
; RV64IV-NEXT: .cfi_def_cfa_register sp
83+
; RV64IV-NEXT: sub sp, sp, t1
84+
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 1 * vlenb
85+
; RV64IV-NEXT: addi a0, sp, 8
86+
; RV64IV-NEXT: vl1re64.v v9, (a0)
87+
; RV64IV-NEXT: addi a0, sp, 528
88+
; RV64IV-NEXT: vl1re64.v v10, (a0)
89+
; RV64IV-NEXT: ld a0, 520(sp)
90+
; RV64IV-NEXT: vsetvli zero, a0, e64, m1, tu, ma
91+
; RV64IV-NEXT: vadd.vv v8, v9, v10
92+
; RV64IV-NEXT: csrr a0, vlenb
93+
; RV64IV-NEXT: add sp, sp, a0
94+
; RV64IV-NEXT: .cfi_def_cfa sp, 528
95+
; RV64IV-NEXT: addi sp, sp, 528
96+
; RV64IV-NEXT: .cfi_def_cfa_offset 0
97+
; RV64IV-NEXT: ret
98+
%local = alloca i64
99+
%vector = alloca <vscale x 1 x i64>
100+
%array = alloca [64 x i64]
101+
%v1 = load <vscale x 1 x i64>, ptr %array
102+
%v2 = load <vscale x 1 x i64>, ptr %vector
103+
%len = load i64, ptr %local
104+
105+
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(
106+
<vscale x 1 x i64> %dummy,
107+
<vscale x 1 x i64> %v1,
108+
<vscale x 1 x i64> %v2,
109+
i64 %len)
110+
111+
ret <vscale x 1 x i64> %a
112+
}

0 commit comments

Comments
 (0)