[RISCV] Add stack clash vector support #119458

Merged: 3 commits, merged into llvm:main on Jan 10, 2025

Conversation

rzinsly (Contributor) commented Dec 10, 2024

Use the probe loop structure to probe stack allocations for RVV (scalable vector) objects as well. We add the pseudo instruction RISCV::PROBED_STACKALLOC_RVV to distinguish it from the normal probe loop.
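For reference, the probe loop emitted for the RVV stack area has the following shape (excerpt adapted from the f_vector test added in this patch, CFI directives omitted; t1 holds the scaled RVV stack size, t2 the 4096-byte probe size):

    csrr t1, vlenb          # t1 = vlenb
    slli t1, t1, 1          # t1 = RVV stack size in bytes (2 * vlenb here)
    lui  t2, 1              # t2 = 4096, the probe size
    .LBB0_1:                # probe loop
    sub  sp, sp, t2         #   move sp down one page
    sd   zero, 0(sp)        #   probe the newly allocated page
    sub  t1, t1, t2         #   decrement the remaining size
    bge  t1, t2, .LBB0_1    #   loop while at least one page remains
    sub  sp, sp, t1         # allocate the sub-page remainder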

llvmbot (Member) commented Dec 10, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Raphael Moreira Zinsly (rzinsly)

Changes

Use the probe loop structure to probe stack allocations for RVV (scalable vector) objects as well. We add the pseudo instruction RISCV::PROBED_STACKALLOC_RVV to distinguish it from the normal probe loop.


Patch is 27.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119458.diff

5 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVFrameLowering.cpp (+125-24)
  • (modified) llvm/lib/Target/RISCV/RISCVFrameLowering.h (+11)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.td (+4)
  • (modified) llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll (+46)
  • (added) llvm/test/CodeGen/RISCV/rvv/stack-probing-rvv.ll (+400)
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 1028149bf513f4..9b149a466658bb 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -499,6 +499,54 @@ getPushOrLibCallsSavedInfo(const MachineFunction &MF,
   return PushOrLibCallsCSI;
 }
 
+void RISCVFrameLowering::allocateAndProbeStackForRVV(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount,
+    MachineInstr::MIFlag Flag, bool EmitCFI) const {
+  assert(Amount != 0 && "Did not need to adjust stack pointer for RVV.");
+
+  // Emit a variable-length allocation probing loop.
+
+  // Get VLEN in TargetReg
+  const RISCVInstrInfo *TII = STI.getInstrInfo();
+  Register TargetReg = RISCV::X6;
+  uint32_t NumOfVReg = Amount / 8;
+  BuildMI(MBB, MBBI, DL, TII->get(RISCV::PseudoReadVLENB), TargetReg)
+      .setMIFlag(Flag);
+  TII->mulImm(MF, MBB, MBBI, DL, TargetReg, NumOfVReg, Flag);
+
+  if (EmitCFI) {
+    // Set the CFA register to TargetReg.
+    unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, -Amount));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameSetup);
+  }
+
+  // It will be expanded to a probe loop in `inlineStackProbe`.
+  BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC_RVV))
+      .addReg(SPReg)
+      .addReg(TargetReg);
+
+  if (EmitCFI) {
+    // Set the CFA register back to SP.
+    unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameSetup);
+  }
+
+  // SUB SP, SP, T1
+  BuildMI(MBB, MBBI, DL, TII->get(RISCV::SUB), SPReg)
+      .addReg(SPReg)
+      .addReg(TargetReg)
+      .setMIFlag(Flag);
+}
+
 static void appendScalableVectorExpression(const TargetRegisterInfo &TRI,
                                            SmallVectorImpl<char> &Expr,
                                            int FixedOffset, int ScalableOffset,
@@ -857,10 +905,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
+  uint64_t SecondSPAdjustAmount = 0;
   // Emit the second SP adjustment after saving callee saved registers.
   if (FirstSPAdjustAmount) {
-    uint64_t SecondSPAdjustAmount =
-        getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
+    SecondSPAdjustAmount = getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
     assert(SecondSPAdjustAmount > 0 &&
            "SecondSPAdjustAmount should be greater than zero");
 
@@ -870,11 +918,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   if (RVVStackSize) {
-    // We must keep the stack pointer aligned through any intermediate
-    // updates.
-    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
-                  StackOffset::getScalable(-RVVStackSize),
-                  MachineInstr::FrameSetup, getStackAlign());
+    if (NeedProbe)
+      allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
+                                  MachineInstr::FrameSetup, !hasFP(MF));
+    else
+      // We must keep the stack pointer aligned through any intermediate
+      // updates.
+      RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+                    StackOffset::getScalable(-RVVStackSize),
+                    MachineInstr::FrameSetup, getStackAlign());
 
     if (!hasFP(MF)) {
       // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
@@ -914,6 +966,20 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
             .addImm(ShiftAmount)
             .setMIFlag(MachineInstr::FrameSetup);
       }
+      if (NeedProbe) {
+        // Do a probe if the align + size allocated just passed the probe size
+        // and was not yet probed.
+        uint64_t AllocatedStack = SecondSPAdjustAmount + RVVStackSize;
+        if (AllocatedStack < ProbeSize &&
+            AllocatedStack + MaxAlignment.value() >= ProbeSize) {
+          bool IsRV64 = STI.is64Bit();
+          BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+              .addReg(RISCV::X0)
+              .addReg(SPReg)
+              .addImm(0)
+              .setMIFlags(MachineInstr::FrameSetup);
+        }
+      }
       // FP will be used to restore the frame in the epilogue, so we need
       // another base register BP to record SP after re-alignment. SP will
       // track the current stack after allocating variable sized objects.
@@ -2016,9 +2082,11 @@ TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
 }
 
 // Synthesize the probe loop.
-static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator MBBI,
-                                 DebugLoc DL) {
+MachineBasicBlock *RISCVFrameLowering::emitStackProbeInline(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MBBI, DebugLoc DL, Register TargetReg,
+    bool IsRVV) const {
+  assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");
 
   auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
   const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
@@ -2034,7 +2102,6 @@ static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
   MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
   MF.insert(MBBInsertPoint, ExitMBB);
   MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
-  Register TargetReg = RISCV::X6;
   Register ScratchReg = RISCV::X7;
 
   // ScratchReg = ProbeSize
@@ -2055,12 +2122,29 @@ static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
       .addImm(0)
       .setMIFlags(Flags);
 
-  //   BNE SP, TargetReg, LoopTest
-  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
-      .addReg(SPReg)
-      .addReg(TargetReg)
-      .addMBB(LoopTestMBB)
-      .setMIFlags(Flags);
+  if (IsRVV) {
+    //  SUB TargetReg, TargetReg, ProbeSize
+    BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB),
+            TargetReg)
+        .addReg(TargetReg)
+        .addReg(ScratchReg)
+        .setMIFlags(Flags);
+
+    //  BGE TargetReg, ProbeSize, LoopTest
+    BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BGE))
+        .addReg(TargetReg)
+        .addReg(ScratchReg)
+        .addMBB(LoopTestMBB)
+        .setMIFlags(Flags);
+
+  } else {
+    //  BNE SP, TargetReg, LoopTest
+    BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
+        .addReg(SPReg)
+        .addReg(TargetReg)
+        .addMBB(LoopTestMBB)
+        .setMIFlags(Flags);
+  }
 
   ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
 
@@ -2069,16 +2153,33 @@ static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
   MBB.addSuccessor(LoopTestMBB);
   // Update liveins.
   fullyRecomputeLiveIns({ExitMBB, LoopTestMBB});
+
+  return ExitMBB;
 }
 
 void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
-  auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
-    return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
-  });
-  if (Where != MBB.end()) {
-    DebugLoc DL = MBB.findDebugLoc(Where);
-    emitStackProbeInline(MF, MBB, Where, DL);
-    Where->eraseFromParent();
+  // Get the instructions that need to be replaced. We emit at most two of
+  // these. Remember them in order to avoid complications coming from the need
+  // to traverse the block while potentially creating more blocks.
+  SmallVector<MachineInstr *, 4> ToReplace;
+  for (MachineInstr &MI : MBB) {
+    int Opc = MI.getOpcode();
+    if (Opc == RISCV::PROBED_STACKALLOC ||
+        Opc == RISCV::PROBED_STACKALLOC_RVV) {
+      ToReplace.push_back(&MI);
+    }
+  }
+
+  for (MachineInstr *MI : ToReplace) {
+    if (MI->getOpcode() == RISCV::PROBED_STACKALLOC ||
+        MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV) {
+      MachineBasicBlock::iterator MBBI = MI->getIterator();
+      DebugLoc DL = MBB.findDebugLoc(MBBI);
+      Register TargetReg = MI->getOperand(1).getReg();
+      emitStackProbeInline(MF, MBB, MBBI, DL, TargetReg,
+                           (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV));
+      MBBI->eraseFromParent();
+    }
   }
 }
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 190c063d9d3b5d..1a2c6e0302623d 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -83,6 +83,12 @@ class RISCVFrameLowering : public TargetFrameLowering {
                      uint64_t RealStackSize, bool EmitCFI, bool NeedProbe,
                      uint64_t ProbeSize) const;
 
+  MachineBasicBlock *emitStackProbeInline(MachineFunction &MF,
+                                          MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MBBI,
+                                          DebugLoc DL, Register TargetReg,
+                                          bool IsRVV) const;
+
 protected:
   const RISCVSubtarget &STI;
 
@@ -107,6 +113,11 @@ class RISCVFrameLowering : public TargetFrameLowering {
   // Replace a StackProbe stub (if any) with the actual probe code inline
   void inlineStackProbe(MachineFunction &MF,
                         MachineBasicBlock &PrologueMBB) const override;
+  void allocateAndProbeStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   const DebugLoc &DL, int64_t Amount,
+                                   MachineInstr::MIFlag Flag,
+                                   bool EmitCFI) const;
 };
 } // namespace llvm
 #endif
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 14b571cebe1fec..d77e416a970b2f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1382,6 +1382,10 @@ def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
                                (ins GPR:$scratch),
                                []>,
                                Sched<[]>;
+def PROBED_STACKALLOC_RVV : Pseudo<(outs GPR:$sp),
+                               (ins GPR:$scratch),
+                               []>,
+                               Sched<[]>;
 }
 
 /// HI and ADD_LO address nodes.
diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
index c6a3649c9ba8fe..0772181f7488df 100644
--- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
@@ -64,3 +64,49 @@ define <vscale x 1 x i64> @access_fixed_and_vector_objects(ptr %val) {
 
   ret <vscale x 1 x i64> %a
 }
+
+define <vscale x 1 x i64> @probe_fixed_and_vector_objects(ptr %val) "probe-stack"="inline-asm" {
+; RV64IV-LABEL: probe_fixed_and_vector_objects:
+; RV64IV:       # %bb.0:
+; RV64IV-NEXT:    addi sp, sp, -528
+; RV64IV-NEXT:    .cfi_def_cfa_offset 528
+; RV64IV-NEXT:    csrr t1, vlenb
+; RV64IV-NEXT:    .cfi_def_cfa t1, -8
+; RV64IV-NEXT:    lui t2, 1
+; RV64IV-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV64IV-NEXT:    sub sp, sp, t2
+; RV64IV-NEXT:    sd zero, 0(sp)
+; RV64IV-NEXT:    sub t1, t1, t2
+; RV64IV-NEXT:    bge t1, t2, .LBB2_1
+; RV64IV-NEXT:  # %bb.2:
+; RV64IV-NEXT:    .cfi_def_cfa_register sp
+; RV64IV-NEXT:    sub sp, sp, t1
+; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 1 * vlenb
+; RV64IV-NEXT:    addi a0, sp, 8
+; RV64IV-NEXT:    vl1re64.v v8, (a0)
+; RV64IV-NEXT:    addi a0, sp, 528
+; RV64IV-NEXT:    vl1re64.v v9, (a0)
+; RV64IV-NEXT:    ld a0, 520(sp)
+; RV64IV-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64IV-NEXT:    vadd.vv v8, v8, v9
+; RV64IV-NEXT:    csrr a0, vlenb
+; RV64IV-NEXT:    add sp, sp, a0
+; RV64IV-NEXT:    .cfi_def_cfa sp, 528
+; RV64IV-NEXT:    addi sp, sp, 528
+; RV64IV-NEXT:    .cfi_def_cfa_offset 0
+; RV64IV-NEXT:    ret
+  %local = alloca i64
+  %vector = alloca <vscale x 1 x i64>
+  %array = alloca [64 x i64]
+  %v1 = load <vscale x 1 x i64>, ptr %array
+  %v2 = load <vscale x 1 x i64>, ptr %vector
+  %len = load i64, ptr %local
+
+  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(
+    <vscale x 1 x i64> undef,
+    <vscale x 1 x i64> %v1,
+    <vscale x 1 x i64> %v2,
+    i64 %len)
+
+  ret <vscale x 1 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-rvv.ll
new file mode 100644
index 00000000000000..d7f9ae73eaea54
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-rvv.ll
@@ -0,0 +1,400 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64IV
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IV
+
+; Tests adapted from AArch64.
+
+; Test prolog sequences for stack probing when vector is involved.
+
+; The space for vector objects needs probing in the general case, because
+; the stack adjustment may happen to be too big (i.e. greater than the
+; probe size).
+
+define void @f_vector(ptr %out) #0 {
+; RV64IV-LABEL: f_vector:
+; RV64IV:       # %bb.0: # %entry
+; RV64IV-NEXT:    csrr t1, vlenb
+; RV64IV-NEXT:    slli t1, t1, 1
+; RV64IV-NEXT:    .cfi_def_cfa t1, -16
+; RV64IV-NEXT:    lui t2, 1
+; RV64IV-NEXT:  .LBB0_1: # %entry
+; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64IV-NEXT:    sub sp, sp, t2
+; RV64IV-NEXT:    sd zero, 0(sp)
+; RV64IV-NEXT:    sub t1, t1, t2
+; RV64IV-NEXT:    bge t1, t2, .LBB0_1
+; RV64IV-NEXT:  # %bb.2: # %entry
+; RV64IV-NEXT:    .cfi_def_cfa_register sp
+; RV64IV-NEXT:    sub sp, sp, t1
+; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
+; RV64IV-NEXT:    csrr a0, vlenb
+; RV64IV-NEXT:    slli a0, a0, 1
+; RV64IV-NEXT:    add sp, sp, a0
+; RV64IV-NEXT:    .cfi_def_cfa sp, 0
+; RV64IV-NEXT:    ret
+;
+; RV32IV-LABEL: f_vector:
+; RV32IV:       # %bb.0: # %entry
+; RV32IV-NEXT:    csrr t1, vlenb
+; RV32IV-NEXT:    slli t1, t1, 1
+; RV32IV-NEXT:    .cfi_def_cfa t1, -16
+; RV32IV-NEXT:    lui t2, 1
+; RV32IV-NEXT:  .LBB0_1: # %entry
+; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IV-NEXT:    sub sp, sp, t2
+; RV32IV-NEXT:    sw zero, 0(sp)
+; RV32IV-NEXT:    sub t1, t1, t2
+; RV32IV-NEXT:    bge t1, t2, .LBB0_1
+; RV32IV-NEXT:  # %bb.2: # %entry
+; RV32IV-NEXT:    .cfi_def_cfa_register sp
+; RV32IV-NEXT:    sub sp, sp, t1
+; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
+; RV32IV-NEXT:    csrr a0, vlenb
+; RV32IV-NEXT:    slli a0, a0, 1
+; RV32IV-NEXT:    add sp, sp, a0
+; RV32IV-NEXT:    .cfi_def_cfa sp, 0
+; RV32IV-NEXT:    ret
+entry:
+  %vec = alloca <vscale x 4 x float>, align 16
+  ret void
+}
+
+; As above, but with 4 vectors of stack space.
+define void @f4_vector(ptr %out) #0 {
+; RV64IV-LABEL: f4_vector:
+; RV64IV:       # %bb.0: # %entry
+; RV64IV-NEXT:    csrr t1, vlenb
+; RV64IV-NEXT:    slli t1, t1, 3
+; RV64IV-NEXT:    .cfi_def_cfa t1, -64
+; RV64IV-NEXT:    lui t2, 1
+; RV64IV-NEXT:  .LBB1_1: # %entry
+; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64IV-NEXT:    sub sp, sp, t2
+; RV64IV-NEXT:    sd zero, 0(sp)
+; RV64IV-NEXT:    sub t1, t1, t2
+; RV64IV-NEXT:    bge t1, t2, .LBB1_1
+; RV64IV-NEXT:  # %bb.2: # %entry
+; RV64IV-NEXT:    .cfi_def_cfa_register sp
+; RV64IV-NEXT:    sub sp, sp, t1
+; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
+; RV64IV-NEXT:    csrr a0, vlenb
+; RV64IV-NEXT:    slli a0, a0, 3
+; RV64IV-NEXT:    add sp, sp, a0
+; RV64IV-NEXT:    .cfi_def_cfa sp, 0
+; RV64IV-NEXT:    ret
+;
+; RV32IV-LABEL: f4_vector:
+; RV32IV:       # %bb.0: # %entry
+; RV32IV-NEXT:    csrr t1, vlenb
+; RV32IV-NEXT:    slli t1, t1, 3
+; RV32IV-NEXT:    .cfi_def_cfa t1, -64
+; RV32IV-NEXT:    lui t2, 1
+; RV32IV-NEXT:  .LBB1_1: # %entry
+; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IV-NEXT:    sub sp, sp, t2
+; RV32IV-NEXT:    sw zero, 0(sp)
+; RV32IV-NEXT:    sub t1, t1, t2
+; RV32IV-NEXT:    bge t1, t2, .LBB1_1
+; RV32IV-NEXT:  # %bb.2: # %entry
+; RV32IV-NEXT:    .cfi_def_cfa_register sp
+; RV32IV-NEXT:    sub sp, sp, t1
+; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
+; RV32IV-NEXT:    csrr a0, vlenb
+; RV32IV-NEXT:    slli a0, a0, 3
+; RV32IV-NEXT:    add sp, sp, a0
+; RV32IV-NEXT:    .cfi_def_cfa sp, 0
+; RV32IV-NEXT:    ret
+entry:
+  %vec1 = alloca <vscale x 4 x float>, align 16
+  %vec2 = alloca <vscale x 4 x float>, align 16
+  %vec3 = alloca <vscale x 4 x float>, align 16
+  %vec4 = alloca <vscale x 4 x float>, align 16
+  ret void
+}
+
+; As above, but with 16 vectors of stack space.
+; The stack adjustment is less than or equal to 16 x 256 = 4096, so
+; we can allocate the locals at once.
+define void @f16_vector(ptr %out) #0 {
+; RV64IV-LABEL: f16_vector:
+; RV64IV:       # %bb.0: # %entry
+; RV64IV-NEXT:    csrr t1, vlenb
+; RV64IV-NEXT:    slli t1, t1, 5
+; RV64IV-NEXT:    .cfi_def_cfa t1, -256
+; RV64IV-NEXT:    lui t2, 1
+; RV64IV-NEXT:  .LBB2_1: # %entry
+; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64IV-NEXT:    sub sp, sp, t2
+; RV64IV-NEXT:    sd zero, 0(sp)
+; RV64IV-NEXT:    sub t1, t1, t2
+; RV64IV-NEXT:    bge t1, t2, .LBB2_1
+; RV64IV-NEXT:  # %bb.2: # %entry
+; RV64IV-NEXT:    .cfi_def_cfa_register sp
+; RV64IV-NEXT:    sub sp, sp, t1
+; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
+; RV64IV-NEXT:    csrr a0, vlenb
+; RV64IV-NEXT:    slli a0, a0, 5
+; RV64IV-NEXT:    add sp, sp, a0
+; RV64IV-NEXT:    .cfi_def_cfa sp, 0
+; RV64IV-NEXT:    ret
+;
+; RV32IV-LABEL: f16_vector:
+; RV32IV:       # %bb.0: # %entry
+; RV32IV-NEXT:    csrr t1, vlenb
+; RV32IV-NEXT:    slli t1, t1, 5
+; RV32IV-NEXT:    .cfi_def_cfa t1, -256
+; RV32IV-NEXT:    lui t2, 1
+; RV32IV-NEXT:  .LBB2_1: # %entry
+; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IV-NEXT:    sub sp, sp, t2
+; RV32IV-NEXT:    sw zero, 0(sp)
+; RV32IV-NEXT:    sub t1, t1, t2
+; RV32IV-NEXT:    bge t1, t2, .LBB2_1
+; RV32IV-NEXT:  # %bb.2: # %entry
+; RV32IV-NEXT:    .cfi_def_cfa_register sp
+; RV32IV-NEXT:    sub sp, sp, t1
+; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
+; RV32IV-NEXT:    csrr a0, vlenb
+; RV32IV-NEXT:    slli a0, a0, 5
+; RV32IV-NEXT:    add sp, sp, a0
+; RV32IV-NEXT:    .cfi_def_cfa sp, 0
+; RV32IV-NEXT:    ret
+entry:
+  %vec1 = alloca <vscale x 4 x float>, align 16
+  %vec2 = alloca <vscale x 4 x float>, align 16
+  %vec3 = alloca <vscale x 4 x float>, align 16
+  %vec4 = alloca <vscale x 4 x float>, align 16
+  %vec5 = alloca <vscale x 4 x float>, align 16
+  %vec6 = alloca <vscale x 4 x float>, align 16
+  %vec7 = alloca <vscale x 4 x float>, align 16
+  %vec8 = alloca <vscale x 4 x float>, align 16
+  %vec9 = alloca <vscale x 4 x float>, align 16
+  %vec10 = alloca <vscale x 4 x float>, align 16
+  %vec11 = alloca <vscale x 4 x float>, align 16
+  %vec12 = alloca <vscale x 4 x float>, align 16
+  %vec13 = alloca <vscale x 4 x float>, align 16
+  %vec14 = alloca <vscale x 4 x float>, align 16
+  %vec15 = alloca <vscale x 4 x float>, align 16
+  %vec16 = alloca <vscale x 4 x float>, align 16
+  ret void
+}
+
+; As above, but with 17 vectors of stack space.
+define void @f17_vector(ptr %out) #0 {
+; RV64IV-LABEL: f17_vector:
+; RV64IV:       # %bb.0: # %entry
+; RV64IV-NEXT:    csrr t1, vlenb
+; RV64IV-NEXT:    li a0, 34
+; RV6...
[truncated]

rzinsly (Contributor, Author) commented Dec 16, 2024

ping

      if (NeedProbe) {
        // Do a probe if the align + size allocated just passed the probe size
        // and was not yet probed.
        uint64_t AllocatedStack = SecondSPAdjustAmount + RVVStackSize;
topperc (Collaborator) commented Dec 16, 2024
RVVStackSize is relative to vscale. Does this addition make sense? It undercounts the allocated size if VLEN is greater than 64.
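For concreteness (illustrative numbers, not from the thread): RVVStackSize is a byte count computed as if vlenb were 8 (VLEN = 64); the prologue materializes the real size as (RVVStackSize / 8) * vlenb. So RVVStackSize = 64 corresponds to 64 allocated bytes when VLEN = 64 but to 128 bytes when VLEN = 128, and SecondSPAdjustAmount + RVVStackSize understates the true allocation on wider implementations.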

rzinsly (Contributor, Author) replied:
We can't know exactly how much stack the RVV code allocated, but just adding RVVStackSize would likely exceed the probe size.
In the end I was trying to capture the case where we had a vector allocation, since that would already have probed the stack after the second adjustment.
Your question shows that the code is misleading, so I changed it to test for RVVStackSize instead.

github-actions bot commented Dec 17, 2024
✅ With the latest revision this PR passed the undef deprecator.

Use the probe loop structure to allocate vector code in the stack as well.
We add the pseudo instruction RISCV::PROBED_STACKALLOC_RVV to
differentiate from the normal loop.
topperc (Collaborator) commented Dec 17, 2024

Please don't force push unless you need to.

static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 DebugLoc DL) {
MachineBasicBlock *RISCVFrameLowering::emitStackProbeInline(
topperc (Collaborator) commented:
Why did the return type change? The caller doesn't use the returned MachineBasicBlock*

rzinsly (Contributor, Author) replied:

I mixed up the patches again; this will only be needed for the dynamic allocation probing.
Removed.

rzinsly (Contributor, Author) commented Jan 6, 2025

@topperc ping

    if (NeedProbe)
      allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
                                  MachineInstr::FrameSetup, !hasFP(MF));
    else
topperc (Collaborator) commented:
Add curly braces around this since the comment makes it multiple lines for the reader. See https://llvm.org/docs/CodingStandards.html#don-t-use-braces-on-simple-single-statement-bodies-of-if-else-loop-statements

You'll also need to add curly braces around if body for consistency.
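A minimal sketch of the braced form this comment asks for (the same code as the hunk above, with braces added per the linked LLVM coding standard):

    if (NeedProbe) {
      allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, RVVStackSize,
                                  MachineInstr::FrameSetup, !hasFP(MF));
    } else {
      // We must keep the stack pointer aligned through any intermediate
      // updates.
      RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
                    StackOffset::getScalable(-RVVStackSize),
                    MachineInstr::FrameSetup, getStackAlign());
    }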

rzinsly (Contributor, Author) replied:
Done

topperc (Collaborator) left a comment:

LGTM

rzinsly (Contributor, Author) commented Jan 10, 2025

@topperc Thank you! Can you merge this for me?
There is one last patch in this series.

topperc merged commit 6f53886 into llvm:main on Jan 10, 2025
8 checks passed
BaiXilin pushed a commit to BaiXilin/llvm-fix-vnni-instr-types that referenced this pull request Jan 12, 2025
Use the probe loop structure to allocate vector code in the stack as
well. We add the pseudo instruction RISCV::PROBED_STACKALLOC_RVV to
differentiate from the normal loop.