Skip to content

Commit 2278dd2

Browse files
Fix a failure to issue a stack probe when more than 1024 unprobed bytes can remain at the top of the stack
1 parent 45213a1 commit 2278dd2

File tree

2 files changed

+39
-10
lines changed

2 files changed

+39
-10
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -787,18 +787,17 @@ void AArch64FrameLowering::allocateStackSpace(
787787
.addReg(ScratchReg, RegState::Kill)
788788
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
789789
.setMIFlags(MachineInstr::FrameSetup);
790-
if (MFI.hasVarSizedObjects() ||
791-
upperBound(AllocSize) + RealignmentPadding >
792-
AArch64::StackProbeMaxUnprobedStack) {
793-
// STR XZR, [SP]
794-
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
795-
.addReg(AArch64::XZR)
796-
.addReg(AArch64::SP)
797-
.addImm(0)
798-
.setMIFlags(MachineInstr::FrameSetup);
799-
}
800790
AFI.setStackRealigned(true);
801791
}
792+
if (MFI.hasVarSizedObjects() || upperBound(AllocSize) + RealignmentPadding >
793+
AArch64::StackProbeMaxUnprobedStack) {
794+
// STR XZR, [SP]
795+
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
796+
.addReg(AArch64::XZR)
797+
.addReg(AArch64::SP)
798+
.addImm(0)
799+
.setMIFlags(MachineInstr::FrameSetup);
800+
}
802801
return;
803802
}
804803

llvm/test/CodeGen/AArch64/stack-probing-sve.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ define void @sve_16_vector(ptr %out) #0 {
6464
; CHECK-NEXT: .cfi_offset w29, -16
6565
; CHECK-NEXT: addvl sp, sp, #-16
6666
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 128 * VG
67+
; CHECK-NEXT: str xzr, [sp]
6768
; CHECK-NEXT: addvl sp, sp, #16
6869
; CHECK-NEXT: .cfi_def_cfa wsp, 16
6970
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -215,6 +216,7 @@ define void @sve_16v_csr(<vscale x 4 x float> %a) #0 {
215216
; CHECK-NEXT: .cfi_offset w29, -16
216217
; CHECK-NEXT: addvl sp, sp, #-16
217218
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 128 * VG
219+
; CHECK-NEXT: str xzr, [sp]
218220
; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill
219221
; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
220222
; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
@@ -549,6 +551,7 @@ define void @sve_1024_64k_guard(ptr %out) #0 "stack-probe-size"="65536" {
549551
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1792 * VG
550552
; CHECK-NEXT: addvl sp, sp, #-32
551553
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 2048 * VG
554+
; CHECK-NEXT: str xzr, [sp]
552555
; CHECK-NEXT: addvl sp, sp, #31
553556
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1800 * VG
554557
; CHECK-NEXT: addvl sp, sp, #31
@@ -641,4 +644,31 @@ entry:
641644
ret void
642645
}
643646

647+
; With 5 SVE vectors of stack space, the unprobed area
648+
; at the top of the stack can exceed 1024 bytes (5 x 256 == 1280 at the maximum SVE vector length of 256 bytes),
649+
; hence we need to issue a probe.
650+
define void @sve_5_vector(ptr %out) #0 {
651+
; CHECK-LABEL: sve_5_vector:
652+
; CHECK: // %bb.0: // %entry
653+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
654+
; CHECK-NEXT: .cfi_def_cfa_offset 16
655+
; CHECK-NEXT: .cfi_offset w29, -16
656+
; CHECK-NEXT: addvl sp, sp, #-5
657+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 40 * VG
658+
; CHECK-NEXT: str xzr, [sp]
659+
; CHECK-NEXT: addvl sp, sp, #5
660+
; CHECK-NEXT: .cfi_def_cfa wsp, 16
661+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
662+
; CHECK-NEXT: .cfi_def_cfa_offset 0
663+
; CHECK-NEXT: .cfi_restore w29
664+
; CHECK-NEXT: ret
665+
entry:
666+
%vec1 = alloca <vscale x 4 x float>, align 16
667+
%vec2 = alloca <vscale x 4 x float>, align 16
668+
%vec3 = alloca <vscale x 4 x float>, align 16
669+
%vec4 = alloca <vscale x 4 x float>, align 16
670+
%vec5 = alloca <vscale x 4 x float>, align 16
671+
ret void
672+
}
673+
644674
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" "target-features"="+sve" }

0 commit comments

Comments
 (0)