@@ -64,6 +64,7 @@ define void @sve_16_vector(ptr %out) #0 {
64
64
; CHECK-NEXT: .cfi_offset w29, -16
65
65
; CHECK-NEXT: addvl sp, sp, #-16
66
66
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 128 * VG
67
+ ; CHECK-NEXT: str xzr, [sp]
67
68
; CHECK-NEXT: addvl sp, sp, #16
68
69
; CHECK-NEXT: .cfi_def_cfa wsp, 16
69
70
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -215,6 +216,7 @@ define void @sve_16v_csr(<vscale x 4 x float> %a) #0 {
215
216
; CHECK-NEXT: .cfi_offset w29, -16
216
217
; CHECK-NEXT: addvl sp, sp, #-16
217
218
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 128 * VG
219
+ ; CHECK-NEXT: str xzr, [sp]
218
220
; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill
219
221
; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
220
222
; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
@@ -549,6 +551,7 @@ define void @sve_1024_64k_guard(ptr %out) #0 "stack-probe-size"="65536" {
549
551
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1792 * VG
550
552
; CHECK-NEXT: addvl sp, sp, #-32
551
553
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x80, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 2048 * VG
554
+ ; CHECK-NEXT: str xzr, [sp]
552
555
; CHECK-NEXT: addvl sp, sp, #31
553
556
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x0e, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 1800 * VG
554
557
; CHECK-NEXT: addvl sp, sp, #31
@@ -641,4 +644,31 @@ entry:
641
644
ret void
642
645
}
643
646
647
+ ; With 5 SVE vectors of stack space the unprobed area at the top of the stack
648
+ ; can exceed 1024 bytes (5 x 256 == 1280 bytes, assuming 256-byte vectors),
649
+ ; hence a probe ("str xzr, [sp]") must follow the "addvl sp, sp, #-5" allocation.
650
+ define void @sve_5_vector (ptr %out ) #0 {
651
+ ; CHECK-LABEL: sve_5_vector:
652
+ ; CHECK: // %bb.0: // %entry
653
+ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
654
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
655
+ ; CHECK-NEXT: .cfi_offset w29, -16
656
+ ; CHECK-NEXT: addvl sp, sp, #-5
657
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 40 * VG
658
+ ; CHECK-NEXT: str xzr, [sp]
659
+ ; CHECK-NEXT: addvl sp, sp, #5
660
+ ; CHECK-NEXT: .cfi_def_cfa wsp, 16
661
+ ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
662
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
663
+ ; CHECK-NEXT: .cfi_restore w29
664
+ ; CHECK-NEXT: ret
665
+ entry:
666
+ %vec1 = alloca <vscale x 4 x float >, align 16
667
+ %vec2 = alloca <vscale x 4 x float >, align 16
668
+ %vec3 = alloca <vscale x 4 x float >, align 16
669
+ %vec4 = alloca <vscale x 4 x float >, align 16
670
+ %vec5 = alloca <vscale x 4 x float >, align 16
671
+ ret void
672
+ }
673
+
644
674
; Attribute group #0, referenced by the test functions above: async unwind tables, "inline-asm" stack probing, no frame pointer, SVE target feature enabled.
attributes #0 = { uwtable (async) "probe-stack" ="inline-asm" "frame-pointer" ="none" "target-features" ="+sve" }
0 commit comments