
Commit 5248e1d

[AArch64] Fix frame-pointer offset with hazard padding (#118091)
The `-aarch64-stack-hazard-size=<val>` option disables register pairing (as the hazard padding may make the offset too large for STP/LDP). This broke setting the frame-pointer offset, since the code that locates the frame record only looked for an (FP, LR) register pair. This patch fixes that by also recognising FP and LR saved as two unpaired registers when hazard padding is enabled.
1 parent 7a7a426 commit 5248e1d
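
For reference, the affected case can be reproduced with a small IR function that needs a frame pointer while hazard padding is in effect; this sketch mirrors the fpr_csr_stackobj test added below, and the llc invocation and file name are illustrative rather than part of the commit:

; Illustrative reproducer (mirrors the new fpr_csr_stackobj test below).
; Compile with, e.g.: llc -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=1024 repro.ll
; With hazard padding, x29 and x30 are spilled by separate str instructions
; rather than a single stp, so the frame-pointer offset must be derived from
; those unpaired saves.
define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
entry:
  %a = alloca double
  tail call void asm sideeffect "", "~{x23},~{d9},~{d10}"()
  store double %x, ptr %a
  ret i32 0
}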


3 files changed: +265 −29 lines

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 16 additions & 3 deletions
@@ -3167,11 +3167,24 @@ static void computeCalleeSaveRegisterPairs(
             (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
            "Offset out of bounds for LDP/STP immediate");
 
+    auto isFrameRecord = [&] {
+      if (RPI.isPaired())
+        return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
+                         : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;
+      // Otherwise, look for the frame record as two unpaired registers. This is
+      // needed for -aarch64-stack-hazard-size=<val>, which disables register
+      // pairing (as the padding may be too large for the LDP/STP offset). Note:
+      // On Windows, this check works out as current reg == FP, next reg == LR,
+      // and on other platforms current reg == FP, previous reg == LR. This
+      // works out as the correct pre-increment or post-increment offsets
+      // respectively.
+      return i > 0 && RPI.Reg1 == AArch64::FP &&
+             CSI[i - 1].getReg() == AArch64::LR;
+    };
+
     // Save the offset to frame record so that the FP register can point to the
     // innermost frame record (spilled FP and LR registers).
-    if (NeedsFrameRecord &&
-        ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
-         (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR)))
+    if (NeedsFrameRecord && isFrameRecord())
       AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
 
     RegPairs.push_back(RPI);
Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefixes=CHECK0
+; RUN: llc < %s -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=64 | FileCheck %s --check-prefixes=CHECK64
+; RUN: llc < %s -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK1024
+
+define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
+; CHECK0-LABEL: fpr_csr_stackobj:
+; CHECK0: .seh_proc fpr_csr_stackobj
+; CHECK0-NEXT: // %bb.0: // %entry
+; CHECK0-NEXT: str x23, [sp, #-48]! // 8-byte Folded Spill
+; CHECK0-NEXT: .seh_save_reg_x x23, 48
+; CHECK0-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK0-NEXT: .seh_save_fplr 8
+; CHECK0-NEXT: stp d9, d10, [sp, #24] // 16-byte Folded Spill
+; CHECK0-NEXT: .seh_save_fregp d9, 24
+; CHECK0-NEXT: add x29, sp, #8
+; CHECK0-NEXT: .seh_add_fp 8
+; CHECK0-NEXT: .seh_endprologue
+; CHECK0-NEXT: mov w0, wzr
+; CHECK0-NEXT: //APP
+; CHECK0-NEXT: //NO_APP
+; CHECK0-NEXT: str d0, [x29, #32]
+; CHECK0-NEXT: .seh_startepilogue
+; CHECK0-NEXT: ldp d9, d10, [sp, #24] // 16-byte Folded Reload
+; CHECK0-NEXT: .seh_save_fregp d9, 24
+; CHECK0-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK0-NEXT: .seh_save_fplr 8
+; CHECK0-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload
+; CHECK0-NEXT: .seh_save_reg_x x23, 48
+; CHECK0-NEXT: .seh_endepilogue
+; CHECK0-NEXT: ret
+; CHECK0-NEXT: .seh_endfunclet
+; CHECK0-NEXT: .seh_endproc
+;
+; CHECK64-LABEL: fpr_csr_stackobj:
+; CHECK64: .seh_proc fpr_csr_stackobj
+; CHECK64-NEXT: // %bb.0: // %entry
+; CHECK64-NEXT: sub sp, sp, #192
+; CHECK64-NEXT: .seh_stackalloc 192
+; CHECK64-NEXT: str x23, [sp, #80] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_reg x23, 80
+; CHECK64-NEXT: str x29, [sp, #88] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_reg x29, 88
+; CHECK64-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_reg x30, 96
+; CHECK64-NEXT: str d9, [sp, #168] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_freg d9, 168
+; CHECK64-NEXT: str d10, [sp, #176] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_freg d10, 176
+; CHECK64-NEXT: add x29, sp, #88
+; CHECK64-NEXT: .seh_add_fp 88
+; CHECK64-NEXT: .seh_endprologue
+; CHECK64-NEXT: mov w0, wzr
+; CHECK64-NEXT: //APP
+; CHECK64-NEXT: //NO_APP
+; CHECK64-NEXT: stur d0, [x29, #-16]
+; CHECK64-NEXT: .seh_startepilogue
+; CHECK64-NEXT: ldr d10, [sp, #176] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_freg d10, 176
+; CHECK64-NEXT: ldr d9, [sp, #168] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_freg d9, 168
+; CHECK64-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_reg x30, 96
+; CHECK64-NEXT: ldr x29, [sp, #88] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_reg x29, 88
+; CHECK64-NEXT: ldr x23, [sp, #80] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_reg x23, 80
+; CHECK64-NEXT: add sp, sp, #192
+; CHECK64-NEXT: .seh_stackalloc 192
+; CHECK64-NEXT: .seh_endepilogue
+; CHECK64-NEXT: ret
+; CHECK64-NEXT: .seh_endfunclet
+; CHECK64-NEXT: .seh_endproc
+;
+; CHECK1024-LABEL: fpr_csr_stackobj:
+; CHECK1024: .seh_proc fpr_csr_stackobj
+; CHECK1024-NEXT: // %bb.0: // %entry
+; CHECK1024-NEXT: sub sp, sp, #1072
+; CHECK1024-NEXT: str x23, [sp] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x29, [sp, #8] // 8-byte Folded Spill
+; CHECK1024-NEXT: .seh_save_reg x29, 8
+; CHECK1024-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK1024-NEXT: .seh_save_reg x30, 16
+; CHECK1024-NEXT: str d9, [sp, #1048] // 8-byte Folded Spill
+; CHECK1024-NEXT: .seh_save_freg d9, 1048
+; CHECK1024-NEXT: str d10, [sp, #1056] // 8-byte Folded Spill
+; CHECK1024-NEXT: .seh_save_freg d10, 1056
+; CHECK1024-NEXT: add x29, sp, #8
+; CHECK1024-NEXT: .seh_add_fp 8
+; CHECK1024-NEXT: .seh_endprologue
+; CHECK1024-NEXT: sub sp, sp, #1040
+; CHECK1024-NEXT: mov w0, wzr
+; CHECK1024-NEXT: //APP
+; CHECK1024-NEXT: //NO_APP
+; CHECK1024-NEXT: stur d0, [x29, #-16]
+; CHECK1024-NEXT: .seh_startepilogue
+; CHECK1024-NEXT: add sp, sp, #1040
+; CHECK1024-NEXT: .seh_stackalloc 1040
+; CHECK1024-NEXT: ldr d10, [sp, #1056] // 8-byte Folded Reload
+; CHECK1024-NEXT: .seh_save_freg d10, 1056
+; CHECK1024-NEXT: ldr d9, [sp, #1048] // 8-byte Folded Reload
+; CHECK1024-NEXT: .seh_save_freg d9, 1048
+; CHECK1024-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK1024-NEXT: .seh_save_reg x30, 16
+; CHECK1024-NEXT: ldr x29, [sp, #8] // 8-byte Folded Reload
+; CHECK1024-NEXT: .seh_save_reg x29, 8
+; CHECK1024-NEXT: ldr x23, [sp] // 8-byte Folded Reload
+; CHECK1024-NEXT: add sp, sp, #1072
+; CHECK1024-NEXT: .seh_endepilogue
+; CHECK1024-NEXT: ret
+; CHECK1024-NEXT: .seh_endfunclet
+; CHECK1024-NEXT: .seh_endproc
+entry:
+  %a = alloca double
+  tail call void asm sideeffect "", "~{x23},~{d9},~{d10}"()
+  store double %x, ptr %a
+  ret i32 0
+}

llvm/test/CodeGen/AArch64/stack-hazard.ll

Lines changed: 131 additions & 26 deletions
@@ -337,19 +337,18 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
 ; CHECK64-LABEL: csr_d8_allocd_framepointer:
 ; CHECK64: // %bb.0: // %entry
 ; CHECK64-NEXT: sub sp, sp, #176
-; CHECK64-NEXT: str d8, [sp, #80] // 8-byte Folded Spill
+; CHECK64-NEXT: stp d0, d8, [sp, #72] // 8-byte Folded Spill
 ; CHECK64-NEXT: stp x29, x30, [sp, #152] // 16-byte Folded Spill
-; CHECK64-NEXT: add x29, sp, #80
-; CHECK64-NEXT: .cfi_def_cfa w29, 96
+; CHECK64-NEXT: add x29, sp, #152
+; CHECK64-NEXT: .cfi_def_cfa w29, 24
 ; CHECK64-NEXT: .cfi_offset w30, -16
 ; CHECK64-NEXT: .cfi_offset w29, -24
 ; CHECK64-NEXT: .cfi_offset b8, -96
 ; CHECK64-NEXT: //APP
 ; CHECK64-NEXT: //NO_APP
-; CHECK64-NEXT: stur d0, [x29, #-8]
 ; CHECK64-NEXT: ldr x29, [sp, #152] // 8-byte Folded Reload
-; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
 ; CHECK64-NEXT: mov w0, wzr
+; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
 ; CHECK64-NEXT: add sp, sp, #176
 ; CHECK64-NEXT: ret
 ;
@@ -358,17 +357,17 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
 ; CHECK1024-NEXT: sub sp, sp, #1056
 ; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill
 ; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill
-; CHECK1024-NEXT: mov x29, sp
+; CHECK1024-NEXT: add x29, sp, #1032
 ; CHECK1024-NEXT: str x30, [sp, #1040] // 8-byte Folded Spill
 ; CHECK1024-NEXT: sub sp, sp, #1040
-; CHECK1024-NEXT: .cfi_def_cfa w29, 1056
+; CHECK1024-NEXT: .cfi_def_cfa w29, 24
 ; CHECK1024-NEXT: .cfi_offset w30, -16
 ; CHECK1024-NEXT: .cfi_offset w29, -24
 ; CHECK1024-NEXT: .cfi_offset b8, -1056
 ; CHECK1024-NEXT: mov w0, wzr
 ; CHECK1024-NEXT: //APP
 ; CHECK1024-NEXT: //NO_APP
-; CHECK1024-NEXT: stur d0, [x29, #-8]
+; CHECK1024-NEXT: str d0, [sp, #1032]
 ; CHECK1024-NEXT: add sp, sp, #1040
 ; CHECK1024-NEXT: ldr x30, [sp, #1040] // 8-byte Folded Reload
 ; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload
@@ -2893,8 +2892,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK64-NEXT: stp x29, x30, [sp, #128] // 16-byte Folded Spill
 ; CHECK64-NEXT: stp x9, x20, [sp, #144] // 16-byte Folded Spill
 ; CHECK64-NEXT: str x19, [sp, #160] // 8-byte Folded Spill
-; CHECK64-NEXT: mov x29, sp
-; CHECK64-NEXT: .cfi_def_cfa w29, 176
+; CHECK64-NEXT: add x29, sp, #128
+; CHECK64-NEXT: .cfi_def_cfa w29, 48
 ; CHECK64-NEXT: .cfi_offset w19, -16
 ; CHECK64-NEXT: .cfi_offset w20, -24
 ; CHECK64-NEXT: .cfi_offset w30, -40
@@ -2913,11 +2912,11 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK64-NEXT: mov w20, w0
 ; CHECK64-NEXT: msub x9, x8, x8, x9
 ; CHECK64-NEXT: mov sp, x9
-; CHECK64-NEXT: stur x9, [x29, #-80]
-; CHECK64-NEXT: sub x9, x29, #80
-; CHECK64-NEXT: sturh wzr, [x29, #-70]
-; CHECK64-NEXT: stur wzr, [x29, #-68]
-; CHECK64-NEXT: sturh w8, [x29, #-72]
+; CHECK64-NEXT: stur x9, [x29, #-208]
+; CHECK64-NEXT: sub x9, x29, #208
+; CHECK64-NEXT: sturh wzr, [x29, #-198]
+; CHECK64-NEXT: stur wzr, [x29, #-196]
+; CHECK64-NEXT: sturh w8, [x29, #-200]
 ; CHECK64-NEXT: msr TPIDR2_EL0, x9
 ; CHECK64-NEXT: .cfi_offset vg, -32
 ; CHECK64-NEXT: smstop sm
@@ -2926,14 +2925,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK64-NEXT: .cfi_restore vg
 ; CHECK64-NEXT: smstart za
 ; CHECK64-NEXT: mrs x8, TPIDR2_EL0
-; CHECK64-NEXT: sub x0, x29, #80
+; CHECK64-NEXT: sub x0, x29, #208
 ; CHECK64-NEXT: cbnz x8, .LBB33_2
 ; CHECK64-NEXT: // %bb.1: // %entry
 ; CHECK64-NEXT: bl __arm_tpidr2_restore
 ; CHECK64-NEXT: .LBB33_2: // %entry
 ; CHECK64-NEXT: mov w0, w20
 ; CHECK64-NEXT: msr TPIDR2_EL0, xzr
-; CHECK64-NEXT: mov sp, x29
+; CHECK64-NEXT: sub sp, x29, #128
 ; CHECK64-NEXT: .cfi_def_cfa wsp, 176
 ; CHECK64-NEXT: ldp x20, x19, [sp, #152] // 16-byte Folded Reload
 ; CHECK64-NEXT: ldr d14, [sp, #8] // 8-byte Folded Reload
@@ -2972,8 +2971,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK1024-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
 ; CHECK1024-NEXT: str x20, [sp, #1120] // 8-byte Folded Spill
 ; CHECK1024-NEXT: str x19, [sp, #1128] // 8-byte Folded Spill
-; CHECK1024-NEXT: mov x29, sp
-; CHECK1024-NEXT: .cfi_def_cfa w29, 1136
+; CHECK1024-NEXT: add x29, sp, #1088
+; CHECK1024-NEXT: .cfi_def_cfa w29, 48
 ; CHECK1024-NEXT: .cfi_offset w19, -8
 ; CHECK1024-NEXT: .cfi_offset w20, -16
 ; CHECK1024-NEXT: .cfi_offset w28, -24
@@ -2993,14 +2992,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK1024-NEXT: mov w20, w0
 ; CHECK1024-NEXT: msub x9, x8, x8, x9
 ; CHECK1024-NEXT: mov sp, x9
-; CHECK1024-NEXT: sub x10, x29, #784
+; CHECK1024-NEXT: sub x10, x29, #1872
 ; CHECK1024-NEXT: stur x9, [x10, #-256]
-; CHECK1024-NEXT: sub x9, x29, #774
-; CHECK1024-NEXT: sub x10, x29, #772
+; CHECK1024-NEXT: sub x9, x29, #1862
+; CHECK1024-NEXT: sub x10, x29, #1860
 ; CHECK1024-NEXT: sturh wzr, [x9, #-256]
-; CHECK1024-NEXT: sub x9, x29, #1040
+; CHECK1024-NEXT: sub x9, x29, #2128
 ; CHECK1024-NEXT: stur wzr, [x10, #-256]
-; CHECK1024-NEXT: sub x10, x29, #776
+; CHECK1024-NEXT: sub x10, x29, #1864
 ; CHECK1024-NEXT: sturh w8, [x10, #-256]
 ; CHECK1024-NEXT: msr TPIDR2_EL0, x9
 ; CHECK1024-NEXT: .cfi_offset vg, -32
@@ -3010,14 +3009,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK1024-NEXT: .cfi_restore vg
 ; CHECK1024-NEXT: smstart za
 ; CHECK1024-NEXT: mrs x8, TPIDR2_EL0
-; CHECK1024-NEXT: sub x0, x29, #1040
+; CHECK1024-NEXT: sub x0, x29, #2128
 ; CHECK1024-NEXT: cbnz x8, .LBB33_2
 ; CHECK1024-NEXT: // %bb.1: // %entry
 ; CHECK1024-NEXT: bl __arm_tpidr2_restore
 ; CHECK1024-NEXT: .LBB33_2: // %entry
 ; CHECK1024-NEXT: mov w0, w20
 ; CHECK1024-NEXT: msr TPIDR2_EL0, xzr
-; CHECK1024-NEXT: mov sp, x29
+; CHECK1024-NEXT: sub sp, x29, #1088
 ; CHECK1024-NEXT: .cfi_def_cfa wsp, 1136
 ; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
 ; CHECK1024-NEXT: ldr x19, [sp, #1128] // 8-byte Folded Reload
@@ -3049,3 +3048,109 @@ entry:
   ret i32 %x
 }
 declare void @other()
+
+declare void @bar(ptr noundef) "aarch64_pstate_sm_compatible"
+
+define i32 @sve_stack_object_and_vla(double %d, i64 %sz) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
+; CHECK0-LABEL: sve_stack_object_and_vla:
+; CHECK0: // %bb.0: // %entry
+; CHECK0-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-NEXT: stp x28, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK0-NEXT: mov x29, sp
+; CHECK0-NEXT: addvl sp, sp, #-1
+; CHECK0-NEXT: mov x19, sp
+; CHECK0-NEXT: .cfi_def_cfa w29, 32
+; CHECK0-NEXT: .cfi_offset w19, -8
+; CHECK0-NEXT: .cfi_offset w28, -16
+; CHECK0-NEXT: .cfi_offset w30, -24
+; CHECK0-NEXT: .cfi_offset w29, -32
+; CHECK0-NEXT: lsl x9, x0, #2
+; CHECK0-NEXT: mov x8, sp
+; CHECK0-NEXT: add x9, x9, #15
+; CHECK0-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK0-NEXT: sub x0, x8, x9
+; CHECK0-NEXT: mov sp, x0
+; CHECK0-NEXT: mov z0.s, #0 // =0x0
+; CHECK0-NEXT: ptrue p0.s
+; CHECK0-NEXT: st1w { z0.s }, p0, [x29, #-1, mul vl]
+; CHECK0-NEXT: bl bar
+; CHECK0-NEXT: mov w0, wzr
+; CHECK0-NEXT: mov sp, x29
+; CHECK0-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK0-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK0-NEXT: ret
+;
+; CHECK64-LABEL: sve_stack_object_and_vla:
+; CHECK64: // %bb.0: // %entry
+; CHECK64-NEXT: sub sp, sp, #96
+; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK64-NEXT: add x29, sp, #64
+; CHECK64-NEXT: stp x28, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK64-NEXT: sub sp, sp, #64
+; CHECK64-NEXT: addvl sp, sp, #-1
+; CHECK64-NEXT: mov x19, sp
+; CHECK64-NEXT: .cfi_def_cfa w29, 32
+; CHECK64-NEXT: .cfi_offset w19, -8
+; CHECK64-NEXT: .cfi_offset w28, -16
+; CHECK64-NEXT: .cfi_offset w30, -24
+; CHECK64-NEXT: .cfi_offset w29, -32
+; CHECK64-NEXT: lsl x9, x0, #2
+; CHECK64-NEXT: mov x8, sp
+; CHECK64-NEXT: add x9, x9, #15
+; CHECK64-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK64-NEXT: sub x0, x8, x9
+; CHECK64-NEXT: mov sp, x0
+; CHECK64-NEXT: mov z0.s, #0 // =0x0
+; CHECK64-NEXT: ptrue p0.s
+; CHECK64-NEXT: sub x8, x29, #64
+; CHECK64-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
+; CHECK64-NEXT: bl bar
+; CHECK64-NEXT: mov w0, wzr
+; CHECK64-NEXT: sub sp, x29, #64
+; CHECK64-NEXT: ldp x28, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK64-NEXT: add sp, sp, #96
+; CHECK64-NEXT: ret
+;
+; CHECK1024-LABEL: sve_stack_object_and_vla:
+; CHECK1024: // %bb.0: // %entry
+; CHECK1024-NEXT: sub sp, sp, #1056
+; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NEXT: add x29, sp, #1024
+; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x19, [sp, #1048] // 8-byte Folded Spill
+; CHECK1024-NEXT: sub sp, sp, #1024
+; CHECK1024-NEXT: addvl sp, sp, #-1
+; CHECK1024-NEXT: mov x19, sp
+; CHECK1024-NEXT: .cfi_def_cfa w29, 32
+; CHECK1024-NEXT: .cfi_offset w19, -8
+; CHECK1024-NEXT: .cfi_offset w28, -16
+; CHECK1024-NEXT: .cfi_offset w30, -24
+; CHECK1024-NEXT: .cfi_offset w29, -32
+; CHECK1024-NEXT: lsl x9, x0, #2
+; CHECK1024-NEXT: mov x8, sp
+; CHECK1024-NEXT: add x9, x9, #15
+; CHECK1024-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK1024-NEXT: sub x0, x8, x9
+; CHECK1024-NEXT: mov sp, x0
+; CHECK1024-NEXT: mov z0.s, #0 // =0x0
+; CHECK1024-NEXT: ptrue p0.s
+; CHECK1024-NEXT: sub x8, x29, #1024
+; CHECK1024-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
+; CHECK1024-NEXT: bl bar
+; CHECK1024-NEXT: mov w0, wzr
+; CHECK1024-NEXT: sub sp, x29, #1024
+; CHECK1024-NEXT: ldr x19, [sp, #1048] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NEXT: add sp, sp, #1056
+; CHECK1024-NEXT: ret
+entry:
+  %a = alloca <vscale x 4 x i32>
+  %b = alloca i32, i64 %sz, align 4
+  store <vscale x 4 x i32> zeroinitializer, ptr %a
+  call void @bar(ptr noundef nonnull %b)
+  ret i32 0
+}
