Skip to content

Commit 702c3f5

Browse files
committed
[SME] Don't scavenge a spillslot in callee-save area in presence of streaming-mode changes.
If no frame-pointer is available and the compiler has scavenged a spill-slot in the callee-save area, the compiler may be forced to emit an 'addvl' inside the streaming-mode-changing call sequence when it needs to fill (reload) a floating-point register being passed to the call. That 'addvl' would not be valid there, because the vector length changes across the streaming-mode switch. We can avoid this entirely by disabling stack-slot scavenging when the function has streaming-mode-changing call-sequences and does not use a frame pointer. Reviewed By: david-arm. Differential Revision: https://reviews.llvm.org/D159196
1 parent a4318a2 commit 702c3f5

File tree

5 files changed

+125
-66
lines changed

5 files changed

+125
-66
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
427427
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
428428
const MachineFrameInfo &MFI = MF.getFrameInfo();
429429
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
430+
430431
// Win64 EH requires a frame pointer if funclets are present, as the locals
431432
// are accessed off the frame pointer in both the parent function and the
432433
// funclets.
@@ -3278,6 +3279,12 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
32783279
bool AArch64FrameLowering::enableStackSlotScavenging(
32793280
const MachineFunction &MF) const {
32803281
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
3282+
// If the function has streaming-mode changes, don't scavenge a
3283+
// spillslot in the callee-save area, as that might require an
3284+
// 'addvl' in the streaming-mode-changing call-sequence when the
3285+
// function doesn't use a frame pointer.
3286+
if (AFI->hasStreamingModeChanges() && !hasFP(MF))
3287+
return false;
32813288
return AFI->hasCalleeSaveStackFreeSpace();
32823289
}
32833290

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 46 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline
1717
; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1818
; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
1919
; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
20-
; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
20+
; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
2121
; CHECK-FISEL-NEXT: smstart sm
22-
; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
22+
; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
2323
; CHECK-FISEL-NEXT: bl streaming_callee
24-
; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
24+
; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
2525
; CHECK-FISEL-NEXT: smstop sm
2626
; CHECK-FISEL-NEXT: adrp x8, .LCPI0_0
2727
; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI0_0]
28-
; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
28+
; CHECK-FISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
2929
; CHECK-FISEL-NEXT: fadd d0, d1, d0
3030
; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
3131
; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
@@ -43,15 +43,15 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline
4343
; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
4444
; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
4545
; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
46-
; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
46+
; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
4747
; CHECK-GISEL-NEXT: smstart sm
48-
; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
48+
; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
4949
; CHECK-GISEL-NEXT: bl streaming_callee
50-
; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
50+
; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
5151
; CHECK-GISEL-NEXT: smstop sm
5252
; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
5353
; CHECK-GISEL-NEXT: fmov d0, x8
54-
; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
54+
; CHECK-GISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
5555
; CHECK-GISEL-NEXT: fadd d0, d1, d0
5656
; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
5757
; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
@@ -76,15 +76,15 @@ define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline
7676
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
7777
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
7878
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
79-
; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
79+
; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
8080
; CHECK-COMMON-NEXT: smstop sm
81-
; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
81+
; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
8282
; CHECK-COMMON-NEXT: bl normal_callee
83-
; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
83+
; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
8484
; CHECK-COMMON-NEXT: smstart sm
8585
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
8686
; CHECK-COMMON-NEXT: fmov d0, x8
87-
; CHECK-COMMON-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
87+
; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
8888
; CHECK-COMMON-NEXT: fadd d0, d1, d0
8989
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
9090
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
@@ -102,32 +102,32 @@ entry:
102102
define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" {
103103
; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee:
104104
; CHECK-COMMON: // %bb.0:
105-
; CHECK-COMMON-NEXT: sub sp, sp, #96
106-
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
107-
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
108-
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
109-
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
110-
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
111-
; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
105+
; CHECK-COMMON-NEXT: sub sp, sp, #112
106+
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
107+
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
108+
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
109+
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
110+
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
111+
; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
112112
; CHECK-COMMON-NEXT: smstart sm
113113
; CHECK-COMMON-NEXT: smstop sm
114-
; CHECK-COMMON-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload
114+
; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
115115
; CHECK-COMMON-NEXT: bl normal_callee
116-
; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
116+
; CHECK-COMMON-NEXT: str d0, [sp, #16] // 8-byte Folded Spill
117117
; CHECK-COMMON-NEXT: smstart sm
118118
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
119119
; CHECK-COMMON-NEXT: fmov d0, x8
120-
; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
120+
; CHECK-COMMON-NEXT: ldr d1, [sp, #16] // 8-byte Folded Reload
121121
; CHECK-COMMON-NEXT: fadd d0, d1, d0
122-
; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
122+
; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
123123
; CHECK-COMMON-NEXT: smstop sm
124-
; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
125-
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
126-
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
127-
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
128-
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
129-
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
130-
; CHECK-COMMON-NEXT: add sp, sp, #96
124+
; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
125+
; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
126+
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
127+
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
128+
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
129+
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
130+
; CHECK-COMMON-NEXT: add sp, sp, #112
131131
; CHECK-COMMON-NEXT: ret
132132
%call = call double @normal_callee(double %x);
133133
%add = fadd double %call, 4.200000e+01
@@ -378,23 +378,25 @@ define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nou
378378
define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
379379
; CHECK-COMMON-LABEL: frem_call_sm:
380380
; CHECK-COMMON: // %bb.0:
381-
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
382-
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
383-
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
384-
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
385-
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
386-
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #72] // 8-byte Folded Spill
381+
; CHECK-COMMON-NEXT: sub sp, sp, #96
382+
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
383+
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
384+
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
385+
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
386+
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
387+
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
387388
; CHECK-COMMON-NEXT: smstop sm
388-
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #72] // 8-byte Folded Reload
389+
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
389390
; CHECK-COMMON-NEXT: bl fmodf
390-
; CHECK-COMMON-NEXT: str s0, [sp, #76] // 4-byte Folded Spill
391+
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
391392
; CHECK-COMMON-NEXT: smstart sm
392-
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
393-
; CHECK-COMMON-NEXT: ldr s0, [sp, #76] // 4-byte Folded Reload
394-
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
395-
; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
396-
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
397-
; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
393+
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
394+
; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
395+
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
396+
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
397+
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
398+
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
399+
; CHECK-COMMON-NEXT: add sp, sp, #96
398400
; CHECK-COMMON-NEXT: ret
399401
%res = frem float %a, %b
400402
ret float %res

llvm/test/CodeGen/AArch64/sme-streaming-body.ll

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
33

44
declare void @normal_callee();
55
declare void @streaming_callee() "aarch64_pstate_sm_enabled";
@@ -237,25 +237,27 @@ declare void @use_ptr(ptr) "aarch64_pstate_sm_compatible"
237237
define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_body" {
238238
; CHECK-LABEL: call_to_intrinsic_without_chain:
239239
; CHECK: // %bb.0: // %entry
240-
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
241-
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
242-
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
243-
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
244-
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
245-
; CHECK-NEXT: str d0, [sp, #72] // 8-byte Folded Spill
240+
; CHECK-NEXT: sub sp, sp, #96
241+
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
242+
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
243+
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
244+
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
245+
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
246+
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
246247
; CHECK-NEXT: smstart sm
247248
; CHECK-NEXT: smstop sm
248-
; CHECK-NEXT: ldr d0, [sp, #72] // 8-byte Folded Reload
249+
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
249250
; CHECK-NEXT: bl cos
250-
; CHECK-NEXT: str d0, [sp, #72] // 8-byte Folded Spill
251+
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
251252
; CHECK-NEXT: smstart sm
252253
; CHECK-NEXT: smstop sm
253-
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
254-
; CHECK-NEXT: ldr d0, [sp, #72] // 8-byte Folded Reload
255-
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
256-
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
257-
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
258-
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
254+
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
255+
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
256+
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
257+
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
258+
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
259+
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
260+
; CHECK-NEXT: add sp, sp, #96
259261
; CHECK-NEXT: ret
260262
entry:
261263
%0 = call fast double @llvm.cos.f64(double %x)

llvm/test/CodeGen/AArch64/sme-streaming-interface.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
33

44
; This file tests the following combinations related to streaming-enabled functions:
55
; [ ] N -> S (Normal -> Streaming)
@@ -313,19 +313,18 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta
313313
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
314314
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
315315
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
316-
; CHECK-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
316+
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
317317
; CHECK-NEXT: smstop sm
318-
; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload
318+
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
319319
; CHECK-NEXT: bl cos
320-
; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
320+
; CHECK-NEXT: str d0, [sp] // 8-byte Folded Spill
321321
; CHECK-NEXT: smstart sm
322-
; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload
323-
; CHECK-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
322+
; CHECK-NEXT: ldp d1, d0, [sp] // 16-byte Folded Reload
324323
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
325324
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
326325
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
327-
; CHECK-NEXT: fadd d0, d1, d0
328326
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
327+
; CHECK-NEXT: fadd d0, d1, d0
329328
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
330329
; CHECK-NEXT: add sp, sp, #96
331330
; CHECK-NEXT: ret
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64"
5+
6+
; This function would normally scavenge a stackslot from the callee-save
7+
; area, which would lead to spilling 's0' to that stackslot before the
8+
; smstop and filling it with 'addvl + <offset>' after the smstop because
9+
; the frame-pointer is not available.
10+
; This would not be valid, since the vector-length has changed so 'addvl'
11+
; cannot be used. This is testing that the stackslot-scavenging is disabled
12+
; when there are any streaming-mode-changing call-sequences in the
13+
; function.
14+
define void @test_no_stackslot_scavenging(float %f) #0 {
15+
; CHECK-LABEL: test_no_stackslot_scavenging:
16+
; CHECK: // %bb.0:
17+
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
18+
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
19+
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
20+
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
21+
; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
22+
; CHECK-NEXT: stp x30, x24, [sp, #80] // 16-byte Folded Spill
23+
; CHECK-NEXT: addvl sp, sp, #-1
24+
; CHECK-NEXT: sub sp, sp, #16
25+
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
26+
; CHECK-NEXT: //APP
27+
; CHECK-NEXT: //NO_APP
28+
; CHECK-NEXT: smstop sm
29+
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
30+
; CHECK-NEXT: bl use_f
31+
; CHECK-NEXT: smstart sm
32+
; CHECK-NEXT: addvl sp, sp, #1
33+
; CHECK-NEXT: add sp, sp, #16
34+
; CHECK-NEXT: ldp x30, x24, [sp, #80] // 16-byte Folded Reload
35+
; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
36+
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
37+
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
38+
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
39+
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
40+
; CHECK-NEXT: ret
41+
%ptr = alloca <vscale x 16 x i8>
42+
call void asm sideeffect "", "~{x24}"() nounwind
43+
call void @use_f(float %f)
44+
ret void
45+
}
46+
47+
declare void @use_f(float)
48+
49+
attributes #0 = { nounwind "target-features"="+sme" "aarch64_pstate_sm_enabled" }

0 commit comments

Comments
 (0)