Skip to content

Commit cb3721b

Browse files
- Prevent spill of VG for functions marked as 'nounwind'
- Add a test to sme-vg-to-stack.ll with the 'nounwind' attribute
1 parent 061c7db commit cb3721b

9 files changed

+300
-394
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,7 +1657,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
16571657

16581658
// If the function contains streaming mode changes, we expect the first
16591659
// instruction of MBB to be a CNTD. Move past this instruction if found.
1660-
if (AFI->hasStreamingModeChanges()) {
1660+
if (AFI->hasStreamingModeChanges() && F.needsUnwindTableEntry()) {
16611661
assert(MBBI->getOpcode() == AArch64::CNTD_XPiI && "Unexpected instruction");
16621662
MBBI = std::next(MBBI);
16631663
}
@@ -3348,7 +3348,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
33483348

33493349
// Increase the callee-saved stack size if the function has streaming mode
33503350
// changes, as we will need to spill the value of the VG register.
3351-
if (AFI->hasStreamingModeChanges())
3351+
const Function &F = MF.getFunction();
3352+
if (AFI->hasStreamingModeChanges() && F.needsUnwindTableEntry())
33523353
CSStackSize += 8;
33533354

33543355
// Save number of saved regs, so we can easily update CSStackSize later.
@@ -3488,7 +3489,8 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
34883489
}
34893490

34903491
// Insert VG into the list of CSRs, immediately before LR if saved.
3491-
if (AFI->hasStreamingModeChanges()) {
3492+
const Function &F = MF.getFunction();
3493+
if (AFI->hasStreamingModeChanges() && F.needsUnwindTableEntry()) {
34923494
auto VGInfo = CalleeSavedInfo(AArch64::VG);
34933495
VGInfo.setRestored(false);
34943496
bool InsertBeforeLR = false;

llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,11 @@ target triple = "aarch64"
1010
define void @streaming_compatible() #0 {
1111
; CHECK-LABEL: streaming_compatible:
1212
; CHECK: // %bb.0:
13-
; CHECK-NEXT: cntd x9
14-
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
13+
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
1514
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
1615
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
1716
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
18-
; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
19-
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
17+
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
2018
; CHECK-NEXT: bl __arm_sme_state
2119
; CHECK-NEXT: and x19, x0, #0x1
2220
; CHECK-NEXT: tbz w19, #0, .LBB0_2
@@ -28,12 +26,11 @@ define void @streaming_compatible() #0 {
2826
; CHECK-NEXT: // %bb.3:
2927
; CHECK-NEXT: smstart sm
3028
; CHECK-NEXT: .LBB0_4:
29+
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
3130
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
32-
; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
3331
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
34-
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
3532
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
36-
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
33+
; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
3734
; CHECK-NEXT: ret
3835
call void @non_streaming()
3936
ret void

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,12 @@ declare double @normal_callee(double)
1111
define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline optnone {
1212
; CHECK-FISEL-LABEL: nonstreaming_caller_streaming_callee:
1313
; CHECK-FISEL: // %bb.0: // %entry
14-
; CHECK-FISEL-NEXT: cntd x9
1514
; CHECK-FISEL-NEXT: sub sp, sp, #96
1615
; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
1716
; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
1817
; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
1918
; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
2019
; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
21-
; CHECK-FISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
2220
; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
2321
; CHECK-FISEL-NEXT: smstart sm
2422
; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
@@ -39,14 +37,12 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline
3937
;
4038
; CHECK-GISEL-LABEL: nonstreaming_caller_streaming_callee:
4139
; CHECK-GISEL: // %bb.0: // %entry
42-
; CHECK-GISEL-NEXT: cntd x9
4340
; CHECK-GISEL-NEXT: sub sp, sp, #96
4441
; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
4542
; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
4643
; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
4744
; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
4845
; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
49-
; CHECK-GISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
5046
; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
5147
; CHECK-GISEL-NEXT: smstart sm
5248
; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
@@ -74,14 +70,12 @@ entry:
7470
define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
7571
; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
7672
; CHECK-COMMON: // %bb.0: // %entry
77-
; CHECK-COMMON-NEXT: cntd x9
7873
; CHECK-COMMON-NEXT: sub sp, sp, #96
7974
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
8075
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
8176
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
8277
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
8378
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
84-
; CHECK-COMMON-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
8579
; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
8680
; CHECK-COMMON-NEXT: smstop sm
8781
; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
@@ -108,14 +102,12 @@ entry:
108102
define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" {
109103
; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee:
110104
; CHECK-COMMON: // %bb.0:
111-
; CHECK-COMMON-NEXT: cntd x9
112105
; CHECK-COMMON-NEXT: sub sp, sp, #112
113106
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
114107
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
115108
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
116109
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
117110
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
118-
; CHECK-COMMON-NEXT: str x9, [sp, #104] // 8-byte Folded Spill
119111
; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
120112
; CHECK-COMMON-NEXT: smstart sm
121113
; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
@@ -174,13 +166,11 @@ define double @normal_caller_to_locally_streaming_callee(double %x) nounwind noi
174166
define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noinline optnone "aarch64_pstate_sm_body" {
175167
; CHECK-COMMON-LABEL: locally_streaming_caller_streaming_callee_ptr:
176168
; CHECK-COMMON: // %bb.0:
177-
; CHECK-COMMON-NEXT: cntd x9
178169
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
179170
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
180171
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
181172
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
182173
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
183-
; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
184174
; CHECK-COMMON-NEXT: smstart sm
185175
; CHECK-COMMON-NEXT: blr x0
186176
; CHECK-COMMON-NEXT: smstop sm
@@ -197,13 +187,11 @@ define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noin
197187
define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optnone {
198188
; CHECK-COMMON-LABEL: normal_call_to_streaming_callee_ptr:
199189
; CHECK-COMMON: // %bb.0:
200-
; CHECK-COMMON-NEXT: cntd x9
201190
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
202191
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
203192
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
204193
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
205194
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
206-
; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
207195
; CHECK-COMMON-NEXT: smstart sm
208196
; CHECK-COMMON-NEXT: blr x0
209197
; CHECK-COMMON-NEXT: smstop sm
@@ -337,13 +325,12 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
337325
define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounwind {
338326
; CHECK-COMMON-LABEL: f128_call_sm:
339327
; CHECK-COMMON: // %bb.0:
340-
; CHECK-COMMON-NEXT: cntd x9
341328
; CHECK-COMMON-NEXT: sub sp, sp, #112
342329
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
343330
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
344331
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
345332
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
346-
; CHECK-COMMON-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill
333+
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
347334
; CHECK-COMMON-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
348335
; CHECK-COMMON-NEXT: smstop sm
349336
; CHECK-COMMON-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
@@ -399,13 +386,12 @@ define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
399386
define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
400387
; CHECK-COMMON-LABEL: frem_call_sm:
401388
; CHECK-COMMON: // %bb.0:
402-
; CHECK-COMMON-NEXT: cntd x9
403389
; CHECK-COMMON-NEXT: sub sp, sp, #96
404390
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
405391
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
406392
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
407393
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
408-
; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
394+
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
409395
; CHECK-COMMON-NEXT: stp s1, s0, [sp, #8] // 8-byte Folded Spill
410396
; CHECK-COMMON-NEXT: smstop sm
411397
; CHECK-COMMON-NEXT: ldp s1, s0, [sp, #8] // 8-byte Folded Reload
@@ -428,14 +414,12 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw
428414
define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind {
429415
; CHECK-COMMON-LABEL: frem_call_sm_compat:
430416
; CHECK-COMMON: // %bb.0:
431-
; CHECK-COMMON-NEXT: cntd x9
432-
; CHECK-COMMON-NEXT: sub sp, sp, #112
417+
; CHECK-COMMON-NEXT: sub sp, sp, #96
433418
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
434419
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
435420
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
436421
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
437-
; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
438-
; CHECK-COMMON-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
422+
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
439423
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
440424
; CHECK-COMMON-NEXT: bl __arm_sme_state
441425
; CHECK-COMMON-NEXT: ldp s2, s0, [sp, #8] // 8-byte Folded Reload
@@ -452,14 +436,13 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
452436
; CHECK-COMMON-NEXT: // %bb.3:
453437
; CHECK-COMMON-NEXT: smstart sm
454438
; CHECK-COMMON-NEXT: .LBB12_4:
455-
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
439+
; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
456440
; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
441+
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
457442
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
458-
; CHECK-COMMON-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
459-
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
460443
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
461444
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
462-
; CHECK-COMMON-NEXT: add sp, sp, #112
445+
; CHECK-COMMON-NEXT: add sp, sp, #96
463446
; CHECK-COMMON-NEXT: ret
464447
%res = frem float %a, %b
465448
ret float %res

llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,14 +121,13 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inou
121121
define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" {
122122
; CHECK-LABEL: test_lazy_save_and_conditional_smstart:
123123
; CHECK: // %bb.0:
124-
; CHECK-NEXT: cntd x9
125124
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
126125
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
127126
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
128127
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
129128
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
130129
; CHECK-NEXT: add x29, sp, #64
131-
; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
130+
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
132131
; CHECK-NEXT: sub sp, sp, #16
133132
; CHECK-NEXT: rdsvl x8, #1
134133
; CHECK-NEXT: mov x9, sp
@@ -161,7 +160,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
161160
; CHECK-NEXT: msr TPIDR2_EL0, xzr
162161
; CHECK-NEXT: sub sp, x29, #64
163162
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
164-
; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
163+
; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
165164
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
166165
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
167166
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload

0 commit comments

Comments
 (0)