Commit 6f2f9bd
[AArch64] Support 'swiftcorocc' "popless" calls.
'swiftcorocc' calls are allowed to have "popless" returns, which don't fully restore the stack, thereby allowing the caller to access some stack allocations made in the 'swiftcorocc' callee. Concretely, calls to these functions don't restore SP (but do restore FP).

The most important characteristic of a 'swiftcorocc' call is therefore that it forces the caller to access its stack through FP, as it already must with, e.g., variably-sized allocas.

Support this on AArch64 by marking the frame as having a popless call, which we generally honor when deciding whether the frame needs FP and FP-based addressing, as we do today for variably-sized allocas.

rdar://135984630
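In effect, a popless call poisons SP-relative addressing for the rest of the caller, exactly as a variably-sized alloca does. A minimal sketch of the resulting rule, assuming only the new MachineFrameInfo flag introduced below (the helper name is illustrative, not part of the patch):

#include "llvm/CodeGen/MachineFrameInfo.h"

using namespace llvm;

// Illustrative helper: SP-relative offsets are only meaningful while the
// frame layout is fixed. After a popless call, SP may still point below the
// callee's surviving allocations, so such a call is treated like a dynamic
// alloca and the frame is addressed through FP instead.
static bool spOffsetsAreReliable(const MachineFrameInfo &MFI) {
  return !MFI.hasVarSizedObjects() && !MFI.hasPoplessCall();
}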

5 files changed: +158, -6 lines

llvm/include/llvm/CodeGen/MachineFrameInfo.h

Lines changed: 7 additions & 0 deletions

@@ -319,6 +319,10 @@ class MachineFrameInfo {
   /// instructions which manipulate the stack pointer.
   bool HasCopyImplyingStackAdjustment = false;
 
+  /// True if the function contains a call using a calling convention that
+  /// allows it to be "popless", i.e., to not restore SP when the call returns.
+  bool HasPoplessCall = false;
+
   /// True if the function contains a call to the llvm.vastart intrinsic.
   bool HasVAStart = false;

@@ -634,6 +638,9 @@ class MachineFrameInfo {
     HasCopyImplyingStackAdjustment = B;
   }
 
+  bool hasPoplessCall() const { return HasPoplessCall; }
+  void setHasPoplessCall(bool B = true) { HasPoplessCall = B; }
+
   /// Returns true if the function calls the llvm.va_start intrinsic.
   bool hasVAStart() const { return HasVAStart; }
   void setHasVAStart(bool B) { HasVAStart = B; }
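A short usage sketch of the new accessors (the enclosing function is hypothetical; the real call sites are in the AArch64 lowering changes below). Note the setter's default argument, which lets call lowering simply "mark" the frame:

#include "llvm/CodeGen/MachineFrameInfo.h"

using namespace llvm;

// Hypothetical illustration of the intended protocol: call lowering marks
// the frame when it emits a popless call; frame lowering queries the mark.
static void example(MachineFrameInfo &MFI) {
  MFI.setHasPoplessCall(); // equivalent to setHasPoplessCall(true)
  if (MFI.hasPoplessCall()) {
    // ... force FP and FP-based addressing, as the AArch64 changes below do.
  }
}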

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 16 additions & 6 deletions

@@ -347,7 +347,8 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
   // Bail on stack adjustment needed on return for simplicity.
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
-  if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
+  if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
+      MFI.hasPoplessCall())
     return false;
   if (Exit && getArgumentStackToRestore(MF, *Exit))
     return false;

@@ -499,6 +500,7 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
   if (MF.getTarget().Options.DisableFramePointerElim(MF))
     return true;
   if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
+      MFI.hasPoplessCall() ||
       MFI.hasStackMap() || MFI.hasPatchPoint() ||
       RegInfo->hasStackRealignment(MF))
     return true;

@@ -1180,6 +1182,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
   if (MFI.hasVarSizedObjects())
     return false;
 
+  if (MFI.hasPoplessCall())
+    return false;
+
   if (RegInfo->hasStackRealignment(MF))
     return false;

@@ -2193,7 +2198,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
     allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
                        nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
-                       MFI.hasVarSizedObjects() || LocalsSize);
+                       MFI.hasVarSizedObjects() || LocalsSize ||
+                           MFI.hasPoplessCall());
     CFAOffset += SVECalleeSavesSize;
 
     if (EmitAsyncCFI)

@@ -2210,7 +2216,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
                        SVELocalsSize + StackOffset::getFixed(NumBytes),
                        NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
-                       CFAOffset, MFI.hasVarSizedObjects());
+                       CFAOffset,
+                       MFI.hasVarSizedObjects() || MFI.hasPoplessCall());
   }
 
   // If we need a base pointer, set it up here. It's whatever the value of the

@@ -2531,7 +2538,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // If we have stack realignment or variable sized objects on the stack,
   // restore the stack pointer from the frame pointer prior to SVE CSR
   // restoration.
-  if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
+  if (AFI->isStackRealigned() || MFI.hasVarSizedObjects() ||
+      MFI.hasPoplessCall()) {
     if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
       // Set SP to start of SVE callee-save area from which they can
       // be reloaded. The code below will deallocate the stack space

@@ -2603,7 +2611,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // FIXME: Rather than doing the math here, we should instead just use
   // non-post-indexed loads for the restores if we aren't actually going to
   // be able to save any instructions.
-  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
+  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned() ||
+                     MFI.hasPoplessCall())) {
     emitFrameOffset(
         MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
         StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),

@@ -2796,7 +2805,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
   bool FPOffsetFits = !ForSimm || FPOffset >= -256;
   PreferFP |= Offset > -FPOffset && !SVEStackSize;
 
-  if (MFI.hasVarSizedObjects()) {
+  if (MFI.hasVarSizedObjects() || MFI.hasPoplessCall()) {
     // If we have variable sized objects, we can use either FP or BP, as the
     // SP offset is unknown. We can use the base pointer if we have one and
     // FP is not preferred. If not, we're stuck with using FP.

@@ -4679,6 +4688,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
 
   // Go to common code if we cannot provide sp + offset.
   if (MFI.hasVarSizedObjects() ||
+      MFI.hasPoplessCall() ||
       MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
       MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
     return getFrameIndexReference(MF, FI, FrameReg);
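The epilogue-side checks above all express one condition; condensed into a single predicate it reads as follows (a sketch with illustrative parameter names, since the real code queries AArch64FunctionInfo and MachineFrameInfo directly). When it holds, SP is first recomputed from FP, e.g. the `sub sp, x29, #16` in the first test function below:

// Sketch only: when any of these hold, the epilogue cannot deallocate the
// frame by adding a constant to SP; it must rebuild SP from the frame
// pointer before reloading the callee-saved registers.
static bool mustRestoreSPFromFP(bool StackRealigned, bool HasVarSizedObjects,
                                bool HasPoplessCall) {
  return StackRealigned || HasVarSizedObjects || HasPoplessCall;
}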

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 0 deletions

@@ -7205,6 +7205,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::CXX_FAST_TLS:
   case CallingConv::Swift:
   case CallingConv::SwiftTail:
+  case CallingConv::SwiftCoro:
   case CallingConv::Tail:
   case CallingConv::GRAAL:
     if (Subtarget->isTargetWindows()) {

@@ -8317,6 +8318,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     CallConv = CallingConv::AArch64_SVE_VectorCall;
   }
 
+  if (CallConv == CallingConv::SwiftCoro)
+    MF.getFrameInfo().setHasPoplessCall();
+
   if (IsTailCall) {
     // Check if it's really possible to do a tail call.
     IsTailCall = isEligibleForTailCallOptimization(CLI);

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Lines changed: 3 additions & 0 deletions

@@ -1454,6 +1454,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     return false;
   }
 
+  if (Info.CallConv == CallingConv::SwiftCoro)
+    MF.getFrameInfo().setHasPoplessCall();
+
   if (Info.SwiftErrorVReg) {
     MIB.addDef(AArch64::X21, RegState::Implicit);
     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
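The SelectionDAG and GlobalISel paths must make the same marking, since either selector may lower a given function. A hypothetical shared helper capturing what both snippets do (not part of the patch):

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/CallingConv.h"

using namespace llvm;

// Hypothetical common helper: both call-lowering paths taint the caller's
// frame as soon as they see a SwiftCoro (popless) call.
static void markPoplessIfSwiftCoro(MachineFunction &MF, CallingConv::ID CC) {
  if (CC == CallingConv::SwiftCoro)
    MF.getFrameInfo().setHasPoplessCall();
}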
Lines changed: 128 additions & 0 deletions

@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple arm64e-apple-darwin -verify-machineinstrs -o - %s \
+; RUN:   | FileCheck %s
+
+; RUN: llc -mtriple arm64e-apple-darwin -verify-machineinstrs -o - %s \
+; RUN:   -global-isel -global-isel-abort=2 | FileCheck %s
+
+declare i64 @g(ptr, ptr)
+
+define i64 @test_call_to_swiftcoro() #0 {
+; CHECK-LABEL: test_call_to_swiftcoro:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    stp x26, x25, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w25, -24
+; CHECK-NEXT:    .cfi_offset w26, -32
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _test_call
+; CHECK-NEXT:    sub x0, x29, #24
+; CHECK-NEXT:    sub x1, x29, #32
+; CHECK-NEXT:    bl _g
+; CHECK-NEXT:    sub sp, x29, #16
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp], #32 ; 16-byte Folded Reload
+; CHECK-NEXT:    retab
+  %v1 = alloca i64
+  %v2 = alloca i64
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call swiftcorocc i64 @test_call()
+  %v4 = call i64 @g(ptr %v1, ptr %v2)
+  ret i64 %v4
+}
+
+define i64 @test_call_to_normal() #0 {
+; CHECK-LABEL: test_call_to_normal:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w25, -24
+; CHECK-NEXT:    .cfi_offset w26, -32
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _test_call_normal
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    mov x1, sp
+; CHECK-NEXT:    bl _g
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    retab
+  %v1 = alloca i64
+  %v2 = alloca i64
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call i64 @test_call_normal()
+  %v4 = call i64 @g(ptr %v1, ptr %v2)
+  ret i64 %v4
+}
+
+define swiftcorocc i64 @test_call() #0 {
+; CHECK-LABEL: test_call:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w25, -24
+; CHECK-NEXT:    .cfi_offset w26, -32
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    mov x1, sp
+; CHECK-NEXT:    bl _g
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    retab
+  %v1 = alloca i64
+  %v2 = alloca i64
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call i64 @g(ptr %v1, ptr %v2)
+  ret i64 %v3
+}
+
+define i64 @test_call_normal() #0 {
+; CHECK-LABEL: test_call_normal:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    pacibsp
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w25, -24
+; CHECK-NEXT:    .cfi_offset w26, -32
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    add x0, sp, #8
+; CHECK-NEXT:    mov x1, sp
+; CHECK-NEXT:    bl _g
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    retab
+  %v1 = alloca i64
+  %v2 = alloca i64
+  call void asm sideeffect "", "~{x25},~{x26}"()
+  %v3 = call i64 @g(ptr %v1, ptr %v2)
+  ret i64 %v3
+}
+
+
+attributes #0 = { "ptrauth-returns" }
