@@ -504,6 +504,11 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
       MFI.hasStackMap() || MFI.hasPatchPoint() ||
       RegInfo->hasStackRealignment(MF))
     return true;
+
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  if (AFI->hasPoplessEpilogue())
+    return true;
+
   // With large callframes around we may need to use FP to access the scavenging
   // emergency spillslot.
   //
@@ -1119,6 +1124,12 @@ bool AArch64FrameLowering::canUseAsPrologue(
     return false;
   }
 
+  // If we have some return path that's popless, it needs its own very-special
+  // epilogue, so we can't shrink-wrap it away.
+  // FIXME: this and some of the below checks belong in enableShrinkWrapping.
+  if (AFI->hasPoplessEpilogue())
+    return false;
+
   // Certain stack probing sequences might clobber flags, then we can't use
   // the block as a prologue if the flags register is a live-in.
   if (MF->getInfo<AArch64FunctionInfo>()->hasStackProbing() &&
@@ -1204,6 +1215,12 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
 
 bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
     MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
+
+  MachineFunction &MF = *MBB.getParent();
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  if (AFI->hasPoplessEpilogue())
+    return false;
+
   if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
     return false;
@@ -1560,6 +1577,47 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
   return std::prev(MBB.erase(MBBI));
 }
 
+static void fixupCalleeSaveRestoreToFPBased(MachineInstr &MI,
+                                            uint64_t FPSPOffset) {
+  assert(!AArch64InstrInfo::isSEHInstruction(MI));
+
+  unsigned Opc = MI.getOpcode();
+  unsigned Scale;
+  switch (Opc) {
+  case AArch64::STPXi:
+  case AArch64::STRXui:
+  case AArch64::STPDi:
+  case AArch64::STRDui:
+  case AArch64::LDPXi:
+  case AArch64::LDRXui:
+  case AArch64::LDPDi:
+  case AArch64::LDRDui:
+    Scale = 8;
+    break;
+  case AArch64::STPQi:
+  case AArch64::STRQui:
+  case AArch64::LDPQi:
+  case AArch64::LDRQui:
+    Scale = 16;
+    break;
+  default:
+    llvm_unreachable("Unexpected callee-save save/restore opcode!");
+  }
+
+  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
+
+  MachineOperand &BaseRegOpnd = MI.getOperand(OffsetIdx - 1);
+  assert(BaseRegOpnd.getReg() == AArch64::SP &&
+         "Unexpected base register in callee-save save/restore instruction!");
+  BaseRegOpnd.setReg(AArch64::FP); // XXX TRI
+
+  // Last operand is immediate offset that needs fixing.
+  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
+  // All generated opcodes have scaled offsets.
+  assert(FPSPOffset % Scale == 0);
+  OffsetOpnd.setImm(OffsetOpnd.getImm() - FPSPOffset / Scale);
+}
+
 // Fixup callee-save register save/restore instructions to take into account
 // combined SP bump by adding the local stack size to the stack offsets.
 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
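The helper added above rewrites a callee-save save/restore from an SP base to an FP base by patching its scaled immediate. Below is a minimal standalone sketch of that arithmetic, assuming FP sits FPSPOffset bytes above the base the SP-relative offsets were computed against; rewriteScaledOffset is a hypothetical name used only for illustration, not part of the patch.

#include <cassert>
#include <cstdint>

static int64_t rewriteScaledOffset(int64_t SPScaledImm, uint64_t FPSPOffset,
                                   unsigned Scale) {
  // The immediate on the handled STP/LDP/STR/LDR forms is scaled by the
  // access size (8 for X/D registers, 16 for Q registers), so the base
  // adjustment has to be applied in the same units.
  assert(FPSPOffset % Scale == 0 && "FP distance must be a multiple of Scale");
  return SPScaledImm - static_cast<int64_t>(FPSPOffset / Scale);
}

// Example: "ldp x21, x22, [sp, #32]" carries the scaled immediate 4; with
// FPSPOffset = 16 and Scale = 8 it becomes immediate 2, i.e.
// "ldp x21, x22, [fp, #16]".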
@@ -2298,10 +2356,22 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
   bool HasWinCFI = false;
   bool IsFunclet = false;
+  bool IsSwiftCoroPartialReturn = false;
 
   if (MBB.end() != MBBI) {
     DL = MBBI->getDebugLoc();
     IsFunclet = isFuncletReturnInstr(*MBBI);
+    IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS;
+  }
+
+  if (IsSwiftCoroPartialReturn) {
+    // The partial-return intrin/instr requires the swiftcoro cc
+    if (MF.getFunction().getCallingConv() != CallingConv::SwiftCoro)
+      report_fatal_error("llvm.ret.popless requires swiftcorocc");
+    assert(MBBI->getOpcode() == AArch64::RET_POPLESS);
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::RET_ReallyLR))
+        .setMIFlag(MachineInstr::FrameDestroy);
+    MBB.erase(MBBI);
   }
 
   MachineBasicBlock::iterator EpilogStartI = MBB.end();
@@ -2350,6 +2420,39 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
       if (Info.getReg() != AArch64::LR)
         continue;
       MachineBasicBlock::iterator TI = MBB.getFirstTerminator();
+
+      // When we're doing a popless ret (i.e., that doesn't restore SP), we
+      // can't rely on the exit SP being the same as the entry, but they need
+      // to match for the LR auth to succeed. Instead, derive the entry SP
+      // from our FP (using a -16 static offset for the size of the frame
+      // record itself), save that into X16, and use that as the discriminator
+      // in an AUTIB.
+      if (IsSwiftCoroPartialReturn) {
+        const auto *TRI = Subtarget.getRegisterInfo();
+
+        MachineBasicBlock::iterator EpilogStartI = MBB.getFirstTerminator();
+        MachineBasicBlock::iterator Begin = MBB.begin();
+        while (EpilogStartI != Begin) {
+          --EpilogStartI;
+          if (!EpilogStartI->getFlag(MachineInstr::FrameDestroy)) {
+            ++EpilogStartI;
+            break;
+          }
+          if (EpilogStartI->readsRegister(AArch64::X16, TRI) ||
+              EpilogStartI->modifiesRegister(AArch64::X16, TRI))
+            report_fatal_error("unable to use x16 for popless ret LR auth");
+        }
+
+        emitFrameOffset(MBB, EpilogStartI, DL, AArch64::X16, AArch64::FP,
+                        StackOffset::getFixed(16), TII,
+                        MachineInstr::FrameDestroy);
+        BuildMI(MBB, TI, DL, TII->get(AArch64::AUTIB), AArch64::LR)
+            .addUse(AArch64::LR)
+            .addUse(AArch64::X16)
+            .setMIFlag(MachineInstr::FrameDestroy);
+        return;
+      }
+
       if (TI != MBB.end() && TI->getOpcode() == AArch64::RET_ReallyLR) {
         // If there is a terminator and it's a RET, we can fold AUTH into it.
         // Be careful to keep the implicitly returned registers.
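The X16/AUTIB sequence above hinges on one invariant: with return-address signing, LR was signed in the prologue against the entry SP, and a popless return never restores SP to that value, so the discriminator is recomputed from FP instead. A minimal sketch of that relationship, assuming the usual 16-byte frame record ("stp x29, x30, [sp, #-16]!; mov x29, sp") pushed at the entry SP; entrySPFromFP is a hypothetical helper for illustration only.

#include <cstdint>

static uint64_t entrySPFromFP(uint64_t FP) {
  // FP points at the 16-byte frame record stored just below the entry SP,
  // so the value LR was signed against is FP + 16.
  return FP + 16;
}

// This mirrors emitFrameOffset(..., X16, FP, StackOffset::getFixed(16), ...)
// above: X16 = FP + 16, then AUTIB authenticates LR with X16 as the modifier.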
@@ -2383,6 +2486,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
   if (homogeneousPrologEpilog(MF, &MBB)) {
     assert(!NeedsWinCFI);
+    assert(!IsSwiftCoroPartialReturn);
     auto LastPopI = MBB.getFirstTerminator();
     if (LastPopI != MBB.begin()) {
       auto HomogeneousEpilog = std::prev(LastPopI);
@@ -2404,7 +2508,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // Assume we can't combine the last pop with the sp restore.
 
   bool CombineAfterCSRBump = false;
-  if (!CombineSPBump && PrologueSaveSize != 0) {
+  if (!CombineSPBump && PrologueSaveSize != 0 && !IsSwiftCoroPartialReturn) {
     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
     while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
            AArch64InstrInfo::isSEHInstruction(*Pop))
@@ -2440,6 +2544,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
         IsSVECalleeSave(LastPopI)) {
       ++LastPopI;
       break;
+    } else if (IsSwiftCoroPartialReturn) {
+      assert(!EmitCFI);
+      assert(hasFP(MF));
+      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
+                                        NeedsWinCFI, &HasWinCFI);
+      // if FP-based addressing, rewrite CSR restores from SP to FP
+      fixupCalleeSaveRestoreToFPBased(
+          *LastPopI, AFI->getCalleeSaveBaseToFrameRecordOffset());
     } else if (CombineSPBump)
       fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
                                         NeedsWinCFI, &HasWinCFI);
@@ -2459,6 +2571,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
+    assert(!IsSwiftCoroPartialReturn);
     switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
     case SwiftAsyncFramePointerMode::DeploymentBased:
       // Avoid the reload as it is GOT relative, and instead fall back to the
@@ -2492,6 +2605,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   // If there is a single SP update, insert it before the ret and we're done.
   if (CombineSPBump) {
     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
+    assert(!IsSwiftCoroPartialReturn);
 
     // When we are about to restore the CSRs, the CFA register is SP again.
     if (EmitCFI && hasFP(MF)) {
@@ -2577,6 +2691,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   }
 
   if (!hasFP(MF)) {
+    assert(!IsSwiftCoroPartialReturn);
     bool RedZone = canUseRedZone(MF);
     // If this was a redzone leaf function, we don't need to restore the
     // stack pointer (but we may need to pop stack args for fastcc).
@@ -2607,6 +2722,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     NumBytes = 0;
   }
 
+  if (IsSwiftCoroPartialReturn)
+    return;
+
   // Restore the original stack pointer.
   // FIXME: Rather than doing the math here, we should instead just use
   // non-post-indexed loads for the restores if we aren't actually going to
@@ -3449,9 +3567,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   DebugLoc DL;
   SmallVector<RegPairInfo, 8> RegPairs;
   bool NeedsWinCFI = needsWinCFI(MF);
+  bool IsSwiftCoroPartialReturn = false;
 
-  if (MBBI != MBB.end())
+  if (MBBI != MBB.end()) {
     DL = MBBI->getDebugLoc();
+    IsSwiftCoroPartialReturn = MBBI->getOpcode() == AArch64::RET_POPLESS;
+  }
+
+  // The partial-return intrin/instr requires the swiftcoro cc
+  if (IsSwiftCoroPartialReturn &&
+      MF.getFunction().getCallingConv() != CallingConv::SwiftCoro)
+    report_fatal_error("llvm.ret.popless requires swiftcorocc");
 
   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
   if (homogeneousPrologEpilog(MF, &MBB)) {
@@ -3464,6 +3590,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
     return true;
   }
 
+  // If doing a partial/popless return, CSR restores are from FP, so do it last.
+  if (IsSwiftCoroPartialReturn) {
+    auto IsFPLR = [](const RegPairInfo &c) {
+      return c.Reg1 == AArch64::LR && c.Reg2 == AArch64::FP;
+    };
+    auto FPLRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsFPLR);
+    const RegPairInfo FPLRRPI = *FPLRBegin;
+    FPLRBegin = std::remove_if(RegPairs.begin(), RegPairs.end(), IsFPLR);
+    *FPLRBegin = FPLRRPI;
+  }
+
   // For performance reasons restore SVE register in increasing order
   auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
   auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
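The find_if/remove_if pair above moves the single FP/LR pair to the back of RegPairs while keeping the remaining pairs in their original relative order, so that, per the comment in the hunk, the FP/LR restore happens last and FP remains usable as the base for the other, now FP-based, reloads. A standalone sketch of the same idiom on plain integers, assuming exactly one element matches:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Pairs = {1, 7, 3, 4};      // pretend 7 is the FP/LR pair
  auto IsFPLR = [](int V) { return V == 7; };
  int Saved = *std::find_if(Pairs.begin(), Pairs.end(), IsFPLR);
  // remove_if compacts the non-matching elements to the front and returns the
  // new logical end; with exactly one match, that slot is the last element.
  auto NewEnd = std::remove_if(Pairs.begin(), Pairs.end(), IsFPLR);
  *NewEnd = Saved;
  assert((Pairs == std::vector<int>{1, 3, 4, 7}));
  return 0;
}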
@@ -4796,6 +4933,10 @@ void AArch64FrameLowering::orderFrameObjects(
 
   const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  if (AFI.hasPoplessEpilogue())
+    return;
+
   std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
   for (auto &Obj : ObjectsToAllocate) {
     FrameObjects[Obj].IsValid = true;