@@ -331,7 +331,9 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
331
331
static bool produceCompactUnwindFrame (MachineFunction &MF);
332
332
static bool needsWinCFI (const MachineFunction &MF);
333
333
static StackOffset getSVEStackSize (const MachineFunction &MF);
334
- static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB);
334
+ static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB,
335
+ bool HasCall = false );
336
+ static bool requiresSaveVG (const MachineFunction &MF);
335
337
336
338
/// Returns true if a homogeneous prolog or epilog code can be emitted
337
339
/// for the size optimization. If possible, a frame helper call is injected.
@@ -1006,6 +1008,16 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
1006
1008
}
1007
1009
}
1008
1010
1011
+ static bool windowsRequiresStackProbe (const MachineFunction &MF,
1012
+ uint64_t StackSizeInBytes) {
1013
+ const AArch64Subtarget &Subtarget = MF.getSubtarget <AArch64Subtarget>();
1014
+ const AArch64FunctionInfo &MFI = *MF.getInfo <AArch64FunctionInfo>();
1015
+ // TODO: When implementing stack protectors, take that into account
1016
+ // for the probe threshold.
1017
+ return Subtarget.isTargetWindows () && MFI.hasStackProbing () &&
1018
+ StackSizeInBytes >= uint64_t (MFI.getStackProbeSize ());
1019
+ }
1020
+
1009
1021
static void getLiveRegsForEntryMBB (LivePhysRegs &LiveRegs,
1010
1022
const MachineBasicBlock &MBB) {
1011
1023
const MachineFunction *MF = MBB.getParent ();
@@ -1027,7 +1039,8 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
1027
1039
// but we would then have to make sure that we were in fact saving at least one
1028
1040
// callee-save register in the prologue, which is additional complexity that
1029
1041
// doesn't seem worth the benefit.
1030
- static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB) {
1042
+ static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB,
1043
+ bool HasCall) {
1031
1044
MachineFunction *MF = MBB->getParent ();
1032
1045
1033
1046
// If MBB is an entry block, use X9 as the scratch register
@@ -1041,6 +1054,11 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
1041
1054
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo ();
1042
1055
LivePhysRegs LiveRegs (TRI);
1043
1056
getLiveRegsForEntryMBB (LiveRegs, *MBB);
1057
+ if (HasCall) {
1058
+ LiveRegs.addReg (AArch64::X16);
1059
+ LiveRegs.addReg (AArch64::X17);
1060
+ LiveRegs.addReg (AArch64::X18);
1061
+ }
1044
1062
1045
1063
// Prefer X9 since it was historically used for the prologue scratch reg.
1046
1064
const MachineRegisterInfo &MRI = MF->getRegInfo ();
@@ -1081,23 +1099,18 @@ bool AArch64FrameLowering::canUseAsPrologue(
1081
1099
MBB.isLiveIn (AArch64::NZCV))
1082
1100
return false ;
1083
1101
1084
- // Don't need a scratch register if we're not going to re-align the stack or
1085
- // emit stack probes.
1086
- if (!RegInfo->hasStackRealignment (*MF) && !TLI->hasInlineStackProbe (*MF))
1087
- return true ;
1088
- // Otherwise, we can use any block as long as it has a scratch register
1089
- // available.
1090
- return findScratchNonCalleeSaveRegister (TmpMBB) != AArch64::NoRegister;
1091
- }
1102
+ if (RegInfo->hasStackRealignment (*MF) || TLI->hasInlineStackProbe (*MF))
1103
+ if (findScratchNonCalleeSaveRegister (TmpMBB) == AArch64::NoRegister)
1104
+ return false ;
1092
1105
1093
- static bool windowsRequiresStackProbe (MachineFunction &MF,
1094
- uint64_t StackSizeInBytes) {
1095
- const AArch64Subtarget &Subtarget = MF. getSubtarget <AArch64Subtarget>();
1096
- const AArch64FunctionInfo &MFI = *MF. getInfo <AArch64FunctionInfo>();
1097
- // TODO: When implementing stack protectors, take that into account
1098
- // for the probe threshold.
1099
- return Subtarget. isTargetWindows () && MFI. hasStackProbing () &&
1100
- StackSizeInBytes >= uint64_t (MFI. getStackProbeSize ()) ;
1106
+ // May need a scratch register (for the return value) if a special call is
1107
+ // required.
1108
+ if ( requiresSaveVG (*MF) ||
1109
+ windowsRequiresStackProbe ( *MF, std::numeric_limits< uint64_t >:: max ()))
1110
+ if ( findScratchNonCalleeSaveRegister (TmpMBB, true ) == AArch64::NoRegister)
1111
+ return false ;
1112
+
1113
+ return true ;
1101
1114
}
1102
1115
1103
1116
static bool needsWinCFI (const MachineFunction &MF) {
@@ -1378,8 +1391,8 @@ bool requiresGetVGCall(MachineFunction &MF) {
1378
1391
!MF.getSubtarget <AArch64Subtarget>().hasSVE ();
1379
1392
}
1380
1393
1381
- static bool requiresSaveVG (MachineFunction &MF) {
1382
- AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
1394
+ static bool requiresSaveVG (const MachineFunction &MF) {
1395
+ const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
1383
1396
// For Darwin platforms we don't save VG for non-SVE functions, even if SME
1384
1397
// is enabled with streaming mode changes.
1385
1398
if (!AFI->hasStreamingModeChanges ())
@@ -2049,6 +2062,29 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
2049
2062
if (AFI->getSVECalleeSavedStackSize ())
2050
2063
report_fatal_error (
2051
2064
" SVE callee saves not yet supported with stack probing" );
2065
+
2066
+ // Find an available register to spill the value of X15 to, if X15 is being
2067
+ // used already for nest.
2068
+ unsigned X15Scratch = AArch64::NoRegister;
2069
+ const AArch64Subtarget &STI = MF.getSubtarget <AArch64Subtarget>();
2070
+ if (llvm::any_of (MBB.liveins (),
2071
+ [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
2072
+ return STI.getRegisterInfo ()->isSuperOrSubRegisterEq (
2073
+ AArch64::X15, LiveIn.PhysReg );
2074
+ })) {
2075
+ X15Scratch = findScratchNonCalleeSaveRegister (&MBB, true );
2076
+ assert (X15Scratch != AArch64::NoRegister &&
2077
+ (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
2078
+ #ifndef NDEBUG
2079
+ LiveRegs.removeReg (AArch64::X15); // ignore X15 since we restore it
2080
+ #endif
2081
+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::ORRXrr), X15Scratch)
2082
+ .addReg (AArch64::XZR)
2083
+ .addReg (AArch64::X15, RegState::Undef)
2084
+ .addReg (AArch64::X15, RegState::Implicit)
2085
+ .setMIFlag (MachineInstr::FrameSetup);
2086
+ }
2087
+
2052
2088
uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4 ;
2053
2089
if (NeedsWinCFI) {
2054
2090
HasWinCFI = true ;
@@ -2171,6 +2207,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
2171
2207
// we've set a frame pointer and already finished the SEH prologue.
2172
2208
assert (!NeedsWinCFI);
2173
2209
}
2210
+ if (X15Scratch != AArch64::NoRegister) {
2211
+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::ORRXrr), AArch64::X15)
2212
+ .addReg (AArch64::XZR)
2213
+ .addReg (X15Scratch, RegState::Undef)
2214
+ .addReg (X15Scratch, RegState::Implicit)
2215
+ .setMIFlag (MachineInstr::FrameSetup);
2216
+ }
2174
2217
}
2175
2218
2176
2219
StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
@@ -3355,7 +3398,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
3355
3398
unsigned X0Scratch = AArch64::NoRegister;
3356
3399
if (Reg1 == AArch64::VG) {
3357
3400
// Find an available register to store value of VG to.
3358
- Reg1 = findScratchNonCalleeSaveRegister (&MBB);
3401
+ Reg1 = findScratchNonCalleeSaveRegister (&MBB, true );
3359
3402
assert (Reg1 != AArch64::NoRegister);
3360
3403
SMEAttrs Attrs = AFI->getSMEFnAttrs ();
3361
3404
0 commit comments