@@ -550,6 +550,7 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
550
550
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
551
551
MachineFunction &MF = *MBB.getParent ();
552
552
MachineFrameInfo &MFI = MF.getFrameInfo ();
553
+ AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
553
554
554
555
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo ();
555
556
if (CSI.empty ())
@@ -561,14 +562,20 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
561
562
DebugLoc DL = MBB.findDebugLoc (MBBI);
562
563
563
564
for (const auto &Info : CSI) {
564
- if (MFI.getStackID (Info.getFrameIdx ()) == TargetStackID::ScalableVector)
565
+ unsigned FrameIdx = Info.getFrameIdx ();
566
+ if (MFI.getStackID (FrameIdx) == TargetStackID::ScalableVector)
565
567
continue ;
566
568
567
569
assert (!Info.isSpilledToReg () && " Spilling to registers not implemented" );
568
570
unsigned DwarfReg = TRI.getDwarfRegNum (Info.getReg (), true );
571
+ int64_t Offset = MFI.getObjectOffset (FrameIdx) - getOffsetOfLocalArea ();
572
+
573
+ // Locally streaming functions save two values for VG, but we should only
574
+ // emit the location of the non-streaming value here.
575
+ if (DwarfReg == TRI.getDwarfRegNum (AArch64::VG, true ) &&
576
+ FrameIdx == AFI->getStreamingVGIdx ())
577
+ continue ;
569
578
570
- int64_t Offset =
571
- MFI.getObjectOffset (Info.getFrameIdx ()) - getOffsetOfLocalArea ();
572
579
unsigned CFIIndex = MF.addFrameInst (
573
580
MCCFIInstruction::createOffset (nullptr , DwarfReg, Offset));
574
581
BuildMI (MBB, MBBI, DL, TII.get (TargetOpcode::CFI_INSTRUCTION))
@@ -1348,6 +1355,20 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
1348
1355
int CFAOffset = 0 ) {
1349
1356
unsigned NewOpc;
1350
1357
1358
+ // If the function contains streaming mode changes, we expect instructions
1359
+ // to calculate the value of VG before spilling. For locally-streaming
1360
+ // functions, we need to do this for both the streaming and non-streaming
1361
+ // vector length. Move past these instructions if necessary.
1362
+ unsigned Opc = MBBI->getOpcode ();
1363
+ if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI) {
1364
+ AArch64FunctionInfo AFI = *MBB.getParent ()->getInfo <AArch64FunctionInfo>();
1365
+ assert (AFI.hasStreamingModeChanges () &&
1366
+ " Unexpected callee-save save/restore opcode!" );
1367
+ ++MBBI;
1368
+ if (MBBI->getOpcode () == AArch64::UBFMXri)
1369
+ ++MBBI;
1370
+ }
1371
+
1351
1372
switch (MBBI->getOpcode ()) {
1352
1373
default :
1353
1374
llvm_unreachable (" Unexpected callee-save save/restore opcode!" );
@@ -1655,13 +1676,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
1655
1676
LiveRegs.removeReg (AArch64::LR);
1656
1677
}
1657
1678
1658
- // If the function contains streaming mode changes, we expect the first
1659
- // instruction of MBB to be a CNTD. Move past this instruction if found.
1660
- if (AFI->hasStreamingModeChanges () && F.needsUnwindTableEntry ()) {
1661
- assert (MBBI->getOpcode () == AArch64::CNTD_XPiI && " Unexpected instruction" );
1662
- MBBI = std::next (MBBI);
1663
- }
1664
-
1665
1679
auto VerifyClobberOnExit = make_scope_exit ([&]() {
1666
1680
if (NonFrameStart == MBB.end ())
1667
1681
return ;
@@ -1846,6 +1860,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
1846
1860
// pointer bump above.
1847
1861
while (MBBI != End && MBBI->getFlag (MachineInstr::FrameSetup) &&
1848
1862
!IsSVECalleeSave (MBBI)) {
1863
+ unsigned Opc = MBBI->getOpcode ();
1864
+ // Move past instructions generated to calculate VG
1865
+ if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
1866
+ Opc == AArch64::UBFMXri) {
1867
+ assert (AFI->hasStreamingModeChanges () && " Unexpected opcode!" );
1868
+ ++MBBI;
1869
+ }
1849
1870
if (CombineSPBump)
1850
1871
fixupCalleeSaveRestoreStackOffset (*MBBI, AFI->getLocalStackSize (),
1851
1872
NeedsWinCFI, &HasWinCFI);
@@ -2999,6 +3020,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
2999
3020
bool NeedsWinCFI = needsWinCFI (MF);
3000
3021
DebugLoc DL;
3001
3022
SmallVector<RegPairInfo, 8 > RegPairs;
3023
+ bool SpilledStreamingVG = false ;
3024
+ MachineFrameInfo &MFI = MF.getFrameInfo ();
3002
3025
3003
3026
computeCalleeSaveRegisterPairs (MF, CSI, TRI, RegPairs, hasFP (MF));
3004
3027
@@ -3073,10 +3096,30 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
3073
3096
// Find an available register to store value of VG to.
3074
3097
Reg1 = findScratchNonCalleeSaveRegister (&MBB);
3075
3098
assert (Reg1 != AArch64::NoRegister);
3099
+ SMEAttrs Attrs (MF.getFunction ());
3100
+
3101
+ if (Attrs.hasStreamingBody () && !Attrs.hasStreamingInterface () &&
3102
+ !SpilledStreamingVG) {
3103
+ // For locally-streaming functions, we need to store both the streaming
3104
+ // & non-streaming VG. Spill the streaming value first.
3105
+ BuildMI (MBB, MI, DL, TII.get (AArch64::RDSVLI_XI), Reg1)
3106
+ .addImm (1 )
3107
+ .setMIFlag (MachineInstr::FrameSetup);
3108
+ BuildMI (MBB, MI, DL, TII.get (AArch64::UBFMXri), Reg1)
3109
+ .addReg (Reg1)
3110
+ .addImm (3 )
3111
+ .addImm (63 )
3112
+ .setMIFlag (MachineInstr::FrameSetup);
3076
3113
3077
- BuildMI (MBB, MBB.begin (), DL, TII.get (AArch64::CNTD_XPiI), Reg1)
3078
- .addImm (31 )
3079
- .addImm (1 );
3114
+ AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
3115
+ AFI->setStreamingVGIdx (RPI.FrameIdx );
3116
+ SpilledStreamingVG = true ;
3117
+ } else {
3118
+ BuildMI (MBB, MI, DL, TII.get (AArch64::CNTD_XPiI), Reg1)
3119
+ .addImm (31 )
3120
+ .addImm (1 )
3121
+ .setMIFlag (MachineInstr::FrameSetup);
3122
+ }
3080
3123
}
3081
3124
3082
3125
LLVM_DEBUG (dbgs () << " CSR spill: (" << printReg (Reg1, TRI);
@@ -3122,7 +3165,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
3122
3165
MachineFrameInfo &MFI = MF.getFrameInfo ();
3123
3166
if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
3124
3167
MFI.setStackID (RPI.FrameIdx , TargetStackID::ScalableVector);
3125
-
3126
3168
}
3127
3169
return true ;
3128
3170
}
@@ -3348,9 +3390,16 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
3348
3390
3349
3391
// Increase the callee-saved stack size if the function has streaming mode
3350
3392
// changes, as we will need to spill the value of the VG register.
3393
+ // For locally streaming functions, we spill both the streaming and
3394
+ // non-streaming VG value.
3351
3395
const Function &F = MF.getFunction ();
3352
- if (AFI->hasStreamingModeChanges () && F.needsUnwindTableEntry ())
3353
- CSStackSize += 8 ;
3396
+ SMEAttrs Attrs (F);
3397
+ if (AFI->hasStreamingModeChanges () && F.needsUnwindTableEntry ()) {
3398
+ if (Attrs.hasStreamingBody () && !Attrs.hasStreamingInterface ())
3399
+ CSStackSize += 16 ;
3400
+ else
3401
+ CSStackSize += 8 ;
3402
+ }
3354
3403
3355
3404
// Save number of saved regs, so we can easily update CSStackSize later.
3356
3405
unsigned NumSavedRegs = SavedRegs.count ();
@@ -3491,19 +3540,29 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
3491
3540
// Insert VG into the list of CSRs, immediately before LR if saved.
3492
3541
const Function &F = MF.getFunction ();
3493
3542
if (AFI->hasStreamingModeChanges () && F.needsUnwindTableEntry ()) {
3543
+ std::vector<CalleeSavedInfo> VGSaves;
3544
+ SMEAttrs Attrs (MF.getFunction ());
3545
+
3494
3546
auto VGInfo = CalleeSavedInfo (AArch64::VG);
3495
3547
VGInfo.setRestored (false );
3548
+ VGSaves.push_back (VGInfo);
3549
+
3550
+ // Add VG again if the function is locally-streaming, as we will spill two
3551
+ // values.
3552
+ if (Attrs.hasStreamingBody () && !Attrs.hasStreamingInterface ())
3553
+ VGSaves.push_back (VGInfo);
3554
+
3496
3555
bool InsertBeforeLR = false ;
3497
3556
3498
3557
for (unsigned I = 0 ; I < CSI.size (); I++)
3499
3558
if (CSI[I].getReg () == AArch64::LR) {
3500
3559
InsertBeforeLR = true ;
3501
- CSI.insert (CSI.begin () + I, VGInfo );
3560
+ CSI.insert (CSI.begin () + I, VGSaves. begin (), VGSaves. end () );
3502
3561
break ;
3503
3562
}
3504
3563
3505
3564
if (!InsertBeforeLR)
3506
- CSI.push_back (VGInfo );
3565
+ CSI.insert (CSI. end (), VGSaves. begin (), VGSaves. end () );
3507
3566
}
3508
3567
3509
3568
for (auto &CS : CSI) {
0 commit comments