Skip to content

Commit 93c8e0f

Browse files
[AArch64][SME] Save VG for unwind info when changing streaming-mode (llvm#83301)
If a function requires any streaming-mode change, the vector granule value must be stored to the stack and unwind info must also describe the save of VG to this location. This patch adds VG to the list of callee-saved registers and increases the callee-saved stack size if the function requires streaming-mode changes. A new type is added to RegPairInfo, which is also used to skip restoring the register used to spill the VG value in the epilogue. See https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst
1 parent 46080ab commit 93c8e0f

18 files changed

+2167
-452
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 236 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
321321
return false;
322322

323323
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
324-
if (AFI->hasSwiftAsyncContext())
324+
if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
325325
return false;
326326

327327
// If there are an odd number of GPRs before LR and FP in the CSRs list,
@@ -558,6 +558,10 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
558558
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
559559
MachineFunction &MF = *MBB.getParent();
560560
MachineFrameInfo &MFI = MF.getFrameInfo();
561+
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
562+
SMEAttrs Attrs(MF.getFunction());
563+
bool LocallyStreaming =
564+
Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();
561565

562566
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
563567
if (CSI.empty())
@@ -569,14 +573,22 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
569573
DebugLoc DL = MBB.findDebugLoc(MBBI);
570574

571575
for (const auto &Info : CSI) {
572-
if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector)
576+
unsigned FrameIdx = Info.getFrameIdx();
577+
if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
573578
continue;
574579

575580
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
576-
unsigned DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
581+
int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
582+
int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();
583+
584+
// The location of VG will be emitted before each streaming-mode change in
585+
// the function. Only locally-streaming functions require emitting the
586+
// non-streaming VG location here.
587+
if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
588+
(!LocallyStreaming &&
589+
DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))
590+
continue;
577591

578-
int64_t Offset =
579-
MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
580592
unsigned CFIIndex = MF.addFrameInst(
581593
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
582594
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -699,6 +711,9 @@ static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
699711
!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
700712
continue;
701713

714+
if (!Info.isRestored())
715+
continue;
716+
702717
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
703718
nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
704719
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1342,6 +1357,32 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
13421357
ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
13431358
}
13441359

1360+
bool requiresGetVGCall(MachineFunction &MF) {
1361+
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1362+
return AFI->hasStreamingModeChanges() &&
1363+
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
1364+
}
1365+
1366+
bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
1367+
unsigned Opc = MBBI->getOpcode();
1368+
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
1369+
Opc == AArch64::UBFMXri)
1370+
return true;
1371+
1372+
if (requiresGetVGCall(*MBBI->getMF())) {
1373+
if (Opc == AArch64::ORRXrr)
1374+
return true;
1375+
1376+
if (Opc == AArch64::BL) {
1377+
auto Op1 = MBBI->getOperand(0);
1378+
return Op1.isSymbol() &&
1379+
(StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
1380+
}
1381+
}
1382+
1383+
return false;
1384+
}
1385+
13451386
// Convert callee-save register save/restore instruction to do stack pointer
13461387
// decrement/increment to allocate/deallocate the callee-save stack area by
13471388
// converting store/load to use pre/post increment version.
@@ -1352,6 +1393,17 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
13521393
MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
13531394
int CFAOffset = 0) {
13541395
unsigned NewOpc;
1396+
1397+
// If the function contains streaming mode changes, we expect instructions
1398+
// to calculate the value of VG before spilling. For locally-streaming
1399+
// functions, we need to do this for both the streaming and non-streaming
1400+
// vector length. Move past these instructions if necessary.
1401+
MachineFunction &MF = *MBB.getParent();
1402+
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1403+
if (AFI->hasStreamingModeChanges())
1404+
while (isVGInstruction(MBBI))
1405+
++MBBI;
1406+
13551407
switch (MBBI->getOpcode()) {
13561408
default:
13571409
llvm_unreachable("Unexpected callee-save save/restore opcode!");
@@ -1408,7 +1460,6 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
14081460

14091461
// If the first store isn't right where we want SP then we can't fold the
14101462
// update in so create a normal arithmetic instruction instead.
1411-
MachineFunction &MF = *MBB.getParent();
14121463
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
14131464
CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
14141465
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
@@ -1660,6 +1711,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
16601711
LiveRegs.removeReg(AArch64::X19);
16611712
LiveRegs.removeReg(AArch64::FP);
16621713
LiveRegs.removeReg(AArch64::LR);
1714+
1715+
// X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
1716+
// This is necessary to spill VG if required where SVE is unavailable, but
1717+
// X0 is preserved around this call.
1718+
if (requiresGetVGCall(MF))
1719+
LiveRegs.removeReg(AArch64::X0);
16631720
}
16641721

16651722
auto VerifyClobberOnExit = make_scope_exit([&]() {
@@ -1846,6 +1903,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
18461903
// pointer bump above.
18471904
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
18481905
!IsSVECalleeSave(MBBI)) {
1906+
// Move past instructions generated to calculate VG
1907+
if (AFI->hasStreamingModeChanges())
1908+
while (isVGInstruction(MBBI))
1909+
++MBBI;
1910+
18491911
if (CombineSPBump)
18501912
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
18511913
NeedsWinCFI, &HasWinCFI);
@@ -2768,7 +2830,7 @@ struct RegPairInfo {
27682830
unsigned Reg2 = AArch64::NoRegister;
27692831
int FrameIdx;
27702832
int Offset;
2771-
enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
2833+
enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
27722834

27732835
RegPairInfo() = default;
27742836

@@ -2780,6 +2842,7 @@ struct RegPairInfo {
27802842
return 2;
27812843
case GPR:
27822844
case FPR64:
2845+
case VG:
27832846
return 8;
27842847
case ZPR:
27852848
case FPR128:
@@ -2855,6 +2918,8 @@ static void computeCalleeSaveRegisterPairs(
28552918
RPI.Type = RegPairInfo::ZPR;
28562919
else if (AArch64::PPRRegClass.contains(RPI.Reg1))
28572920
RPI.Type = RegPairInfo::PPR;
2921+
else if (RPI.Reg1 == AArch64::VG)
2922+
RPI.Type = RegPairInfo::VG;
28582923
else
28592924
llvm_unreachable("Unsupported register class.");
28602925

@@ -2887,6 +2952,8 @@ static void computeCalleeSaveRegisterPairs(
28872952
if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1))
28882953
RPI.Reg2 = NextReg;
28892954
break;
2955+
case RegPairInfo::VG:
2956+
break;
28902957
}
28912958
}
28922959

@@ -3003,6 +3070,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
30033070
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
30043071
MachineFunction &MF = *MBB.getParent();
30053072
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
3073+
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
30063074
bool NeedsWinCFI = needsWinCFI(MF);
30073075
DebugLoc DL;
30083076
SmallVector<RegPairInfo, 8> RegPairs;
@@ -3070,7 +3138,70 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
30703138
Size = 2;
30713139
Alignment = Align(2);
30723140
break;
3141+
case RegPairInfo::VG:
3142+
StrOpc = AArch64::STRXui;
3143+
Size = 8;
3144+
Alignment = Align(8);
3145+
break;
30733146
}
3147+
3148+
unsigned X0Scratch = AArch64::NoRegister;
3149+
if (Reg1 == AArch64::VG) {
3150+
// Find an available register to store value of VG to.
3151+
Reg1 = findScratchNonCalleeSaveRegister(&MBB);
3152+
assert(Reg1 != AArch64::NoRegister);
3153+
SMEAttrs Attrs(MF.getFunction());
3154+
3155+
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&
3156+
AFI->getStreamingVGIdx() == std::numeric_limits<int>::max()) {
3157+
// For locally-streaming functions, we need to store both the streaming
3158+
// & non-streaming VG. Spill the streaming value first.
3159+
BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1)
3160+
.addImm(1)
3161+
.setMIFlag(MachineInstr::FrameSetup);
3162+
BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1)
3163+
.addReg(Reg1)
3164+
.addImm(3)
3165+
.addImm(63)
3166+
.setMIFlag(MachineInstr::FrameSetup);
3167+
3168+
AFI->setStreamingVGIdx(RPI.FrameIdx);
3169+
} else if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
3170+
BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1)
3171+
.addImm(31)
3172+
.addImm(1)
3173+
.setMIFlag(MachineInstr::FrameSetup);
3174+
AFI->setVGIdx(RPI.FrameIdx);
3175+
} else {
3176+
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
3177+
if (llvm::any_of(
3178+
MBB.liveins(),
3179+
[&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
3180+
return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
3181+
AArch64::X0, LiveIn.PhysReg);
3182+
}))
3183+
X0Scratch = Reg1;
3184+
3185+
if (X0Scratch != AArch64::NoRegister)
3186+
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), Reg1)
3187+
.addReg(AArch64::XZR)
3188+
.addReg(AArch64::X0, RegState::Undef)
3189+
.addReg(AArch64::X0, RegState::Implicit)
3190+
.setMIFlag(MachineInstr::FrameSetup);
3191+
3192+
const uint32_t *RegMask = TRI->getCallPreservedMask(
3193+
MF,
3194+
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1);
3195+
BuildMI(MBB, MI, DL, TII.get(AArch64::BL))
3196+
.addExternalSymbol("__arm_get_current_vg")
3197+
.addRegMask(RegMask)
3198+
.addReg(AArch64::X0, RegState::ImplicitDefine)
3199+
.setMIFlag(MachineInstr::FrameSetup);
3200+
Reg1 = AArch64::X0;
3201+
AFI->setVGIdx(RPI.FrameIdx);
3202+
}
3203+
}
3204+
30743205
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
30753206
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
30763207
dbgs() << ") -> fi#(" << RPI.FrameIdx;
@@ -3162,6 +3293,13 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
31623293
if (RPI.isPaired())
31633294
MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector);
31643295
}
3296+
3297+
if (X0Scratch != AArch64::NoRegister)
3298+
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0)
3299+
.addReg(AArch64::XZR)
3300+
.addReg(X0Scratch, RegState::Undef)
3301+
.addReg(X0Scratch, RegState::Implicit)
3302+
.setMIFlag(MachineInstr::FrameSetup);
31653303
}
31663304
return true;
31673305
}
@@ -3241,6 +3379,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
32413379
Size = 2;
32423380
Alignment = Align(2);
32433381
break;
3382+
case RegPairInfo::VG:
3383+
continue;
32443384
}
32453385
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
32463386
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
@@ -3440,6 +3580,19 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
34403580
CSStackSize += RegSize;
34413581
}
34423582

3583+
// Increase the callee-saved stack size if the function has streaming mode
3584+
// changes, as we will need to spill the value of the VG register.
3585+
// For locally streaming functions, we spill both the streaming and
3586+
// non-streaming VG value.
3587+
const Function &F = MF.getFunction();
3588+
SMEAttrs Attrs(F);
3589+
if (AFI->hasStreamingModeChanges()) {
3590+
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
3591+
CSStackSize += 16;
3592+
else
3593+
CSStackSize += 8;
3594+
}
3595+
34433596
// Save number of saved regs, so we can easily update CSStackSize later.
34443597
unsigned NumSavedRegs = SavedRegs.count();
34453598

@@ -3576,6 +3729,33 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
35763729
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
35773730
}
35783731

3732+
// Insert VG into the list of CSRs, immediately before LR if saved.
3733+
if (AFI->hasStreamingModeChanges()) {
3734+
std::vector<CalleeSavedInfo> VGSaves;
3735+
SMEAttrs Attrs(MF.getFunction());
3736+
3737+
auto VGInfo = CalleeSavedInfo(AArch64::VG);
3738+
VGInfo.setRestored(false);
3739+
VGSaves.push_back(VGInfo);
3740+
3741+
// Add VG again if the function is locally-streaming, as we will spill two
3742+
// values.
3743+
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
3744+
VGSaves.push_back(VGInfo);
3745+
3746+
bool InsertBeforeLR = false;
3747+
3748+
for (unsigned I = 0; I < CSI.size(); I++)
3749+
if (CSI[I].getReg() == AArch64::LR) {
3750+
InsertBeforeLR = true;
3751+
CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
3752+
break;
3753+
}
3754+
3755+
if (!InsertBeforeLR)
3756+
CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
3757+
}
3758+
35793759
for (auto &CS : CSI) {
35803760
Register Reg = CS.getReg();
35813761
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
@@ -4191,12 +4371,58 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
41914371
}
41924372
} // namespace
41934373

4374+
MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
4375+
const AArch64FrameLowering *TFI) {
4376+
MachineInstr &MI = *II;
4377+
MachineBasicBlock *MBB = MI.getParent();
4378+
MachineFunction *MF = MBB->getParent();
4379+
4380+
if (MI.getOpcode() != AArch64::VGSavePseudo &&
4381+
MI.getOpcode() != AArch64::VGRestorePseudo)
4382+
return II;
4383+
4384+
SMEAttrs FuncAttrs(MF->getFunction());
4385+
bool LocallyStreaming =
4386+
FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface();
4387+
const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
4388+
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
4389+
const AArch64InstrInfo *TII =
4390+
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
4391+
4392+
int64_t VGFrameIdx =
4393+
LocallyStreaming ? AFI->getStreamingVGIdx() : AFI->getVGIdx();
4394+
assert(VGFrameIdx != std::numeric_limits<int>::max() &&
4395+
"Expected FrameIdx for VG");
4396+
4397+
unsigned CFIIndex;
4398+
if (MI.getOpcode() == AArch64::VGSavePseudo) {
4399+
const MachineFrameInfo &MFI = MF->getFrameInfo();
4400+
int64_t Offset =
4401+
MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea();
4402+
CFIIndex = MF->addFrameInst(MCCFIInstruction::createOffset(
4403+
nullptr, TRI->getDwarfRegNum(AArch64::VG, true), Offset));
4404+
} else
4405+
CFIIndex = MF->addFrameInst(MCCFIInstruction::createRestore(
4406+
nullptr, TRI->getDwarfRegNum(AArch64::VG, true)));
4407+
4408+
MachineInstr *UnwindInst = BuildMI(*MBB, II, II->getDebugLoc(),
4409+
TII->get(TargetOpcode::CFI_INSTRUCTION))
4410+
.addCFIIndex(CFIIndex);
4411+
4412+
MI.eraseFromParent();
4413+
return UnwindInst->getIterator();
4414+
}
4415+
41944416
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
41954417
MachineFunction &MF, RegScavenger *RS = nullptr) const {
4196-
if (StackTaggingMergeSetTag)
4197-
for (auto &BB : MF)
4198-
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();)
4418+
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
4419+
for (auto &BB : MF)
4420+
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
4421+
if (AFI->hasStreamingModeChanges())
4422+
II = emitVGSaveRestore(II, this);
4423+
if (StackTaggingMergeSetTag)
41994424
II = tryMergeAdjacentSTG(II, this, RS);
4425+
}
42004426
}
42014427

42024428
/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP

0 commit comments

Comments
 (0)