Skip to content

[AArch64] Pass scratch regs as operands to StoreSwiftAsyncContext. #73332

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
28 changes: 15 additions & 13 deletions llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,8 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
Register CtxReg = MBBI->getOperand(0).getReg();
Register BaseReg = MBBI->getOperand(1).getReg();
int Offset = MBBI->getOperand(2).getImm();
Register ScratchReg1 = MBBI->getOperand(3).getReg();
Register ScratchReg2 = MBBI->getOperand(4).getReg();
DebugLoc DL(MBBI->getDebugLoc());
auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

Expand All @@ -872,35 +874,35 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(

// We need to sign the context in an address-discriminated way. 0xc31a is a
// fixed random value, chosen as part of the ABI.
// add x16, xBase, #Offset
// movk x16, #0xc31a, lsl #48
// mov x17, x22/xzr
// pacdb x17, x16
// str x17, [xBase, #Offset]
// add ScratchReg1, xBase, #Offset
// movk ScratchReg1, #0xc31a, lsl #48
// mov ScratchReg2, x22/xzr
// pacdb ScratchReg2, ScratchReg1
// str ScratchReg2, [xBase, #Offset]
unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
BuildMI(MBB, MBBI, DL, TII->get(Opc), ScratchReg1)
.addUse(BaseReg)
.addImm(abs(Offset))
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
.addUse(AArch64::X16)
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), ScratchReg1)
.addUse(ScratchReg1)
.addImm(0xc31a)
.addImm(48)
.setMIFlag(MachineInstr::FrameSetup);
// We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
// move it somewhere before signing.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), ScratchReg2)
.addUse(AArch64::XZR)
.addUse(CtxReg)
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
.addUse(AArch64::X17)
.addUse(AArch64::X16)
BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), ScratchReg2)
.addUse(ScratchReg2)
.addUse(ScratchReg1)
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
.addUse(AArch64::X17)
.addUse(ScratchReg2)
.addUse(BaseReg)
.addImm(Offset / 8)
.setMIFlag(MachineInstr::FrameSetup);
Expand Down
30 changes: 27 additions & 3 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,8 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
unsigned FirstScratchReg = 0);

/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
Expand Down Expand Up @@ -870,17 +871,24 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
//
// If \p FirstScratchReg is not 0, it specifies the register that was chosen as
// first scratch register and indicates that it should return another scratch
// register, if possible.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
unsigned FirstScratchReg) {
MachineFunction *MF = MBB->getParent();

// If MBB is an entry block, use X9 as the scratch register
if (&MF->front() == MBB)
if (&MF->front() == MBB && !FirstScratchReg)
return AArch64::X9;

const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
LivePhysRegs LiveRegs(TRI);
LiveRegs.addLiveIns(*MBB);
if (FirstScratchReg)
LiveRegs.addReg(FirstScratchReg);

// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
Expand All @@ -905,6 +913,17 @@ bool AArch64FrameLowering::canUseAsPrologue(
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();

if (AFI->hasSwiftAsyncContext()) {
// Expanding StoreSwiftAsyncContext requires 2 scratch registers.
unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(TmpMBB);
unsigned SecondScratchReg =
findScratchNonCalleeSaveRegister(TmpMBB, FirstScratchReg);
if (FirstScratchReg == AArch64::NoRegister ||
SecondScratchReg == AArch64::NoRegister)
return false;
}

// Don't need a scratch register if we're not going to re-align the stack.
if (!RegInfo->hasStackRealignment(*MF))
Expand Down Expand Up @@ -1681,11 +1700,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
if (HaveInitialContext)
MBB.addLiveIn(AArch64::X22);
unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
unsigned SecondScratchReg =
findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);
Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
.addUse(Reg)
.addUse(AArch64::SP)
.addImm(FPOffset - 8)
.addDef(FirstScratchReg, RegState::Implicit)
.addDef(SecondScratchReg, RegState::Implicit)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
// WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -9163,9 +9163,10 @@ def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
//-----------------------------------------------------------------------------

// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
let mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset,
GPR64:$scratch1, GPR64sp:$scratch2),
[]>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
Expand Down
Loading