Commit fe623c3

[AArch64] Pass scratch regs as operands to StoreSwiftAsyncContext.
Add two additional operands to StoreSwiftAsyncContext that specify the scratch registers to use, instead of hard-coding them. This fixes a miscompile where StoreSwiftAsyncContext could clobber live registers after being moved during shrink-wrapping.
1 parent 4a2db23 · commit fe623c3
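Background on the bug: the pseudo used to carry Defs = [X16, X17] (see the AArch64InstrInfo.td diff below), so its expansion always wrote to x16 and x17. Shrink-wrapping can move the prologue, and StoreSwiftAsyncContext with it, into a block where one of those registers holds a live value, which the signing sequence then silently clobbered. The caller-side shape of the fix, condensed from the AArch64FrameLowering.cpp diff below (a sketch, not the full context):

    // Pick two scratch registers that are free in this block and attach them
    // to the pseudo, instead of relying on the hard-coded x16/x17.
    unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
    unsigned SecondScratchReg =
        findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
        .addUse(Reg)                // the context value: x22, or xzr if none
        .addUse(AArch64::SP)
        .addImm(FPOffset - 8)
        .addDef(FirstScratchReg, RegState::Implicit)
        .addDef(SecondScratchReg, RegState::Implicit)
        .setMIFlags(MachineInstr::FrameSetup);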

6 files changed: 521 additions & 42 deletions

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 15 additions & 13 deletions
@@ -857,6 +857,8 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
   Register CtxReg = MBBI->getOperand(0).getReg();
   Register BaseReg = MBBI->getOperand(1).getReg();
   int Offset = MBBI->getOperand(2).getImm();
+  Register ScratchReg1 = MBBI->getOperand(3).getReg();
+  Register ScratchReg2 = MBBI->getOperand(4).getReg();
   DebugLoc DL(MBBI->getDebugLoc());
   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

@@ -872,35 +874,35 @@

   // We need to sign the context in an address-discriminated way. 0xc31a is a
   // fixed random value, chosen as part of the ABI.
-  // add x16, xBase, #Offset
-  // movk x16, #0xc31a, lsl #48
-  // mov x17, x22/xzr
-  // pacdb x17, x16
-  // str x17, [xBase, #Offset]
+  // add ScratchReg1, xBase, #Offset
+  // movk ScratchReg1, #0xc31a, lsl #48
+  // mov ScratchReg2, x22/xzr
+  // pacdb ScratchReg2, ScratchReg1
+  // str ScratchReg2, [xBase, #Offset]
   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
-  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(Opc), ScratchReg1)
       .addUse(BaseReg)
       .addImm(abs(Offset))
       .addImm(0)
       .setMIFlag(MachineInstr::FrameSetup);
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
-      .addUse(AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), ScratchReg1)
+      .addUse(ScratchReg1)
       .addImm(0xc31a)
       .addImm(48)
       .setMIFlag(MachineInstr::FrameSetup);
   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
   // move it somewhere before signing.
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), ScratchReg2)
       .addUse(AArch64::XZR)
       .addUse(CtxReg)
       .addImm(0)
       .setMIFlag(MachineInstr::FrameSetup);
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
-      .addUse(AArch64::X17)
-      .addUse(AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), ScratchReg2)
+      .addUse(ScratchReg2)
+      .addUse(ScratchReg1)
       .setMIFlag(MachineInstr::FrameSetup);
   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
-      .addUse(AArch64::X17)
+      .addUse(ScratchReg2)
       .addUse(BaseReg)
       .addImm(Offset / 8)
       .setMIFlag(MachineInstr::FrameSetup);
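
To make the new expansion concrete: with hypothetical operands ScratchReg1 = x9, ScratchReg2 = x10, BaseReg = sp and Offset = 8 (the register choices are illustrative; in practice they are whatever the prologue attached), the pseudo now expands to:

    // add   x9, sp, #8             ; ScratchReg1 = base address + offset
    // movk  x9, #0xc31a, lsl #48   ; mix in the fixed ABI discriminator
    // mov   x10, x22               ; copy the context so x22 isn't clobbered
    // pacdb x10, x9                ; sign it, discriminated by the address
    // str   x10, [sp, #8]          ; STRXui immediate is Offset / 8 = 1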

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 27 additions & 3 deletions
@@ -296,7 +296,8 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                 unsigned FirstScratchReg = 0);

 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
@@ -870,17 +871,24 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
 // but we would then have to make sure that we were in fact saving at least one
 // callee-save register in the prologue, which is additional complexity that
 // doesn't seem worth the benefit.
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
+//
+// If \p FirstScratchReg is not 0, it specifies the register that was chosen as
+// the first scratch register, and indicates that another scratch register
+// should be returned, if possible.
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                 unsigned FirstScratchReg) {
   MachineFunction *MF = MBB->getParent();

   // If MBB is an entry block, use X9 as the scratch register
-  if (&MF->front() == MBB)
+  if (&MF->front() == MBB && !FirstScratchReg)
     return AArch64::X9;

   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
   LivePhysRegs LiveRegs(TRI);
   LiveRegs.addLiveIns(*MBB);
+  if (FirstScratchReg)
+    LiveRegs.addReg(FirstScratchReg);

   // Mark callee saved registers as used so we will not choose them.
   const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
@@ -905,6 +913,17 @@ bool AArch64FrameLowering::canUseAsPrologue(
   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
+
+  if (AFI->hasSwiftAsyncContext()) {
+    // Expanding StoreSwiftAsyncContext requires 2 scratch registers.
+    unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(TmpMBB);
+    unsigned SecondScratchReg =
+        findScratchNonCalleeSaveRegister(TmpMBB, FirstScratchReg);
+    if (FirstScratchReg == AArch64::NoRegister ||
+        SecondScratchReg == AArch64::NoRegister)
+      return false;
+  }

   // Don't need a scratch register if we're not going to re-align the stack.
   if (!RegInfo->hasStackRealignment(*MF))
@@ -1681,11 +1700,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
     bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
     if (HaveInitialContext)
       MBB.addLiveIn(AArch64::X22);
+    unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+    unsigned SecondScratchReg =
+        findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);
     Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
     BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
         .addUse(Reg)
         .addUse(AArch64::SP)
         .addImm(FPOffset - 8)
+        .addDef(FirstScratchReg, RegState::Implicit)
+        .addDef(SecondScratchReg, RegState::Implicit)
         .setMIFlags(MachineInstr::FrameSetup);
     if (NeedsWinCFI) {
       // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
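
Two details of the helper's new contract: passing the first pick back in as FirstScratchReg adds it to the LivePhysRegs set, so the second query cannot return the same register; and the entry-block fast path that unconditionally returns X9 is disabled for the second query, forcing a real liveness scan. A usage sketch, mirroring the canUseAsPrologue hunk above:

    unsigned First = findScratchNonCalleeSaveRegister(TmpMBB);
    unsigned Second = findScratchNonCalleeSaveRegister(TmpMBB, First);
    // If two distinct scratch registers aren't available, reject this block
    // as a prologue location.
    if (First == AArch64::NoRegister || Second == AArch64::NoRegister)
      return false;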

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 3 additions & 2 deletions
@@ -9163,9 +9163,10 @@ def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
 //-----------------------------------------------------------------------------

 // This gets lowered into an instruction sequence of 20 bytes
-let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
+let mayStore = 1, isCodeGenOnly = 1, Size = 20 in
 def StoreSwiftAsyncContext
-      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
+      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset,
+                        GPR64:$scratch1, GPR64sp:$scratch2),
               []>, Sched<[]>;

 def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
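
With Defs = [X16, X17] gone, the instruction description no longer fixes the scratch registers; the prologue attaches whichever registers it picked, and the expander reads them back as operands 3 and 4 (after $ctx, $base and $offset), per the AArch64ExpandPseudoInsts.cpp hunk above. Size = 20 is unchanged: the expansion is still the same five 4-byte instructions, only the registers differ.

    Register ScratchReg1 = MBBI->getOperand(3).getReg();
    Register ScratchReg2 = MBBI->getOperand(4).getReg();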
