
[AArch64] Pass scratch regs as operands to StoreSwiftAsyncContext. #73332

Closed

Conversation

fhahn
Contributor

@fhahn fhahn commented Nov 24, 2023

Add two additional arguments to StoreSwiftAsyncContext to specify the scratch registers to use, instead of hard-coding them. This fixes a miscompile where StoreSwiftAsyncContext could clobber live registers after being moved during shrink-wrapping.
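
In a nutshell, a sketch mirroring the patch below (all names are taken from the diff, nothing new is introduced): the prologue now asks findScratchNonCalleeSaveRegister for two free non-callee-saved registers and attaches them to the pseudo, so the later expansion no longer assumes X16/X17 are available.

    unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
    unsigned SecondScratchReg =
        findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);
    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
    // The scratch registers travel with the pseudo; the expansion reads them
    // instead of hard-coding X16/X17.
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
        .addUse(Reg)
        .addUse(AArch64::SP)
        .addImm(FPOffset - 8)
        .addDef(FirstScratchReg, RegState::Implicit)
        .addDef(SecondScratchReg, RegState::Implicit)
        .setMIFlags(MachineInstr::FrameSetup);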

@llvmbot
Member

llvmbot commented Nov 24, 2023

@llvm/pr-subscribers-backend-aarch64

Author: Florian Hahn (fhahn)

Changes

Add two additional arguments to StoreSwiftAsyncContext to specify the scratch registers to use, instead of hard-coding them. This fixes a miscompile where StoreSwiftAsyncContext could clobber live registers after being moved during shrink-wrapping.


Patch is 28.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73332.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (+15-13)
  • (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+25-3)
  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+3-2)
  • (modified) llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll (+454-1)
  • (modified) llvm/test/CodeGen/AArch64/swift-async.ll (+21-21)
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index ac26f4d4fbe66ae..be8aed5967eb26f 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -857,6 +857,8 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
   Register CtxReg = MBBI->getOperand(0).getReg();
   Register BaseReg = MBBI->getOperand(1).getReg();
   int Offset = MBBI->getOperand(2).getImm();
+  Register ScratchReg1 = MBBI->getOperand(3).getReg();
+  Register ScratchReg2 = MBBI->getOperand(4).getReg();
   DebugLoc DL(MBBI->getDebugLoc());
   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
 
@@ -872,35 +874,35 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
 
   // We need to sign the context in an address-discriminated way. 0xc31a is a
   // fixed random value, chosen as part of the ABI.
-  //     add x16, xBase, #Offset
-  //     movk x16, #0xc31a, lsl #48
-  //     mov x17, x22/xzr
-  //     pacdb x17, x16
-  //     str x17, [xBase, #Offset]
+  //     add ScratchReg1, xBase, #Offset
+  //     movk ScratchReg1, #0xc31a, lsl #48
+  //     mov ScratchReg2, x22/xzr
+  //     pacdb ScratchReg2, ScratchReg1
+  //     str ScratchReg2, [xBase, #Offset]
   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
-  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(Opc), ScratchReg1)
       .addUse(BaseReg)
       .addImm(abs(Offset))
       .addImm(0)
       .setMIFlag(MachineInstr::FrameSetup);
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
-      .addUse(AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), ScratchReg1)
+      .addUse(ScratchReg1)
       .addImm(0xc31a)
       .addImm(48)
       .setMIFlag(MachineInstr::FrameSetup);
   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
   // move it somewhere before signing.
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), ScratchReg2)
       .addUse(AArch64::XZR)
       .addUse(CtxReg)
       .addImm(0)
       .setMIFlag(MachineInstr::FrameSetup);
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
-      .addUse(AArch64::X17)
-      .addUse(AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), ScratchReg2)
+      .addUse(ScratchReg2)
+      .addUse(ScratchReg1)
       .setMIFlag(MachineInstr::FrameSetup);
   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
-      .addUse(AArch64::X17)
+      .addUse(ScratchReg2)
       .addUse(BaseReg)
       .addImm(Offset / 8)
       .setMIFlag(MachineInstr::FrameSetup);
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd47970bd050596..e1b62e774146d10 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -296,7 +296,8 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                 unsigned FirstScratchReg = 0);
 
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
@@ -870,17 +871,22 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
 // but we would then have to make sure that we were in fact saving at least one
 // callee-save register in the prologue, which is additional complexity that
 // doesn't seem worth the benefit.
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
+//
+// If \p FirstScratchReg is not 0, it specifies the register already chosen as the first scratch register; in that case, return a different scratch register, if possible.
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                 unsigned FirstScratchReg) {
   MachineFunction *MF = MBB->getParent();
 
   // If MBB is an entry block, use X9 as the scratch register
-  if (&MF->front() == MBB)
+  if (&MF->front() == MBB && !FirstScratchReg)
     return AArch64::X9;
 
   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
   LivePhysRegs LiveRegs(TRI);
   LiveRegs.addLiveIns(*MBB);
+  if (FirstScratchReg)
+    LiveRegs.addReg(FirstScratchReg);
 
   // Mark callee saved registers as used so we will not choose them.
   const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
@@ -905,6 +911,17 @@ bool AArch64FrameLowering::canUseAsPrologue(
   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
+
+  if (AFI->hasSwiftAsyncContext()) {
+    // Expanding StoreSwiftAsyncContext requires 2 scratch registers.
+    unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(TmpMBB);
+    unsigned SecondScratchReg =
+        findScratchNonCalleeSaveRegister(TmpMBB, FirstScratchReg);
+    if (FirstScratchReg == AArch64::NoRegister ||
+        SecondScratchReg == AArch64::NoRegister)
+      return false;
+  }
 
   // Don't need a scratch register if we're not going to re-align the stack.
   if (!RegInfo->hasStackRealignment(*MF))
@@ -1681,11 +1698,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
       if (HaveInitialContext)
         MBB.addLiveIn(AArch64::X22);
+      unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+      unsigned SecondScratchReg =
+          findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);
       Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
           .addUse(Reg)
           .addUse(AArch64::SP)
           .addImm(FPOffset - 8)
+          .addDef(FirstScratchReg, RegState::Implicit)
+          .addDef(SecondScratchReg, RegState::Implicit)
           .setMIFlags(MachineInstr::FrameSetup);
       if (NeedsWinCFI) {
         // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0a8abfae5051dd8..75ab0ae6a41ad16 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9163,9 +9163,10 @@ def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
 //-----------------------------------------------------------------------------
 
 // This gets lowered into an instruction sequence of 20 bytes
-let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
+let mayStore = 1, isCodeGenOnly = 1, Size = 20 in
 def StoreSwiftAsyncContext
-      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
+      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset,
+                            GPR64:$scratch1, GPR64sp:$scratch2),
                []>, Sched<[]>;
 
 def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
diff --git a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
index f81531766ca13bf..a740b903e384dad 100644
--- a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
+++ b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
@@ -1,9 +1,64 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: not --crash llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s
+; RUN: llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 define swifttailcc void @test_async_with_jumptable_x16_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x16_clobbered:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x9, sp, #8
+; CHECK-NEXT:    movk x9, #49946, lsl #48
+; CHECK-NEXT:    mov x1, x22
+; CHECK-NEXT:    pacdb x1, x9
+; CHECK-NEXT:    str x1, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x9, LJTI0_0@PAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    add x9, x9, LJTI0_0@PAGEOFF
+; CHECK-NEXT:  Ltmp0:
+; CHECK-NEXT:    adr x10, Ltmp0
+; CHECK-NEXT:    ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT:    add x10, x10, x11
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x10
+; CHECK-NEXT:  LBB0_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB0_3
+; CHECK-NEXT:  LBB0_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB0_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    br x2
+; CHECK-NEXT:    .loh AdrpAdd Lloh0, Lloh1
+; CHECK-NEXT:    .cfi_endproc
+; CHECK-NEXT:    .section __TEXT,__const
+; CHECK-NEXT:    .p2align 2, 0x0
+; CHECK-NEXT:  LJTI0_0:
+; CHECK-NEXT:    .long LBB0_3-Ltmp0
+; CHECK-NEXT:    .long LBB0_1-Ltmp0
+; CHECK-NEXT:    .long LBB0_1-Ltmp0
+; CHECK-NEXT:    .long LBB0_2-Ltmp0
 entry:
   %x16 = tail call i64 asm "", "={x16}"()
   %l = load i64, ptr %src, align 8
@@ -36,6 +91,61 @@ exit:
 }
 
 define swifttailcc void @test_async_with_jumptable_x17_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x17_clobbered:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x9, sp, #8
+; CHECK-NEXT:    movk x9, #49946, lsl #48
+; CHECK-NEXT:    mov x1, x22
+; CHECK-NEXT:    pacdb x1, x9
+; CHECK-NEXT:    str x1, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    adrp x9, LJTI1_0@PAGE
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    add x9, x9, LJTI1_0@PAGEOFF
+; CHECK-NEXT:  Ltmp1:
+; CHECK-NEXT:    adr x10, Ltmp1
+; CHECK-NEXT:    ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT:    add x10, x10, x11
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x10
+; CHECK-NEXT:  LBB1_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB1_3
+; CHECK-NEXT:  LBB1_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB1_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    br x2
+; CHECK-NEXT:    .loh AdrpAdd Lloh2, Lloh3
+; CHECK-NEXT:    .cfi_endproc
+; CHECK-NEXT:    .section __TEXT,__const
+; CHECK-NEXT:    .p2align 2, 0x0
+; CHECK-NEXT:  LJTI1_0:
+; CHECK-NEXT:    .long LBB1_3-Ltmp1
+; CHECK-NEXT:    .long LBB1_1-Ltmp1
+; CHECK-NEXT:    .long LBB1_1-Ltmp1
+; CHECK-NEXT:    .long LBB1_2-Ltmp1
 entry:
   %x17 = tail call i64 asm "", "={x17}"()
   %l = load i64, ptr %src, align 8
@@ -68,6 +178,61 @@ exit:
 }
 
 define swifttailcc void @test_async_with_jumptable_x1_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x1_clobbered:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x9, sp, #8
+; CHECK-NEXT:    movk x9, #49946, lsl #48
+; CHECK-NEXT:    mov x2, x22
+; CHECK-NEXT:    pacdb x2, x9
+; CHECK-NEXT:    str x2, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    adrp x9, LJTI2_0@PAGE
+; CHECK-NEXT:  Lloh5:
+; CHECK-NEXT:    add x9, x9, LJTI2_0@PAGEOFF
+; CHECK-NEXT:  Ltmp2:
+; CHECK-NEXT:    adr x10, Ltmp2
+; CHECK-NEXT:    ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT:    add x10, x10, x11
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x10
+; CHECK-NEXT:  LBB2_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB2_3
+; CHECK-NEXT:  LBB2_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB2_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    br x2
+; CHECK-NEXT:    .loh AdrpAdd Lloh4, Lloh5
+; CHECK-NEXT:    .cfi_endproc
+; CHECK-NEXT:    .section __TEXT,__const
+; CHECK-NEXT:    .p2align 2, 0x0
+; CHECK-NEXT:  LJTI2_0:
+; CHECK-NEXT:    .long LBB2_3-Ltmp2
+; CHECK-NEXT:    .long LBB2_1-Ltmp2
+; CHECK-NEXT:    .long LBB2_1-Ltmp2
+; CHECK-NEXT:    .long LBB2_2-Ltmp2
 entry:
   %x1 = tail call i64 asm "", "={x1}"()
   %l = load i64, ptr %src, align 8
@@ -100,6 +265,65 @@ exit:
 }
 
 define swifttailcc void @test_async_with_jumptable_x1_x9_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x1_x9_clobbered:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x2, sp, #8
+; CHECK-NEXT:    movk x2, #49946, lsl #48
+; CHECK-NEXT:    mov x3, x22
+; CHECK-NEXT:    pacdb x3, x2
+; CHECK-NEXT:    str x3, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh6:
+; CHECK-NEXT:    adrp x10, LJTI3_0@PAGE
+; CHECK-NEXT:  Lloh7:
+; CHECK-NEXT:    add x10, x10, LJTI3_0@PAGEOFF
+; CHECK-NEXT:  Ltmp3:
+; CHECK-NEXT:    adr x11, Ltmp3
+; CHECK-NEXT:    ldrsw x12, [x10, x8, lsl #2]
+; CHECK-NEXT:    add x11, x11, x12
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x11
+; CHECK-NEXT:  LBB3_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB3_3
+; CHECK-NEXT:  LBB3_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB3_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x2, x0
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT:    br x2
+; CHECK-NEXT:    .loh AdrpAdd Lloh6, Lloh7
+; CHECK-NEXT:    .cfi_endproc
+; CHECK-NEXT:    .section __TEXT,__const
+; CHECK-NEXT:    .p2align 2, 0x0
+; CHECK-NEXT:  LJTI3_0:
+; CHECK-NEXT:    .long LBB3_3-Ltmp3
+; CHECK-NEXT:    .long LBB3_1-Ltmp3
+; CHECK-NEXT:    .long LBB3_1-Ltmp3
+; CHECK-NEXT:    .long LBB3_2-Ltmp3
 entry:
   %x1 = tail call i64 asm "", "={x1}"()
   %x9 = tail call i64 asm "", "={x9}"()
@@ -135,6 +359,117 @@ exit:
 
 ; There are 2 available scratch registers left, shrink-wrapping can happen.
 define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_2_available_regs_left:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x10, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x17, sp, #8
+; CHECK-NEXT:    movk x17, #49946, lsl #48
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    pacdb x20, x17
+; CHECK-NEXT:    str x20, [sp, #8]
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x20, x22
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:  Lloh8:
+; CHECK-NEXT:    adrp x17, LJTI4_0@PAGE
+; CHECK-NEXT:  Lloh9:
+; CHECK-NEXT:    add x17, x17, LJTI4_0@PAGEOFF
+; CHECK-NEXT:  Ltmp4:
+; CHECK-NEXT:    adr x0, Ltmp4
+; CHECK-NEXT:    ldrsw x19, [x17, x10, lsl #2]
+; CHECK-NEXT:    add x0, x0, x19
+; CHECK-NEXT:    mov x19, x20
+; CHECK-NEXT:    br x0
+; CHECK-NEXT:  LBB4_1: ; %then.2
+; CHECK-NEXT:    mov x19, #0 ; =0x0
+; CHECK-NEXT:    b LBB4_3
+; CHECK-NEXT:  LBB4_2: ; %then.3
+; CHECK-NEXT:    mov x19, x22
+; CHECK-NEXT:  LBB4_3: ; %exit
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; C...
[truncated]


github-actions bot commented Nov 24, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

Add two additional arguments to StoreSwiftAsyncContext to specify the
scratch registers to use, instead of hard-coding them. This fixes a
miscompile where StoreSwiftAsyncContext could clobber live registers
after being moved during shrink-wrapping.
@fhahn fhahn force-pushed the store-swift-async-context-pseudo-scratch-regs branch from 80cf7b9 to fe623c3 on November 27, 2023 at 18:39
@fhahn
Contributor Author

fhahn commented Nov 29, 2023

ping :)

Member

@ahmedbougacha ahmedbougacha left a comment

Hmm, this may be a little tricky: x16/x17 are special here, because they're the only GPRs that have some platform-provided security guarantees (they're signed when saved on context switches), which we care about whenever we do a PAC sign.

In this case that's maybe not all that meaningful though, in part because the security guarantees are more about the kernel, where we don't have this sort of codegen today, and in part because we're just feeding the PAC from x22 anyway on one side (though on the other, SP is safe to derive discriminators from.)

But at the end of the day I don't know that we have a much better way to fix this (that wouldn't involve wiring a bunch of stuff through shrink-wrapping and/or PEI?). I'll think about it a little bit more.

In the meantime, let me know if you think of a simple alternative that would try harder to keep this in x16/x17, so that we don't have to prove that it's always okay if it's in other GPRs ;)

@fhahn
Contributor Author

fhahn commented Nov 29, 2023

Discussed this also with @TNorthover offline. We could either just check in AArch64FrameLowering::canUseAsPrologue whether x16/x17 are clobbered, and thus prevent shrink-wrapping if they are (instead of checking whether any scratch registers are available at all); a sketch of that option follows below.

Or default to X16/X17 when they are available, and pick alternative registers only when they are not.
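
For illustration only, a minimal sketch of the first option (the direction #73945 takes), assuming a LivePhysRegs-based liveness query inside AArch64FrameLowering::canUseAsPrologue. The locals (MF, RegInfo, AFI) match those already in that function, but this is not the actual follow-up patch:

    // Hypothetical check: if the expansion keeps hard-coding X16/X17, refuse
    // to treat a block as a prologue when either register is live on entry.
    if (AFI->hasSwiftAsyncContext()) {
      LivePhysRegs LiveRegs(*RegInfo);
      LiveRegs.addLiveIns(MBB);
      if (!LiveRegs.available(MF->getRegInfo(), AArch64::X16) ||
          !LiveRegs.available(MF->getRegInfo(), AArch64::X17))
        return false;
    }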

@fhahn
Contributor Author

fhahn commented Nov 30, 2023

Put up #73945 to only check whether X16/X17 are clobbered.

@fhahn fhahn closed this Jan 4, 2024
@fhahn fhahn deleted the store-swift-async-context-pseudo-scratch-regs branch January 4, 2024 10:39