Skip to content

Commit 7e212d7

Browse files
committed
[x86_64][windows][swift] do not use Swift async extended frame for wi… (llvm#80468)
…ndows x86_64 targets that use windows 64 prologue Windows x86_64 stack frame layout is currently not compatible with Swift's async extended frame, which reserves the slot right below RBP (RBP-8) for the async context pointer, as it doesn't account for the fact that a stack object in a win64 frame can be allocated at the same location. This can cause issues at runtime, for instance, Swift's TCA test code has functions that fail because of this issue, as they spill a value to that slack slot, which then gets overwritten by a store into address returned by the @llvm.swift.async.context.addr() intrinsic (that ends up being RBP - 8), leading to an incorrect value being used at a later point when that stack slot is being read from again. This change drops the use of async extended frame for windows x86_64 subtargets and instead uses the x32 based approach of allocating a separate stack slot for the stored async context pointer. Additionally, LLDB which is the primary consumer of the extended frame makes assumptions like checking for a saved previous frame pointer at the current frame pointer address, which is also incompatible with the windows x86_64 frame layout, as the previous frame pointer is not guaranteed to be stored at the current frame pointer address. Therefore the extended frame layout can be turned off to fix the current miscompile without introducing regression into LLDB for windows x86_64 as it already doesn't work correctly. I am still investigating what should be made for LLDB to support using an allocated stack slot to store the async frame context instead of being located at RBP - 8 for windows.
1 parent 6751d2a commit 7e212d7

File tree

4 files changed

+71
-34
lines changed

4 files changed

+71
-34
lines changed

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1614,6 +1614,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
16141614
[[fallthrough]];
16151615

16161616
case SwiftAsyncFramePointerMode::Always:
1617+
assert(
1618+
!IsWin64Prologue &&
1619+
"win64 prologue does not set the bit 60 in the saved frame pointer");
16171620
BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
16181621
.addUse(MachineFramePtr)
16191622
.addImm(60)
@@ -1754,6 +1757,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
17541757

17551758
if (!IsFunclet) {
17561759
if (X86FI->hasSwiftAsyncContext()) {
1760+
assert(!IsWin64Prologue &&
1761+
"win64 prologue does not store async context right below rbp");
17571762
const auto &Attrs = MF.getFunction().getAttributes();
17581763

17591764
// Before we update the live frame pointer we have to ensure there's a

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4340,14 +4340,17 @@ SDValue X86TargetLowering::LowerFormalArguments(
43404340
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
43414341
if (Ins[I].Flags.isSwiftAsync()) {
43424342
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
4343-
if (Subtarget.is64Bit())
4343+
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
43444344
X86FI->setHasSwiftAsyncContext(true);
43454345
else {
4346-
int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
4346+
int PtrSize = Subtarget.is64Bit() ? 8 : 4;
4347+
int FI =
4348+
MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
43474349
X86FI->setSwiftAsyncContextFrameIdx(FI);
4348-
SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
4349-
DAG.getFrameIndex(FI, MVT::i32),
4350-
MachinePointerInfo::getFixedStack(MF, FI));
4350+
SDValue St = DAG.getStore(
4351+
DAG.getEntryNode(), dl, InVals[I],
4352+
DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
4353+
MachinePointerInfo::getFixedStack(MF, FI));
43514354
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
43524355
}
43534356
}
@@ -29186,6 +29189,15 @@ static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain,
2918629189
return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
2918729190
}
2918829191

29192+
bool X86::isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
29193+
const MachineFunction &MF) {
29194+
if (!Subtarget.is64Bit())
29195+
return false;
29196+
// 64-bit targets support extended Swift async frame setup,
29197+
// except for targets that use the windows 64 prologue.
29198+
return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
29199+
}
29200+
2918929201
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
2919029202
SelectionDAG &DAG) {
2919129203
unsigned IntNo = Op.getConstantOperandVal(1);
@@ -29197,7 +29209,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
2919729209
SDLoc dl(Op);
2919829210
auto &MF = DAG.getMachineFunction();
2919929211
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
29200-
if (Subtarget.is64Bit()) {
29212+
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) {
2920129213
MF.getFrameInfo().setFrameAddressIsTaken(true);
2920229214
X86FI->setHasSwiftAsyncContext(true);
2920329215
SDValue Chain = Op->getOperand(0);
@@ -29210,13 +29222,15 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
2921029222
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
2921129223
CopyRBP.getValue(1));
2921229224
} else {
29213-
// 32-bit so no special extended frame, create or reuse an existing
29214-
// stack slot.
29225+
// No special extended frame, create or reuse an existing stack slot.
29226+
int PtrSize = Subtarget.is64Bit() ? 8 : 4;
2921529227
if (!X86FI->getSwiftAsyncContextFrameIdx())
2921629228
X86FI->setSwiftAsyncContextFrameIdx(
29217-
MF.getFrameInfo().CreateStackObject(4, Align(4), false));
29229+
MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize),
29230+
false));
2921829231
SDValue Result =
29219-
DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
29232+
DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(),
29233+
PtrSize == 8 ? MVT::i64 : MVT::i32);
2922029234
// Return { result, chain }.
2922129235
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
2922229236
Op->getOperand(0));

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,11 @@ namespace llvm {
966966
/// Check if Op is an operation that could be folded into a zero extend x86
967967
/// instruction.
968968
bool mayFoldIntoZeroExtend(SDValue Op);
969+
970+
/// True if the target supports the extended frame for async Swift
971+
/// functions.
972+
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
973+
const MachineFunction &MF);
969974
} // end namespace X86
970975

971976
//===--------------------------------------------------------------------===//

llvm/test/CodeGen/X86/swift-async-win64.ll

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,12 @@ define void @simple(ptr swiftasync %context) "frame-pointer"="all" {
66
}
77

88
; CHECK64-LABEL: simple:
9-
; CHECK64: btsq $60, %rbp
109
; CHECK64: pushq %rbp
11-
; CHECK64: pushq %r14
12-
; CHECK64: leaq 8(%rsp), %rbp
13-
; [...]
14-
; CHECK64: addq $16, %rsp
10+
; CHECK64: pushq %rax
11+
; CHECK64: movq %rsp, %rbp
12+
; CHECK64: movq %r14, (%rbp)
13+
; CHECK64: addq $8, %rsp
1514
; CHECK64: popq %rbp
16-
; CHECK64: btrq $60, %rbp
1715
; CHECK64: retq
1816

1917
; CHECK32-LABEL: simple:
@@ -26,20 +24,20 @@ define void @more_csrs(ptr swiftasync %context) "frame-pointer"="all" {
2624
}
2725

2826
; CHECK64-LABEL: more_csrs:
29-
; CHECK64: btsq $60, %rbp
3027
; CHECK64: pushq %rbp
3128
; CHECK64: .seh_pushreg %rbp
32-
; CHECK64: pushq %r14
33-
; CHECK64: .seh_pushreg %r14
34-
; CHECK64: leaq 8(%rsp), %rbp
35-
; CHECK64: subq $8, %rsp
3629
; CHECK64: pushq %r15
3730
; CHECK64: .seh_pushreg %r15
31+
; CHECK64: pushq %rax
32+
; CHECK64: .seh_stackalloc 8
33+
; CHECK64: movq %rsp, %rbp
34+
; CHECK64: .seh_setframe %rbp, 0
35+
; CHECK64: .seh_endprologue
36+
; CHECK64: movq %r14, (%rbp)
3837
; [...]
38+
; CHECK64: addq $8, %rsp
3939
; CHECK64: popq %r15
40-
; CHECK64: addq $16, %rsp
4140
; CHECK64: popq %rbp
42-
; CHECK64: btrq $60, %rbp
4341
; CHECK64: retq
4442

4543
declare void @f(ptr)
@@ -51,21 +49,16 @@ define void @locals(ptr swiftasync %context) "frame-pointer"="all" {
5149
}
5250

5351
; CHECK64-LABEL: locals:
54-
; CHECK64: btsq $60, %rbp
5552
; CHECK64: pushq %rbp
5653
; CHECK64: .seh_pushreg %rbp
57-
; CHECK64: pushq %r14
58-
; CHECK64: .seh_pushreg %r14
59-
; CHECK64: leaq 8(%rsp), %rbp
60-
; CHECK64: subq $88, %rsp
54+
; CHECK64: subq $80, %rsp
55+
; CHECK64: movq %r14, -8(%rbp)
6156

6257
; CHECK64: leaq -48(%rbp), %rcx
6358
; CHECK64: callq f
6459

6560
; CHECK64: addq $80, %rsp
66-
; CHECK64: addq $16, %rsp
6761
; CHECK64: popq %rbp
68-
; CHECK64: btrq $60, %rbp
6962
; CHECK64: retq
7063

7164
define void @use_input_context(ptr swiftasync %context, ptr %ptr) "frame-pointer"="all" {
@@ -84,7 +77,7 @@ define ptr @context_in_func() "frmae-pointer"="non-leaf" {
8477
}
8578

8679
; CHECK64-LABEL: context_in_func:
87-
; CHECK64: leaq -8(%rbp), %rax
80+
; CHECK64: movq %rsp, %rax
8881

8982
; CHECK32-LABEL: context_in_func:
9083
; CHECK32: movl %esp, %eax
@@ -96,13 +89,33 @@ define void @write_frame_context(ptr swiftasync %context, ptr %new_context) "fra
9689
}
9790

9891
; CHECK64-LABEL: write_frame_context:
99-
; CHECK64: movq %rbp, [[TMP:%.*]]
100-
; CHECK64: subq $8, [[TMP]]
101-
; CHECK64: movq %rcx, ([[TMP]])
92+
; CHECK64: movq %rcx, (%rsp)
10293

10394
define void @simple_fp_elim(ptr swiftasync %context) "frame-pointer"="non-leaf" {
10495
ret void
10596
}
10697

10798
; CHECK64-LABEL: simple_fp_elim:
10899
; CHECK64-NOT: btsq
100+
101+
define void @manylocals_and_overwritten_context(ptr swiftasync %context, ptr %new_context) "frame-pointer"="all" {
102+
%ptr = call ptr @llvm.swift.async.context.addr()
103+
store ptr %new_context, ptr %ptr
104+
%var1 = alloca i64, i64 1
105+
call void @f(ptr %var1)
106+
%var2 = alloca i64, i64 16
107+
call void @f(ptr %var2)
108+
%ptr2 = call ptr @llvm.swift.async.context.addr()
109+
store ptr %new_context, ptr %ptr2
110+
ret void
111+
}
112+
113+
; CHECK64-LABEL: manylocals_and_overwritten_context:
114+
; CHECK64: pushq %rbp
115+
; CHECK64: subq $184, %rsp
116+
; CHECK64: leaq 128(%rsp), %rbp
117+
; CHECK64: movq %rcx, %rsi
118+
; CHECK64: movq %rcx, 48(%rbp)
119+
; CHECK64: callq f
120+
; CHECK64: callq f
121+
; CHECK64: movq %rsi, 48(%rbp)

0 commit comments

Comments
 (0)