Skip to content

DNM: X86: fix frame offset calculation with mandatory tail calls #3121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 30 additions & 42 deletions llvm/lib/Target/X86/X86FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1328,15 +1328,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;

// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta && IsWin64Prologue)
// Space reserved for stack-based arguments when making a (ABI-guaranteed)
// tail call.
unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
if (TailCallArgReserveSize && IsWin64Prologue)
report_fatal_error("Can't handle guaranteed tail call under win64 yet");

if (TailCallReturnAddrDelta < 0)
X86FI->setCalleeSavedFrameSize(
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

const bool EmitStackProbeCall =
STI.getTargetLowering()->hasStackProbeSymbol(MF);
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
Expand Down Expand Up @@ -1370,7 +1367,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
!EmitStackProbeCall && // No stack probes.
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
uint64_t MinSize =
X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
if (HasFP) MinSize += SlotSize;
X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
Expand All @@ -1380,8 +1378,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Insert stack pointer adjustment for later moving of return addr. Only
// applies to tail call optimized functions where the callee argument stack
// size is bigger than the caller's.
if (TailCallReturnAddrDelta < 0) {
BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta,
if (TailCallArgReserveSize != 0) {
BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
/*InEpilogue=*/false)
.setMIFlag(MachineInstr::FrameSetup);
}
Expand Down Expand Up @@ -1430,7 +1428,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (X86FI->getRestoreBasePointer())
FrameSize += SlotSize;

NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
NumBytes = FrameSize -
(X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);

// Callee-saved registers are pushed on stack before the stack is realigned.
if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
Expand Down Expand Up @@ -1562,7 +1561,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
} else {
assert(!IsFunclet && "funclets without FPs not yet implemented");
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
NumBytes = StackSize -
(X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
}

// Update the offset adjustment, which is mainly used by codeview to translate
Expand Down Expand Up @@ -2019,6 +2019,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
uint64_t StackSize = MFI.getStackSize();
uint64_t MaxAlign = calculateMaxStackAlign(MF);
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
bool HasFP = hasFP(MF);
uint64_t NumBytes = 0;

Expand All @@ -2032,14 +2033,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
} else if (HasFP) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
NumBytes = FrameSize - CSSize;
NumBytes = FrameSize - CSSize - TailCallArgReserveSize;

// Callee-saved registers were pushed on stack before the stack was
// realigned.
if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
NumBytes = alignTo(FrameSize, MaxAlign);
} else {
NumBytes = StackSize - CSSize;
NumBytes = StackSize - CSSize - TailCallArgReserveSize;
}
uint64_t SEHStackAllocAmt = NumBytes;

Expand Down Expand Up @@ -2106,7 +2107,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,

if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();

// If there is an ADD32ri or SUB32ri of ESP immediately before this
// instruction, merge the two instructions.
if (NumBytes || MFI.hasVarSizedObjects())
Expand Down Expand Up @@ -2151,7 +2151,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (!hasFP(MF) && NeedsDwarfCFI) {
// Define the current CFA rule to use the provided offset.
BuildCFI(MBB, MBBI, DL,
MCCFIInstruction::cfiDefCfaOffset(nullptr, CSSize + SlotSize));
MCCFIInstruction::cfiDefCfaOffset(
nullptr, CSSize + TailCallArgReserveSize + SlotSize));
}
--MBBI;
}
Expand Down Expand Up @@ -2230,7 +2231,6 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t StackSize = MFI.getStackSize();
bool HasFP = hasFP(MF);
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int64_t FPDelta = 0;

Expand Down Expand Up @@ -2266,39 +2266,27 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
"FPDelta isn't aligned per the Win64 ABI!");
}


if (TRI->hasBasePointer(MF)) {
assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
if (FI < 0) {
// Skip the saved EBP.
return StackOffset::getFixed(Offset + SlotSize + FPDelta);
} else {
assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
return StackOffset::getFixed(Offset + StackSize);
}
} else if (TRI->needsStackRealignment(MF)) {
if (FI < 0) {
// Skip the saved EBP.
return StackOffset::getFixed(Offset + SlotSize + FPDelta);
} else {
assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
return StackOffset::getFixed(Offset + StackSize);
}
// FIXME: Support tail calls
} else {
if (!HasFP)
return StackOffset::getFixed(Offset + StackSize);

// Skip the saved EBP.
if (FrameReg == TRI->getFramePtr()) {
// Skip saved EBP/RBP
Offset += SlotSize;

// Account for restricted Windows prologue.
Offset += FPDelta;

// Skip the RETADDR move area
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
Offset -= TailCallReturnAddrDelta;

return StackOffset::getFixed(Offset);
}

return StackOffset::getFixed(Offset + FPDelta);
// FrameReg is either the stack pointer or a base pointer. But the base is
// located at the end of the statically known StackSize so the distinction
// doesn't really matter.
if (TRI->needsStackRealignment(MF) || TRI->hasBasePointer(MF))
assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
return StackOffset::getFixed(Offset + StackSize);
}

int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/X86/swifttail-realign.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s

; Tail-call target: takes one [6 x i64] aggregate followed by two i64 values.
; NOTE(review): presumably the aggregate is there to use up the integer
; argument registers so the trailing i64s end up on the stack -- confirm.
declare swifttailcc void @callee([6 x i64], i64, i64)

; External pointer global; @caller stores the address of its alloca here.
@var = external global i8*

; Exercises frame-offset computation for a swifttailcc tail call made from a
; function that clobbers %rbx and performs a variable-sized, 32-byte-aligned
; alloca. The expected assembly below is matched in order:
;   * prologue: a 16-byte stack adjustment ahead of the pushes (presumably the
;     space reserved for the tail call's stack arguments -- confirm), frame
;     pointer setup, %rbx spill, and realignment to 32 bytes;
;   * tail-call setup: the return address is loaded from 24(%rbp) and stored
;     to 8(%rbp), then 42 and 0 are stored at 16(%rbp) and 24(%rbp);
;   * epilogue: %rsp is recomputed from %rbp, %rbx and %rbp are popped, and
;     control transfers to callee with a jmp.
define swifttailcc void @caller(i64 %n) {
; CHECK-LABEL: caller:
; CHECK: subq $16, %rsp
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
; CHECK: pushq %rbx
; CHECK: andq $-32, %rsp
; [... don't really care what happens to rsp to allocate %ptr ...]
; The displacement is a bare integer in AT&T syntax (no '%' prefix).
; CHECK: movq 24(%rbp), [[RETADDR:%.*]]
; CHECK: movq [[RETADDR]], 8(%rbp)
; CHECK: movq $42, 16(%rbp)
; CHECK: movq $0, 24(%rbp)
; CHECK: leaq -8(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %rbp
; CHECK: jmp callee

  ; Empty inline asm that lists rbx as clobbered; the expected output above
  ; includes a matching pushq/popq of %rbx.
  call void asm sideeffect "", "~{rbx}"()
  ; Runtime-sized alloca with 32-byte alignment; the expected output above
  ; includes the corresponding andq $-32 realignment.
  %ptr = alloca i8, i64 %n, align 32
  ; Store the alloca's address to an external global.
  store i8* %ptr, i8** @var
  tail call swifttailcc void @callee([6 x i64] undef, i64 42, i64 0)
  ret void
}