Skip to content

Commit edc0f26

Browse files
committed
[X86] Avoid generating nested CALLSEQ for TLS pointer function arguments
When a pointer to thread-local storage is passed in a function call, ISel first lowers the call and wraps the resulting code in CALLSEQ markers. Afterwards, to compute the pointer to TLS, a call to retrieve the TLS base address is generated and then wrapped in a set of CALLSEQ markers. If the latter call is inserted into the call sequence of the former call, this leads to nested call frames, which are illegal and lead to errors in the machine verifier. This patch avoids surrounding the call to compute the TLS base address in CALLSEQ markers if it is already surrounded by such markers. It relies on zero-sized call frames being represented in the call frame size info stored in the MachineBBs. Fixes #45574 and #98042.
1 parent 20aa681 commit edc0f26

File tree

4 files changed

+68
-2
lines changed

4 files changed

+68
-2
lines changed

llvm/include/llvm/CodeGen/MachineFrameInfo.h

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class MachineFunction;
2626
class MachineBasicBlock;
2727
class BitVector;
2828
class AllocaInst;
29+
class MachineFrameSizeInfo;
2930
class TargetInstrInfo;
3031

3132
/// The CalleeSavedInfo class tracks the information needed to locate where a
@@ -283,6 +284,10 @@ class MachineFrameInfo {
283284
/// It is only valid during and after prolog/epilog code insertion.
284285
uint64_t MaxCallFrameSize = ~UINT64_C(0);
285286

287+
/// Call frame sizes for the MachineFunction's MachineBasicBlocks. This is set
288+
/// by the MachineFrameSizeInfo constructor and cleared by its destructor.
289+
MachineFrameSizeInfo *SizeInfo = nullptr;
290+
286291
/// The number of bytes of callee saved registers that the target wants to
287292
/// report for the current function in the CodeView S_FRAMEPROC record.
288293
unsigned CVBytesOfCalleeSavedRegisters = 0;
@@ -676,6 +681,13 @@ class MachineFrameInfo {
676681
}
677682
void setMaxCallFrameSize(uint64_t S) { MaxCallFrameSize = S; }
678683

684+
/// Return an object that can be queried for call frame sizes at specific
685+
/// locations in the MachineFunction. Constructing a MachineFrameSizeInfo
686+
/// object for the MachineFunction automatically makes it available via this
687+
/// accessor during the object's lifetime.
688+
MachineFrameSizeInfo *getSizeInfo() const { return SizeInfo; }
689+
void setSizeInfo(MachineFrameSizeInfo *SI) { SizeInfo = SI; }
690+
679691
/// Returns how many bytes of callee-saved registers the target pushed in the
680692
/// prologue. Only used for debug info.
681693
unsigned getCVBytesOfCalleeSavedRegisters() const {
@@ -851,15 +863,24 @@ class MachineFrameInfo {
851863
/// MachineBasicBlocks of a MachineFunction based on call frame setup and
852864
/// destroy pseudo instructions. Usually, no call frame is open at block
853865
/// boundaries, except if a call sequence has been split into multiple blocks.
854-
/// Computing this information is deferred until it is queried.
866+
///
867+
/// Computing this information is deferred until it is queried. Upon
868+
/// construction, a MachineFrameSizeInfo object registers itself in the
869+
/// MachineFunction's MachineFrameInfo (and it unregisters when destructed).
870+
/// While registered, it can be retrieved via MachineFrameInfo::getSizeInfo().
855871
///
856872
/// This class assumes that call frame instructions are placed properly, i.e.,
857873
/// every program path hits a frame destroy of equal size after hitting a frame
858874
/// setup, and a frame setup of equal size before a frame destroy. Nested call
859875
/// frame sequences are not allowed.
860876
class MachineFrameSizeInfo {
861877
public:
862-
MachineFrameSizeInfo(MachineFunction &MF) : MF(MF) {}
878+
MachineFrameSizeInfo(MachineFunction &MF) : MF(MF) {
879+
assert(MF.getFrameInfo().getSizeInfo() == nullptr);
880+
MF.getFrameInfo().setSizeInfo(this);
881+
}
882+
883+
~MachineFrameSizeInfo() { MF.getFrameInfo().setSizeInfo(nullptr); }
863884

864885
/// Get the call frame size just before MI. Contains no value if MI is not in
865886
/// a call sequence. Zero-sized call frames are possible.

llvm/lib/CodeGen/FinalizeISel.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ static std::pair<bool, bool> runImpl(MachineFunction &MF) {
4747
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4848
const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
4949

50+
// Pseudo-Lowering might require the sizes of call frames, so compute them
51+
// (lazily). The MachineFrameSizeInfo registers itself in MF's
52+
// MachineFrameInfo for the SizeInfo's lifetime and does not need to be passed
53+
// explicitly.
54+
const MachineFrameSizeInfo MFSI(MF);
55+
5056
// Iterate through each instruction in the function, looking for pseudos.
5157
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
5258
MachineBasicBlock *MBB = &*I;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35588,6 +35588,15 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
3558835588
// inside MC, therefore without the two markers shrink-wrapping
3558935589
// may push the prologue/epilogue past them.
3559035590
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
35591+
35592+
// Do not introduce CALLSEQ markers if we are already in a call sequence.
35593+
// Nested call sequences are not allowed and cause errors in the machine
35594+
// verifier.
35595+
MachineFrameSizeInfo *MFSI = MI.getMF()->getFrameInfo().getSizeInfo();
35596+
assert(MFSI && "Call frame size information needs to be available!");
35597+
if (MFSI->getCallFrameSizeAt(MI).has_value())
35598+
return BB;
35599+
3559135600
const MIMetadata MIMD(MI);
3559235601
MachineFunction &MF = *BB->getParent();
3559335602

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=x86_64 -verify-machineinstrs -relocation-model=pic < %s | FileCheck %s
3+
4+
; Passing a pointer to thread-local storage to a function can be problematic
5+
; since computing such addresses requires a function call that is introduced
6+
; very late in instruction selection. We need to ensure that we don't introduce
7+
; nested call sequence markers if this function call happens in a call sequence.
8+
9+
@TLS = internal thread_local global i64 zeroinitializer, align 8
10+
declare void @bar(ptr)
11+
define internal void @foo() {
12+
; CHECK-LABEL: foo:
13+
; CHECK: # %bb.0:
14+
; CHECK-NEXT: pushq %rbx
15+
; CHECK-NEXT: .cfi_def_cfa_offset 16
16+
; CHECK-NEXT: .cfi_offset %rbx, -16
17+
; CHECK-NEXT: leaq TLS@TLSLD(%rip), %rdi
18+
; CHECK-NEXT: callq __tls_get_addr@PLT
19+
; CHECK-NEXT: leaq TLS@DTPOFF(%rax), %rbx
20+
; CHECK-NEXT: movq %rbx, %rdi
21+
; CHECK-NEXT: callq bar@PLT
22+
; CHECK-NEXT: movq %rbx, %rdi
23+
; CHECK-NEXT: callq bar@PLT
24+
; CHECK-NEXT: popq %rbx
25+
; CHECK-NEXT: .cfi_def_cfa_offset 8
26+
; CHECK-NEXT: retq
27+
call void @bar(ptr @TLS)
28+
call void @bar(ptr @TLS)
29+
ret void
30+
}

0 commit comments

Comments
 (0)