Skip to content

Commit ee73f87

Browse files
committed
[X86] Insert CALLSEQ when lowering GlobalTLSAddress for ELF targets
When lowering a TLS address for an ELF target, we introduce a call to obtain the TLS base address. So far, we do not insert CALLSEQ_START/END markers around this call when it is generated, but use a custom inserter to insert them in a later phase. This is problematic, since the TLS address call can land in a CALLSEQ for another call before it is wrapped in its own CALLSEQ. That results in nested CALLSEQs, which are illegal and cause errors when expensive checks are enabled, e.g., in issues llvm#45574 and llvm#98042. This patch instead wraps each TLS address call in a CALLSEQ when it is generated so that instruction selection can avoid nested CALLSEQs. This is an alternative to PR llvm#106965, which instead changes the custom inserter to avoid generating CALLSEQs when the TLS address call is already in a CALLSEQ. This patch also effectively reverts commit 228978c, which introduced the CustomInserter that so far added the CALLSEQ around TLSAddrs. Fixes llvm#45574 and llvm#98042.
1 parent bbc0e63 commit ee73f87

File tree

4 files changed

+83
-77
lines changed

4 files changed

+83
-77
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 50 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -18828,10 +18828,11 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
1882818828
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
1882918829
}
1883018830

18831-
static SDValue
18832-
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
18833-
SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
18834-
unsigned char OperandFlags, bool LocalDynamic = false) {
18831+
static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain,
18832+
GlobalAddressSDNode *GA, const EVT PtrVT,
18833+
unsigned ReturnReg, unsigned char OperandFlags,
18834+
bool LoadGlobalBaseReg = false,
18835+
bool LocalDynamic = false) {
1883518836
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1883618837
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1883718838
SDLoc dl(GA);
@@ -18841,8 +18842,25 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
1884118842
TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags);
1884218843
auto UI = TGA->use_begin();
1884318844
// Reuse existing GetTLSADDR node if we can find it.
18844-
if (UI != TGA->use_end())
18845-
return SDValue(*UI->use_begin()->use_begin(), 0);
18845+
if (UI != TGA->use_end()) {
18846+
// TLSDESC uses TGA.
18847+
auto TLSDescOp = UI;
18848+
assert(TLSDescOp->getOpcode() == X86ISD::TLSDESC &&
18849+
"Unexpected TLSDESC DAG");
18850+
// CALLSEQ_END uses TGA via a chain and glue.
18851+
auto CallSeqEndOp = TLSDescOp->use_begin();
18852+
assert(CallSeqEndOp->getOpcode() == ISD::CALLSEQ_END &&
18853+
"Unexpected TLSDESC DAG");
18854+
// CopyFromReg uses CALLSEQ_END via a chain and glue.
18855+
auto CopyFromRegOp = CallSeqEndOp->use_begin();
18856+
assert(CopyFromRegOp->getOpcode() == ISD::CopyFromReg &&
18857+
"Unexpected TLSDESC DAG");
18858+
// The Add generated at the final return of this function uses
18859+
// CopyFromReg.
18860+
auto AddOp = CopyFromRegOp->use_begin();
18861+
assert(AddOp->getOpcode() == ISD::ADD && "Unexpected TLSDESC DAG");
18862+
return SDValue(*AddOp, 0);
18863+
}
1884618864
} else {
1884718865
TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
1884818866
GA->getOffset(), OperandFlags);
@@ -18852,13 +18870,20 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
1885218870
: LocalDynamic ? X86ISD::TLSBASEADDR
1885318871
: X86ISD::TLSADDR;
1885418872

18855-
if (InGlue) {
18856-
SDValue Ops[] = { Chain, TGA, *InGlue };
18873+
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
18874+
if (LoadGlobalBaseReg) {
18875+
SDValue InGlue;
18876+
Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
18877+
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
18878+
InGlue);
18879+
InGlue = Chain.getValue(1);
18880+
SDValue Ops[] = {Chain, TGA, InGlue};
1885718881
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
1885818882
} else {
18859-
SDValue Ops[] = { Chain, TGA };
18883+
SDValue Ops[] = {Chain, TGA};
1886018884
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
1886118885
}
18886+
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, Chain.getValue(1), dl);
1886218887

1886318888
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1886418889
MFI.setAdjustsStack(true);
@@ -18884,30 +18909,24 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
1888418909
static SDValue
1888518910
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
1888618911
const EVT PtrVT) {
18887-
SDValue InGlue;
18888-
SDLoc dl(GA); // ? function entry point might be better
18889-
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
18890-
DAG.getNode(X86ISD::GlobalBaseReg,
18891-
SDLoc(), PtrVT), InGlue);
18892-
InGlue = Chain.getValue(1);
18893-
18894-
return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
18912+
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::EAX,
18913+
X86II::MO_TLSGD, /*LoadGlobalBaseReg=*/true);
1889518914
}
1889618915

1889718916
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
1889818917
static SDValue
1889918918
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
1890018919
const EVT PtrVT) {
18901-
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
18902-
X86::RAX, X86II::MO_TLSGD);
18920+
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::RAX,
18921+
X86II::MO_TLSGD);
1890318922
}
1890418923

1890518924
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
1890618925
static SDValue
1890718926
LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
1890818927
const EVT PtrVT) {
18909-
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
18910-
X86::EAX, X86II::MO_TLSGD);
18928+
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::EAX,
18929+
X86II::MO_TLSGD);
1891118930
}
1891218931

1891318932
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
@@ -18916,22 +18935,20 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
1891618935
SDLoc dl(GA);
1891718936

1891818937
// Get the start address of the TLS block for this module.
18919-
X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
18920-
.getInfo<X86MachineFunctionInfo>();
18938+
X86MachineFunctionInfo *MFI =
18939+
DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
1892118940
MFI->incNumLocalDynamicTLSAccesses();
1892218941

1892318942
SDValue Base;
1892418943
if (Is64Bit) {
1892518944
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
18926-
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
18927-
X86II::MO_TLSLD, /*LocalDynamic=*/true);
18945+
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, ReturnReg,
18946+
X86II::MO_TLSLD, /*LoadGlobalBaseReg=*/false,
18947+
/*LocalDynamic=*/true);
1892818948
} else {
18929-
SDValue InGlue;
18930-
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
18931-
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
18932-
InGlue = Chain.getValue(1);
18933-
Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
18934-
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
18949+
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::EAX,
18950+
X86II::MO_TLSLDM, /*LoadGlobalBaseReg=*/true,
18951+
/*LocalDynamic=*/true);
1893518952
}
1893618953

1893718954
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -36002,36 +36019,6 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
3600236019
return BB;
3600336020
}
3600436021

36005-
MachineBasicBlock *
36006-
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
36007-
MachineBasicBlock *BB) const {
36008-
// So, here we replace TLSADDR with the sequence:
36009-
// adjust_stackdown -> TLSADDR -> adjust_stackup.
36010-
// We need this because TLSADDR is lowered into calls
36011-
// inside MC, therefore without the two markers shrink-wrapping
36012-
// may push the prologue/epilogue pass them.
36013-
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
36014-
const MIMetadata MIMD(MI);
36015-
MachineFunction &MF = *BB->getParent();
36016-
36017-
// Emit CALLSEQ_START right before the instruction.
36018-
MF.getFrameInfo().setAdjustsStack(true);
36019-
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
36020-
MachineInstrBuilder CallseqStart =
36021-
BuildMI(MF, MIMD, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
36022-
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
36023-
36024-
// Emit CALLSEQ_END right after the instruction.
36025-
// We don't call erase from parent because we want to keep the
36026-
// original instruction around.
36027-
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
36028-
MachineInstrBuilder CallseqEnd =
36029-
BuildMI(MF, MIMD, TII.get(AdjStackUp)).addImm(0).addImm(0);
36030-
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
36031-
36032-
return BB;
36033-
}
36034-
3603536022
MachineBasicBlock *
3603636023
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
3603736024
MachineBasicBlock *BB) const {
@@ -37030,16 +37017,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3703037017
return X86::TMM0 + Imm;
3703137018
};
3703237019
switch (MI.getOpcode()) {
37033-
default: llvm_unreachable("Unexpected instr type to insert");
37034-
case X86::TLS_addr32:
37035-
case X86::TLS_addr64:
37036-
case X86::TLS_addrX32:
37037-
case X86::TLS_base_addr32:
37038-
case X86::TLS_base_addr64:
37039-
case X86::TLS_base_addrX32:
37040-
case X86::TLS_desc32:
37041-
case X86::TLS_desc64:
37042-
return EmitLoweredTLSAddr(MI, BB);
37020+
default:
37021+
llvm_unreachable("Unexpected instr type to insert");
3704337022
case X86::INDIRECT_THUNK_CALL32:
3704437023
case X86::INDIRECT_THUNK_CALL64:
3704537024
case X86::INDIRECT_THUNK_TCRETURN32:

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,9 +1848,6 @@ namespace llvm {
18481848
MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
18491849
MachineBasicBlock *BB) const;
18501850

1851-
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1852-
MachineBasicBlock *BB) const;
1853-
18541851
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
18551852
MachineBasicBlock *BB) const;
18561853

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
478478
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
479479
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
480480
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
481-
usesCustomInserter = 1, Uses = [ESP, SSP] in {
481+
Uses = [ESP, SSP] in {
482482
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
483483
"# TLS_addr32",
484484
[(X86tlsaddr tls32addr:$sym)]>,
@@ -498,7 +498,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
498498
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
499499
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
500500
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
501-
usesCustomInserter = 1, Uses = [RSP, SSP] in {
501+
Uses = [RSP, SSP] in {
502502
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
503503
"# TLS_addr64",
504504
[(X86tlsaddr tls64addr:$sym)]>,
@@ -520,7 +520,7 @@ def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
520520
// TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent
521521
// stack-pointer assignments that appear immediately before calls from
522522
// potentially appearing dead.
523-
let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in {
523+
let Defs = [EAX, EFLAGS], Uses = [RSP, SSP] in {
524524
def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
525525
"# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>;
526526
def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=x86_64 -verify-machineinstrs -relocation-model=pic < %s | FileCheck %s
3+
4+
; Passing a pointer to thread-local storage to a function can be problematic
5+
; since computing such addresses requires a function call that is introduced
6+
; very late in instruction selection. We need to ensure that we don't introduce
7+
; nested call sequence markers if this function call happens in a call sequence.
8+
9+
@TLS = internal thread_local global i64 zeroinitializer, align 8
10+
declare void @bar(ptr)
11+
define internal void @foo() {
12+
; CHECK-LABEL: foo:
13+
; CHECK: # %bb.0:
14+
; CHECK-NEXT: pushq %rbx
15+
; CHECK-NEXT: .cfi_def_cfa_offset 16
16+
; CHECK-NEXT: .cfi_offset %rbx, -16
17+
; CHECK-NEXT: leaq TLS@TLSLD(%rip), %rdi
18+
; CHECK-NEXT: callq __tls_get_addr@PLT
19+
; CHECK-NEXT: leaq TLS@DTPOFF(%rax), %rbx
20+
; CHECK-NEXT: movq %rbx, %rdi
21+
; CHECK-NEXT: callq bar@PLT
22+
; CHECK-NEXT: movq %rbx, %rdi
23+
; CHECK-NEXT: callq bar@PLT
24+
; CHECK-NEXT: popq %rbx
25+
; CHECK-NEXT: .cfi_def_cfa_offset 8
26+
; CHECK-NEXT: retq
27+
call void @bar(ptr @TLS)
28+
call void @bar(ptr @TLS)
29+
ret void
30+
}

0 commit comments

Comments
 (0)