Skip to content

Commit c7acd5a

Browse files
committed
[X86] Insert CALLSEQ when lowering GlobalTLSAddress for ELF targets
When lowering a TLS address for an ELF target, we introduce a call to obtain the TLS base address. So far, we do not insert CALLSEQ_START/END markers around this call when it is generated, but use a custom inserter to insert them in a later phase. This is problematic, since the TLS address call can land in a CALLSEQ for another call before it is wrapped in its own CALLSEQ. That results in nested CALLSEQs, which are illegal and cause errors when expensive checks are enabled, e.g., in issues #45574 and #98042. This patch instead wraps each TLS address call in a CALLSEQ when it is generated so that instruction selection can avoid nested CALLSEQs. This is an alternative to PR #106965, which instead changes the custom inserter to avoid generating CALLSEQs when the TLS address call is already in a CALLSEQ. This patch also effectively reverts commit 228978c, which introduced the CustomInserter that so far added the CALLSEQ around TLSAddrs. Fixes #45574 and #98042.
1 parent 0067b79 commit c7acd5a

File tree

4 files changed

+83
-77
lines changed

4 files changed

+83
-77
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 50 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -18831,10 +18831,11 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
1883118831
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
1883218832
}
1883318833

18834-
static SDValue
18835-
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
18836-
SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
18837-
unsigned char OperandFlags, bool LocalDynamic = false) {
18834+
static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain,
18835+
GlobalAddressSDNode *GA, const EVT PtrVT,
18836+
unsigned ReturnReg, unsigned char OperandFlags,
18837+
bool LoadGlobalBaseReg = false,
18838+
bool LocalDynamic = false) {
1883818839
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1883918840
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1884018841
SDLoc dl(GA);
@@ -18844,8 +18845,25 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
1884418845
TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags);
1884518846
auto UI = TGA->use_begin();
1884618847
// Reuse existing GetTLSADDR node if we can find it.
18847-
if (UI != TGA->use_end())
18848-
return SDValue(*UI->use_begin()->use_begin(), 0);
18848+
if (UI != TGA->use_end()) {
18849+
// TLSDESC uses TGA.
18850+
auto TLSDescOp = UI;
18851+
assert(TLSDescOp->getOpcode() == X86ISD::TLSDESC &&
18852+
"Unexpected TLSDESC DAG");
18853+
// CALLSEQ_END uses TLSDESC via a chain and glue.
18854+
auto CallSeqEndOp = TLSDescOp->use_begin();
18855+
assert(CallSeqEndOp->getOpcode() == ISD::CALLSEQ_END &&
18856+
"Unexpected TLSDESC DAG");
18857+
// CopyFromReg uses CALLSEQ_END via a chain and glue.
18858+
auto CopyFromRegOp = CallSeqEndOp->use_begin();
18859+
assert(CopyFromRegOp->getOpcode() == ISD::CopyFromReg &&
18860+
"Unexpected TLSDESC DAG");
18861+
// The Add generated at the final return of this function uses
18862+
// CopyFromReg.
18863+
auto AddOp = CopyFromRegOp->use_begin();
18864+
assert(AddOp->getOpcode() == ISD::ADD && "Unexpected TLSDESC DAG");
18865+
return SDValue(*AddOp, 0);
18866+
}
1884918867
} else {
1885018868
TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
1885118869
GA->getOffset(), OperandFlags);
@@ -18855,13 +18873,20 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
1885518873
: LocalDynamic ? X86ISD::TLSBASEADDR
1885618874
: X86ISD::TLSADDR;
1885718875

18858-
if (InGlue) {
18859-
SDValue Ops[] = { Chain, TGA, *InGlue };
18876+
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
18877+
if (LoadGlobalBaseReg) {
18878+
SDValue InGlue;
18879+
Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
18880+
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
18881+
InGlue);
18882+
InGlue = Chain.getValue(1);
18883+
SDValue Ops[] = {Chain, TGA, InGlue};
1886018884
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
1886118885
} else {
18862-
SDValue Ops[] = { Chain, TGA };
18886+
SDValue Ops[] = {Chain, TGA};
1886318887
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
1886418888
}
18889+
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, Chain.getValue(1), dl);
1886518890

1886618891
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1886718892
MFI.setAdjustsStack(true);
@@ -18887,30 +18912,24 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
1888718912
static SDValue
1888818913
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
1888918914
const EVT PtrVT) {
18890-
SDValue InGlue;
18891-
SDLoc dl(GA); // ? function entry point might be better
18892-
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
18893-
DAG.getNode(X86ISD::GlobalBaseReg,
18894-
SDLoc(), PtrVT), InGlue);
18895-
InGlue = Chain.getValue(1);
18896-
18897-
return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
18915+
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::EAX,
18916+
X86II::MO_TLSGD, /*LoadGlobalBaseReg=*/true);
1889818917
}
1889918918

1890018919
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
1890118920
static SDValue
1890218921
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
1890318922
const EVT PtrVT) {
18904-
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
18905-
X86::RAX, X86II::MO_TLSGD);
18923+
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::RAX,
18924+
X86II::MO_TLSGD);
1890618925
}
1890718926

1890818927
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
1890918928
static SDValue
1891018929
LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
1891118930
const EVT PtrVT) {
18912-
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
18913-
X86::EAX, X86II::MO_TLSGD);
18931+
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::EAX,
18932+
X86II::MO_TLSGD);
1891418933
}
1891518934

1891618935
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
@@ -18919,22 +18938,20 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
1891918938
SDLoc dl(GA);
1892018939

1892118940
// Get the start address of the TLS block for this module.
18922-
X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
18923-
.getInfo<X86MachineFunctionInfo>();
18941+
X86MachineFunctionInfo *MFI =
18942+
DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
1892418943
MFI->incNumLocalDynamicTLSAccesses();
1892518944

1892618945
SDValue Base;
1892718946
if (Is64Bit) {
1892818947
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
18929-
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
18930-
X86II::MO_TLSLD, /*LocalDynamic=*/true);
18948+
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, ReturnReg,
18949+
X86II::MO_TLSLD, /*LoadGlobalBaseReg=*/false,
18950+
/*LocalDynamic=*/true);
1893118951
} else {
18932-
SDValue InGlue;
18933-
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
18934-
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
18935-
InGlue = Chain.getValue(1);
18936-
Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
18937-
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
18952+
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, PtrVT, X86::EAX,
18953+
X86II::MO_TLSLDM, /*LoadGlobalBaseReg=*/true,
18954+
/*LocalDynamic=*/true);
1893818955
}
1893918956

1894018957
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -36059,36 +36076,6 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
3605936076
return BB;
3606036077
}
3606136078

36062-
MachineBasicBlock *
36063-
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
36064-
MachineBasicBlock *BB) const {
36065-
// So, here we replace TLSADDR with the sequence:
36066-
// adjust_stackdown -> TLSADDR -> adjust_stackup.
36067-
// We need this because TLSADDR is lowered into calls
36068-
// inside MC, therefore without the two markers shrink-wrapping
36069-
// may push the prologue/epilogue pass them.
36070-
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
36071-
const MIMetadata MIMD(MI);
36072-
MachineFunction &MF = *BB->getParent();
36073-
36074-
// Emit CALLSEQ_START right before the instruction.
36075-
MF.getFrameInfo().setAdjustsStack(true);
36076-
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
36077-
MachineInstrBuilder CallseqStart =
36078-
BuildMI(MF, MIMD, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
36079-
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
36080-
36081-
// Emit CALLSEQ_END right after the instruction.
36082-
// We don't call erase from parent because we want to keep the
36083-
// original instruction around.
36084-
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
36085-
MachineInstrBuilder CallseqEnd =
36086-
BuildMI(MF, MIMD, TII.get(AdjStackUp)).addImm(0).addImm(0);
36087-
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
36088-
36089-
return BB;
36090-
}
36091-
3609236079
MachineBasicBlock *
3609336080
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
3609436081
MachineBasicBlock *BB) const {
@@ -37091,16 +37078,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3709137078
return X86::TMM0_TMM1 + Imm / 2;
3709237079
};
3709337080
switch (MI.getOpcode()) {
37094-
default: llvm_unreachable("Unexpected instr type to insert");
37095-
case X86::TLS_addr32:
37096-
case X86::TLS_addr64:
37097-
case X86::TLS_addrX32:
37098-
case X86::TLS_base_addr32:
37099-
case X86::TLS_base_addr64:
37100-
case X86::TLS_base_addrX32:
37101-
case X86::TLS_desc32:
37102-
case X86::TLS_desc64:
37103-
return EmitLoweredTLSAddr(MI, BB);
37081+
default:
37082+
llvm_unreachable("Unexpected instr type to insert");
3710437083
case X86::INDIRECT_THUNK_CALL32:
3710537084
case X86::INDIRECT_THUNK_CALL64:
3710637085
case X86::INDIRECT_THUNK_TCRETURN32:

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,9 +1848,6 @@ namespace llvm {
18481848
MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
18491849
MachineBasicBlock *BB) const;
18501850

1851-
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1852-
MachineBasicBlock *BB) const;
1853-
18541851
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
18551852
MachineBasicBlock *BB) const;
18561853

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
478478
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
479479
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
480480
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
481-
usesCustomInserter = 1, Uses = [ESP, SSP] in {
481+
Uses = [ESP, SSP] in {
482482
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
483483
"# TLS_addr32",
484484
[(X86tlsaddr tls32addr:$sym)]>,
@@ -498,7 +498,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
498498
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
499499
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
500500
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
501-
usesCustomInserter = 1, Uses = [RSP, SSP] in {
501+
Uses = [RSP, SSP] in {
502502
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
503503
"# TLS_addr64",
504504
[(X86tlsaddr tls64addr:$sym)]>,
@@ -520,7 +520,7 @@ def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
520520
// TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent
521521
// stack-pointer assignments that appear immediately before calls from
522522
// potentially appearing dead.
523-
let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in {
523+
let Defs = [EAX, EFLAGS], Uses = [RSP, SSP] in {
524524
def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
525525
"# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>;
526526
def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=x86_64 -verify-machineinstrs -relocation-model=pic < %s | FileCheck %s
3+
4+
; Passing a pointer to thread-local storage to a function can be problematic
5+
; since computing such addresses requires a function call that is introduced
6+
; very late in instruction selection. We need to ensure that we don't introduce
7+
; nested call sequence markers if this function call happens in a call sequence.
8+
9+
@TLS = internal thread_local global i64 zeroinitializer, align 8
10+
declare void @bar(ptr)
11+
define internal void @foo() {
12+
; CHECK-LABEL: foo:
13+
; CHECK: # %bb.0:
14+
; CHECK-NEXT: pushq %rbx
15+
; CHECK-NEXT: .cfi_def_cfa_offset 16
16+
; CHECK-NEXT: .cfi_offset %rbx, -16
17+
; CHECK-NEXT: leaq TLS@TLSLD(%rip), %rdi
18+
; CHECK-NEXT: callq __tls_get_addr@PLT
19+
; CHECK-NEXT: leaq TLS@DTPOFF(%rax), %rbx
20+
; CHECK-NEXT: movq %rbx, %rdi
21+
; CHECK-NEXT: callq bar@PLT
22+
; CHECK-NEXT: movq %rbx, %rdi
23+
; CHECK-NEXT: callq bar@PLT
24+
; CHECK-NEXT: popq %rbx
25+
; CHECK-NEXT: .cfi_def_cfa_offset 8
26+
; CHECK-NEXT: retq
27+
call void @bar(ptr @TLS)
28+
call void @bar(ptr @TLS)
29+
ret void
30+
}

0 commit comments

Comments
 (0)