Skip to content

Commit afa23ea

Browse files
authored
[X86] Insert CALLSEQ when lowering GlobalTLSAddress for ELF targets (#113706)
When lowering a TLS address for an ELF target, we introduce a call to obtain the TLS base address. Until now, we did not insert CALLSEQ_START/CALLSEQ_END markers around this call when it was generated; instead, a custom inserter added them in a later phase. This is problematic because the TLS address call can land inside the CALLSEQ of another call before it is wrapped in its own CALLSEQ. That results in nested CALLSEQs, which are illegal and cause errors when expensive checks are enabled, e.g., in issues #45574 and #98042. This patch instead wraps each TLS address call in a CALLSEQ at the point where it is generated, so that instruction selection can avoid nested CALLSEQs. This is an alternative to PR #106965, which instead changes the custom inserter to avoid generating a CALLSEQ when the TLS address call is already inside one. This patch also effectively reverts commit 228978c, which introduced the custom inserter that previously added the CALLSEQ around TLS address calls. Fixes #45574 and #98042.
1 parent 5f30b1a commit afa23ea

File tree

4 files changed

+90
-89
lines changed

4 files changed

+90
-89
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 57 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -18831,44 +18831,66 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
1883118831
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
1883218832
}
1883318833

18834-
static SDValue
18835-
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
18836-
SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
18837-
unsigned char OperandFlags, bool LocalDynamic = false) {
18834+
static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA,
18835+
const EVT PtrVT, unsigned ReturnReg,
18836+
unsigned char OperandFlags,
18837+
bool LoadGlobalBaseReg = false,
18838+
bool LocalDynamic = false) {
1883818839
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1883918840
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1884018841
SDLoc dl(GA);
1884118842
SDValue TGA;
1884218843
bool UseTLSDESC = DAG.getTarget().useTLSDESC();
18844+
SDValue Chain = DAG.getEntryNode();
18845+
SDValue Ret;
1884318846
if (LocalDynamic && UseTLSDESC) {
1884418847
TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags);
1884518848
auto UI = TGA->use_begin();
1884618849
// Reuse existing GetTLSADDR node if we can find it.
18847-
if (UI != TGA->use_end())
18848-
return SDValue(*UI->use_begin()->use_begin(), 0);
18850+
if (UI != TGA->use_end()) {
18851+
// TLSDESC uses TGA.
18852+
auto TLSDescOp = UI;
18853+
assert(TLSDescOp->getOpcode() == X86ISD::TLSDESC &&
18854+
"Unexpected TLSDESC DAG");
18855+
// CALLSEQ_END uses TLSDESC via a chain and glue.
18856+
auto *CallSeqEndOp = TLSDescOp->getGluedUser();
18857+
assert(CallSeqEndOp && CallSeqEndOp->getOpcode() == ISD::CALLSEQ_END &&
18858+
"Unexpected TLSDESC DAG");
18859+
// CopyFromReg uses CALLSEQ_END via a chain and glue.
18860+
auto *CopyFromRegOp = CallSeqEndOp->getGluedUser();
18861+
assert(CopyFromRegOp && CopyFromRegOp->getOpcode() == ISD::CopyFromReg &&
18862+
"Unexpected TLSDESC DAG");
18863+
Ret = SDValue(CopyFromRegOp, 0);
18864+
}
1884918865
} else {
1885018866
TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
1885118867
GA->getOffset(), OperandFlags);
1885218868
}
1885318869

18854-
X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC
18855-
: LocalDynamic ? X86ISD::TLSBASEADDR
18856-
: X86ISD::TLSADDR;
18870+
if (!Ret) {
18871+
X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC
18872+
: LocalDynamic ? X86ISD::TLSBASEADDR
18873+
: X86ISD::TLSADDR;
1885718874

18858-
if (InGlue) {
18859-
SDValue Ops[] = { Chain, TGA, *InGlue };
18860-
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
18861-
} else {
18862-
SDValue Ops[] = { Chain, TGA };
18863-
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
18864-
}
18875+
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
18876+
if (LoadGlobalBaseReg) {
18877+
SDValue InGlue;
18878+
Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
18879+
DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT),
18880+
InGlue);
18881+
InGlue = Chain.getValue(1);
18882+
Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA, InGlue});
18883+
} else {
18884+
Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA});
18885+
}
18886+
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, Chain.getValue(1), dl);
1886518887

18866-
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
18867-
MFI.setAdjustsStack(true);
18868-
MFI.setHasCalls(true);
18888+
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
18889+
MFI.setHasCalls(true);
1886918890

18870-
SDValue Glue = Chain.getValue(1);
18871-
SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
18891+
SDValue Glue = Chain.getValue(1);
18892+
Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
18893+
}
1887218894

1887318895
if (!UseTLSDESC)
1887418896
return Ret;
@@ -18887,30 +18909,22 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
1888718909
static SDValue
1888818910
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
1888918911
const EVT PtrVT) {
18890-
SDValue InGlue;
18891-
SDLoc dl(GA); // ? function entry point might be better
18892-
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
18893-
DAG.getNode(X86ISD::GlobalBaseReg,
18894-
SDLoc(), PtrVT), InGlue);
18895-
InGlue = Chain.getValue(1);
18896-
18897-
return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
18912+
return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD,
18913+
/*LoadGlobalBaseReg=*/true);
1889818914
}
1889918915

1890018916
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
1890118917
static SDValue
1890218918
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
1890318919
const EVT PtrVT) {
18904-
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
18905-
X86::RAX, X86II::MO_TLSGD);
18920+
return GetTLSADDR(DAG, GA, PtrVT, X86::RAX, X86II::MO_TLSGD);
1890618921
}
1890718922

1890818923
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
1890918924
static SDValue
1891018925
LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
1891118926
const EVT PtrVT) {
18912-
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
18913-
X86::EAX, X86II::MO_TLSGD);
18927+
return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD);
1891418928
}
1891518929

1891618930
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
@@ -18919,22 +18933,20 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
1891918933
SDLoc dl(GA);
1892018934

1892118935
// Get the start address of the TLS block for this module.
18922-
X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
18923-
.getInfo<X86MachineFunctionInfo>();
18936+
X86MachineFunctionInfo *MFI =
18937+
DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
1892418938
MFI->incNumLocalDynamicTLSAccesses();
1892518939

1892618940
SDValue Base;
1892718941
if (Is64Bit) {
1892818942
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
18929-
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
18930-
X86II::MO_TLSLD, /*LocalDynamic=*/true);
18943+
Base = GetTLSADDR(DAG, GA, PtrVT, ReturnReg, X86II::MO_TLSLD,
18944+
/*LoadGlobalBaseReg=*/false,
18945+
/*LocalDynamic=*/true);
1893118946
} else {
18932-
SDValue InGlue;
18933-
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
18934-
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
18935-
InGlue = Chain.getValue(1);
18936-
Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
18937-
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
18947+
Base = GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSLDM,
18948+
/*LoadGlobalBaseReg=*/true,
18949+
/*LocalDynamic=*/true);
1893818950
}
1893918951

1894018952
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -36059,36 +36071,6 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
3605936071
return BB;
3606036072
}
3606136073

36062-
MachineBasicBlock *
36063-
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
36064-
MachineBasicBlock *BB) const {
36065-
// So, here we replace TLSADDR with the sequence:
36066-
// adjust_stackdown -> TLSADDR -> adjust_stackup.
36067-
// We need this because TLSADDR is lowered into calls
36068-
// inside MC, therefore without the two markers shrink-wrapping
36069-
// may push the prologue/epilogue pass them.
36070-
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
36071-
const MIMetadata MIMD(MI);
36072-
MachineFunction &MF = *BB->getParent();
36073-
36074-
// Emit CALLSEQ_START right before the instruction.
36075-
MF.getFrameInfo().setAdjustsStack(true);
36076-
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
36077-
MachineInstrBuilder CallseqStart =
36078-
BuildMI(MF, MIMD, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
36079-
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
36080-
36081-
// Emit CALLSEQ_END right after the instruction.
36082-
// We don't call erase from parent because we want to keep the
36083-
// original instruction around.
36084-
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
36085-
MachineInstrBuilder CallseqEnd =
36086-
BuildMI(MF, MIMD, TII.get(AdjStackUp)).addImm(0).addImm(0);
36087-
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
36088-
36089-
return BB;
36090-
}
36091-
3609236074
MachineBasicBlock *
3609336075
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
3609436076
MachineBasicBlock *BB) const {
@@ -37091,16 +37073,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3709137073
return X86::TMM0_TMM1 + Imm / 2;
3709237074
};
3709337075
switch (MI.getOpcode()) {
37094-
default: llvm_unreachable("Unexpected instr type to insert");
37095-
case X86::TLS_addr32:
37096-
case X86::TLS_addr64:
37097-
case X86::TLS_addrX32:
37098-
case X86::TLS_base_addr32:
37099-
case X86::TLS_base_addr64:
37100-
case X86::TLS_base_addrX32:
37101-
case X86::TLS_desc32:
37102-
case X86::TLS_desc64:
37103-
return EmitLoweredTLSAddr(MI, BB);
37076+
default:
37077+
llvm_unreachable("Unexpected instr type to insert");
3710437078
case X86::INDIRECT_THUNK_CALL32:
3710537079
case X86::INDIRECT_THUNK_CALL64:
3710637080
case X86::INDIRECT_THUNK_TCRETURN32:

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,9 +1848,6 @@ namespace llvm {
18481848
MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
18491849
MachineBasicBlock *BB) const;
18501850

1851-
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1852-
MachineBasicBlock *BB) const;
1853-
18541851
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
18551852
MachineBasicBlock *BB) const;
18561853

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
478478
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
479479
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
480480
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
481-
usesCustomInserter = 1, Uses = [ESP, SSP] in {
481+
Uses = [ESP, SSP] in {
482482
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
483483
"# TLS_addr32",
484484
[(X86tlsaddr tls32addr:$sym)]>,
@@ -498,7 +498,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
498498
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
499499
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
500500
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
501-
usesCustomInserter = 1, Uses = [RSP, SSP] in {
501+
Uses = [RSP, SSP] in {
502502
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
503503
"# TLS_addr64",
504504
[(X86tlsaddr tls64addr:$sym)]>,
@@ -520,7 +520,7 @@ def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
520520
// TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent
521521
// stack-pointer assignments that appear immediately before calls from
522522
// potentially appearing dead.
523-
let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in {
523+
let Defs = [EAX, EFLAGS], Uses = [RSP, SSP] in {
524524
def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
525525
"# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>;
526526
def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=x86_64 -verify-machineinstrs -relocation-model=pic < %s | FileCheck %s
3+
4+
; Passing a pointer to thread-local storage to a function can be problematic
5+
; since computing such addresses requires a function call that is introduced
6+
; very late in instruction selection. We need to ensure that we don't introduce
7+
; nested call sequence markers if this function call happens in a call sequence.
8+
9+
@TLS = internal thread_local global i64 zeroinitializer, align 8
10+
declare void @bar(ptr)
11+
define internal void @foo() {
12+
; CHECK-LABEL: foo:
13+
; CHECK: # %bb.0:
14+
; CHECK-NEXT: pushq %rbx
15+
; CHECK-NEXT: .cfi_def_cfa_offset 16
16+
; CHECK-NEXT: .cfi_offset %rbx, -16
17+
; CHECK-NEXT: leaq TLS@TLSLD(%rip), %rdi
18+
; CHECK-NEXT: callq __tls_get_addr@PLT
19+
; CHECK-NEXT: leaq TLS@DTPOFF(%rax), %rbx
20+
; CHECK-NEXT: movq %rbx, %rdi
21+
; CHECK-NEXT: callq bar@PLT
22+
; CHECK-NEXT: movq %rbx, %rdi
23+
; CHECK-NEXT: callq bar@PLT
24+
; CHECK-NEXT: popq %rbx
25+
; CHECK-NEXT: .cfi_def_cfa_offset 8
26+
; CHECK-NEXT: retq
27+
call void @bar(ptr @TLS)
28+
call void @bar(ptr @TLS)
29+
ret void
30+
}

0 commit comments

Comments
 (0)