Skip to content

[X86] Insert CALLSEQ when lowering GlobalTLSAddress for ELF targets #113706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 57 additions & 83 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18831,44 +18831,66 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}

static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags, bool LocalDynamic = false) {
static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA,
const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags,
bool LoadGlobalBaseReg = false,
bool LocalDynamic = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA;
bool UseTLSDESC = DAG.getTarget().useTLSDESC();
SDValue Chain = DAG.getEntryNode();
SDValue Ret;
if (LocalDynamic && UseTLSDESC) {
TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags);
auto UI = TGA->use_begin();
// Reuse existing GetTLSADDR node if we can find it.
if (UI != TGA->use_end())
return SDValue(*UI->use_begin()->use_begin(), 0);
if (UI != TGA->use_end()) {
// TLSDESC uses TGA.
auto TLSDescOp = UI;
assert(TLSDescOp->getOpcode() == X86ISD::TLSDESC &&
"Unexpected TLSDESC DAG");
// CALLSEQ_END uses TLSDESC via a chain and glue.
auto *CallSeqEndOp = TLSDescOp->getGluedUser();
assert(CallSeqEndOp && CallSeqEndOp->getOpcode() == ISD::CALLSEQ_END &&
"Unexpected TLSDESC DAG");
// CopyFromReg uses CALLSEQ_END via a chain and glue.
auto *CopyFromRegOp = CallSeqEndOp->getGluedUser();
assert(CopyFromRegOp && CopyFromRegOp->getOpcode() == ISD::CopyFromReg &&
"Unexpected TLSDESC DAG");
Ret = SDValue(CopyFromRegOp, 0);
}
} else {
TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
GA->getOffset(), OperandFlags);
}

X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC
: LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
if (!Ret) {
X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC
: LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;

if (InGlue) {
SDValue Ops[] = { Chain, TGA, *InGlue };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
} else {
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
}
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
if (LoadGlobalBaseReg) {
SDValue InGlue;
Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT),
InGlue);
InGlue = Chain.getValue(1);
Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA, InGlue});
} else {
Chain = DAG.getNode(CallType, dl, NodeTys, {Chain, TGA});
}
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, Chain.getValue(1), dl);

// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
MFI.setAdjustsStack(true);
MFI.setHasCalls(true);
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
MFI.setHasCalls(true);

SDValue Glue = Chain.getValue(1);
SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
SDValue Glue = Chain.getValue(1);
Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}

if (!UseTLSDESC)
return Ret;
Expand All @@ -18887,30 +18909,22 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
SDValue InGlue;
SDLoc dl(GA); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);

return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD,
/*LoadGlobalBaseReg=*/true);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
X86::RAX, X86II::MO_TLSGD);
return GetTLSADDR(DAG, GA, PtrVT, X86::RAX, X86II::MO_TLSGD);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
static SDValue
LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
X86::EAX, X86II::MO_TLSGD);
return GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSGD);
}

static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
Expand All @@ -18919,22 +18933,20 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SDLoc dl(GA);

// Get the start address of the TLS block for this module.
X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
.getInfo<X86MachineFunctionInfo>();
X86MachineFunctionInfo *MFI =
DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();

SDValue Base;
if (Is64Bit) {
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
Base = GetTLSADDR(DAG, GA, PtrVT, ReturnReg, X86II::MO_TLSLD,
/*LoadGlobalBaseReg=*/false,
/*LocalDynamic=*/true);
} else {
SDValue InGlue;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);
Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
Base = GetTLSADDR(DAG, GA, PtrVT, X86::EAX, X86II::MO_TLSLDM,
/*LoadGlobalBaseReg=*/true,
/*LocalDynamic=*/true);
}

// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
Expand Down Expand Up @@ -36059,36 +36071,6 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const {
// So, here we replace TLSADDR with the sequence:
// adjust_stackdown -> TLSADDR -> adjust_stackup.
// We need this because TLSADDR is lowered into calls
// inside MC, therefore without the two markers shrink-wrapping
// may push the prologue/epilogue past them.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MIMetadata MIMD(MI);
MachineFunction &MF = *BB->getParent();

// Emit CALLSEQ_START right before the instruction.
MF.getFrameInfo().setAdjustsStack(true);
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
MachineInstrBuilder CallseqStart =
BuildMI(MF, MIMD, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);

// Emit CALLSEQ_END right after the instruction.
// We don't call erase from parent because we want to keep the
// original instruction around.
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
MachineInstrBuilder CallseqEnd =
BuildMI(MF, MIMD, TII.get(AdjStackUp)).addImm(0).addImm(0);
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);

return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const {
Expand Down Expand Up @@ -37091,16 +37073,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return X86::TMM0_TMM1 + Imm / 2;
};
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected instr type to insert");
case X86::TLS_addr32:
case X86::TLS_addr64:
case X86::TLS_addrX32:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
case X86::TLS_base_addrX32:
case X86::TLS_desc32:
case X86::TLS_desc64:
return EmitLoweredTLSAddr(MI, BB);
default:
llvm_unreachable("Unexpected instr type to insert");
case X86::INDIRECT_THUNK_CALL32:
case X86::INDIRECT_THUNK_CALL64:
case X86::INDIRECT_THUNK_TCRETURN32:
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1848,9 +1848,6 @@ namespace llvm {
MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const;

MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *BB) const;

MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const;

Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/X86/X86InstrCompiler.td
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
usesCustomInserter = 1, Uses = [ESP, SSP] in {
Uses = [ESP, SSP] in {
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_addr32",
[(X86tlsaddr tls32addr:$sym)]>,
Expand All @@ -498,7 +498,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
usesCustomInserter = 1, Uses = [RSP, SSP] in {
Uses = [RSP, SSP] in {
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_addr64",
[(X86tlsaddr tls64addr:$sym)]>,
Expand All @@ -520,7 +520,7 @@ def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
// TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent
// stack-pointer assignments that appear immediately before calls from
// potentially appearing dead.
let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in {
let Defs = [EAX, EFLAGS], Uses = [RSP, SSP] in {
def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>;
def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/X86/tls-function-argument.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=x86_64 -verify-machineinstrs -relocation-model=pic < %s | FileCheck %s

; Passing a pointer to thread-local storage to a function can be problematic
; since computing such addresses requires a function call that is introduced
; very late in instruction selection. We need to ensure that we don't introduce
; nested call sequence markers if this function call happens in a call sequence.

@TLS = internal thread_local global i64 zeroinitializer, align 8
declare void @bar(ptr)
define internal void @foo() {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: leaq TLS@TLSLD(%rip), %rdi
; CHECK-NEXT: callq __tls_get_addr@PLT
; CHECK-NEXT: leaq TLS@DTPOFF(%rax), %rbx
; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: callq bar@PLT
; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: callq bar@PLT
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
call void @bar(ptr @TLS)
call void @bar(ptr @TLS)
ret void
}
Loading