Skip to content

[llvm][x86] Add TLSDESC support for X86 code generation #84085

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,18 @@ enum TOF {
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSLDM
MO_TLSLDM,
/// MO_TLSDESC - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS index for the module that
/// contains the symbol. When this index is passed to a call to
/// the resolver function, it will return the offset from the thread pointer.
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSDESC
MO_TLSDESC,
/// MO_TLSCALL - On a symbol operand this indicates that this is a call to
/// the resolver function, which will return the offset from the thread pointer.
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSCALL
MO_TLSCALL,
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the thread-pointer offset for the
/// symbol. Used in the x86-64 initial exec TLS access model.
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,12 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_TLSLD: O << "@TLSLD"; break;
case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
case X86II::MO_TLSDESC:
O << "@TLSDESC";
break;
case X86II::MO_TLSCALL:
O << "@TLSCALL";
break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
case X86II::MO_TPOFF: O << "@TPOFF"; break;
Expand Down
205 changes: 156 additions & 49 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//

#include "X86ISelLowering.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "X86.h"
#include "X86CallingConv.h"
Expand Down Expand Up @@ -18622,6 +18624,47 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
X86::RAX, X86II::MO_TLSGD);
}

/// Lower a thread-local symbol access through a TLS descriptor call.
///
/// \param Sym     The symbol being accessed. Must be a GlobalAddressSDNode or
///                an ExternalSymbolSDNode; any other node kind trips the
///                cast<> below.
/// \param DAG     The SelectionDAG the new nodes are created in.
/// \param PtrVT   Pointer value type used for the wrapper, the register copy,
///                the thread-pointer load and the final add.
/// \param Is64Bit Selects RAX and address space 257 when true, EAX and
///                address space 256 otherwise.
/// \returns An ISD::ADD of the loaded thread pointer and the value copied out
///          of the return register after the X86ISD::TLSCALL node.
static SDValue LowerToTLSDESC(SDValue Sym, SelectionDAG &DAG, const EVT PtrVT,
                              bool Is64Bit) {

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Chain = DAG.getEntryNode();
  SDValue Target;
  SDLoc DL(Sym);

  // Re-create the symbol as a target node carrying the MO_TLSDESC flag.
  if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Sym)) {
    Target =
        DAG.getTargetGlobalAddress(GA->getGlobal(), DL, GA->getValueType(0),
                                   GA->getOffset(), X86II::MO_TLSDESC);
  } else {
    const auto *ES = cast<ExternalSymbolSDNode>(Sym);
    Target =
        DAG.getTargetExternalSymbol(ES->getSymbol(), PtrVT, X86II::MO_TLSDESC);
  }

  SDValue Offset = DAG.getNode(X86ISD::Wrapper, DL, PtrVT, Target);
  SDValue Ops[] = {Chain, Offset};
  // Use DL (the location of the original symbol node) for the call node.
  // Passing Target here would implicitly build an SDLoc from the target node,
  // which carries no location on the external-symbol path.
  Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Ops);

  // TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
  MFI.setAdjustsStack(true);
  MFI.setHasCalls(true);
  unsigned ReturnReg = Is64Bit ? X86::RAX : X86::EAX;

  // The call result is read back from the return register, glued to the call.
  SDValue Glue = Chain.getValue(1);
  Offset = DAG.getCopyFromReg(Chain, DL, ReturnReg, PtrVT, Glue);
  // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
  Value *Ptr = Constant::getNullValue(
      PointerType::get(*DAG.getContext(), Is64Bit ? 257 : 256));

  SDValue ThreadPointer =
      DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), DAG.getIntPtrConstant(0, DL),
                  MachinePointerInfo(Ptr));

  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
static SDValue
LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
Expand All @@ -18641,7 +18684,12 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
MFI->incNumLocalDynamicTLSAccesses();

SDValue Base;
if (Is64Bit) {

if (DAG.getTarget().useTLSDESC()) {
SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
X86II::MO_TLSDESC);
Base = LowerToTLSDESC(SymAddr, DAG, PtrVT, Is64Bit);
} else if (Is64Bit) {
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
Expand Down Expand Up @@ -18740,6 +18788,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
if (DAG.getTarget().useTLSDESC())
return LowerToTLSDESC(Op, DAG, PtrVT, Subtarget.is64Bit());
if (Subtarget.is64Bit()) {
if (Subtarget.isTarget64BitLP64())
return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
Expand Down Expand Up @@ -35203,60 +35253,117 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const {
// This is pretty easy. We're taking the value that we received from
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
MachineFunction *F = BB->getParent();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
const MIMetadata MIMD(MI);

assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
assert(MI.getOperand(3).isGlobal() && "This should be a global");
assert((Subtarget.isTargetDarwin() || Subtarget.isTargetELF()) &&
"Incompatible Target for TLSCALL");
assert((MI.getOperand(3).isGlobal() || MI.getOperand(3).isSymbol()) &&
"This should be a global or symbol");

// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
Subtarget.is64Bit() ?
Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (!isPositionIndependent()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
if (Subtarget.isTargetDarwin()) {
// This is pretty easy. We're taking the value that we received from
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.

// Get a register mask for the lowered call.
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
Subtarget.is64Bit()
? Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask()
: Subtarget.getRegisterInfo()->getCallPreservedMask(*F,
CallingConv::C);
if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (!isPositionIndependent()) {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
// For ELF TLS Descriptors, we create the correct ELF relocations so that
// we generate:
// lea a@tlsdesc(%rip), %rax
// call *a@tlscall(%rax)
// The TLSCALL will call a resolver function with a custom ABI that
// preserves all registers, and places the offset from the thread pointer
// in %rax/%eax.

// Get a register mask for the lowered call.
const uint32_t *RegMask = Subtarget.getRegisterInfo()->getCallPreservedMask(
*F, CallingConv::PreserveAll);
const unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
const unsigned IP = Subtarget.is64Bit() ? X86::RIP : X86::IP;
const auto LoadOp = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
const auto CallOp = Subtarget.is64Bit() ? X86::CALL64m : X86::CALL32m;

MachineOperand Sym = MI.getOperand(3);

MachineInstrBuilder MIB;
if (Sym.isGlobal()) {
MIB = BuildMI(*BB, MI, MIMD, TII->get(LoadOp), Reg)
.addReg(IP)
.addImm(0)
.addReg(0)
.addGlobalAddress(Sym.getGlobal(), 0, Sym.getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, MIMD, TII->get(CallOp))
.addReg(Reg)
.addImm(1)
.addReg(0)
.addGlobalAddress(Sym.getGlobal(), 0, X86II::MO_TLSCALL)
.addReg(0);

} else {
MIB = BuildMI(*BB, MI, MIMD, TII->get(LoadOp), Reg)
.addReg(IP)
.addImm(0)
.addReg(0)
.add({Sym})
.addReg(0);
Sym.setTargetFlags(X86II::MO_TLSCALL);
MIB = BuildMI(*BB, MI, MIMD, TII->get(CallOp))
.addReg(Reg)
.addImm(1)
.addReg(0)
.add({Sym})
.addReg(0);
}

MIB.addReg(Reg, RegState::ImplicitDefine).addRegMask(RegMask);
}

MI.eraseFromParent(); // The pseudo instruction is gone now.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10088,6 +10088,8 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_TLSGD, "x86-tlsgd"},
{MO_TLSLD, "x86-tlsld"},
{MO_TLSLDM, "x86-tlsldm"},
{MO_TLSDESC, "x86-tlsdesc"},
{MO_TLSCALL, "x86-tlscall"},
{MO_GOTTPOFF, "x86-gottpoff"},
{MO_INDNTPOFF, "x86-indntpoff"},
{MO_TPOFF, "x86-tpoff"},
Expand Down Expand Up @@ -10241,6 +10243,8 @@ struct LDTLSCleanup : public MachineFunctionPass {
switch (I->getOpcode()) {
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
case X86::TLSCall_32:
case X86::TLSCall_64:
if (TLSBaseAddrReg)
I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
else
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86MCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLSLDM:
RefKind = MCSymbolRefExpr::VK_TLSLDM;
break;
case X86II::MO_TLSDESC:
RefKind = MCSymbolRefExpr::VK_TLSDESC;
break;
case X86II::MO_TLSCALL:
RefKind = MCSymbolRefExpr::VK_TLSCALL;
break;
case X86II::MO_GOTTPOFF:
RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
break;
Expand Down
74 changes: 74 additions & 0 deletions llvm/test/CodeGen/X86/tlsdesc-dynamic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
; RUN: llc -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-tlsdesc %s -o - | FileCheck %s --check-prefixes=GD
; RUN: llc -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-tlsdesc -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefixes=GD-RELOC %s

@general_dynamic_var = external thread_local global i32

; Loads a general-dynamic thread_local i32. The GD checks expect a leaq of the
; symbol's @tlsdesc operand, an indirect callq through its @tlscall operand
; using the same register, and then a %fs-relative load through that register.
; The GD-RELOC checks expect the GOTPC32_TLSDESC and TLSDESC_CALL relocations
; against the variable.
define i32 @test_generaldynamic() {
%val = load i32, ptr @general_dynamic_var
ret i32 %val
; GD: test_generaldynamic:
; GD: leaq general_dynamic_var@tlsdesc(%rip), [[REG:%.*]]
; GD-NEXT: callq *general_dynamic_var@tlscall([[REG]])
; GD-NEXT: movl %fs:([[REG]]),

; GD-RELOC: R_X86_64_GOTPC32_TLSDESC general_dynamic_var
; GD-RELOC: R_X86_64_TLSDESC_CALL general_dynamic_var
}

; Returns the address of a general-dynamic thread_local variable instead of
; loading from it. The GD checks expect the same leaq/callq descriptor pair,
; followed by an addq of %fs:0 into %rax to form the final address.
define ptr @test_generaldynamic_addr() {
ret ptr @general_dynamic_var
; GD: test_generaldynamic_addr:
; GD: leaq general_dynamic_var@tlsdesc(%rip), [[REG:%.*]]
; GD-NEXT: callq *general_dynamic_var@tlscall([[REG]])
; GD-NEXT: addq %fs:0, %rax

; GD-RELOC: R_X86_64_GOTPC32_TLSDESC general_dynamic_var
; GD-RELOC: R_X86_64_TLSDESC_CALL general_dynamic_var
}

@local_dynamic_var = external thread_local(localdynamic) global i32

; Loads a thread_local(localdynamic) i32. The GD checks expect the descriptor
; leaq/callq pair to target _TLS_MODULE_BASE_ rather than the variable, and
; the variable itself to be reached with an @DTPOFF displacement off %rax.
; The GD-RELOC checks additionally expect a DTPOFF32 relocation against the
; variable.
define i32 @test_localdynamic() {
%val = load i32, ptr @local_dynamic_var
ret i32 %val
; GD: test_localdynamic:
; GD: leaq _TLS_MODULE_BASE_@tlsdesc(%rip), [[REG:%.*]]
; GD-NEXT: callq *_TLS_MODULE_BASE_@tlscall([[REG]])
; GD-NEXT: movl %fs:local_dynamic_var@DTPOFF(%rax), %eax

; GD-RELOC: R_X86_64_GOTPC32_TLSDESC _TLS_MODULE_BASE_
; GD-RELOC: R_X86_64_TLSDESC_CALL _TLS_MODULE_BASE_
; GD-RELOC: R_X86_64_DTPOFF32 local_dynamic_var
}

; Returns the address of a thread_local(localdynamic) variable. The GD checks
; expect the _TLS_MODULE_BASE_ descriptor leaq/callq pair, a load of %fs:0
; into %rcx, and a leaq combining %rcx, the call result register and the
; variable's @DTPOFF displacement.
define ptr @test_localdynamic_addr() {
ret ptr @local_dynamic_var
; GD: test_localdynamic_addr:
; GD: leaq _TLS_MODULE_BASE_@tlsdesc(%rip), [[REG:%.*]]
; GD-NEXT: callq *_TLS_MODULE_BASE_@tlscall([[REG]])
; GD-NEXT: movq %fs:0, %rcx
; GD-NEXT: leaq local_dynamic_var@DTPOFF(%rcx,[[REG]])

; GD-RELOC: R_X86_64_GOTPC32_TLSDESC _TLS_MODULE_BASE_
; GD-RELOC: R_X86_64_TLSDESC_CALL _TLS_MODULE_BASE_
; GD-RELOC: R_X86_64_DTPOFF32 local_dynamic_var
}

@local_dynamic_var2 = external thread_local(localdynamic) global i32

; Loads two different thread_local(localdynamic) variables and adds them.
; The GD checks expect a single _TLS_MODULE_BASE_ descriptor leaq/callq pair
; immediately followed by both @DTPOFF accesses off %rax, i.e. the second
; access reuses the result of the first descriptor call.
; NOTE(review): the GD-RELOC checks only match the DTPOFF32 relocation for
; local_dynamic_var2, not for local_dynamic_var - confirm this is intended.
define i32 @test_localdynamic_deduplicate() {
%val = load i32, ptr @local_dynamic_var
%val2 = load i32, ptr @local_dynamic_var2
%sum = add i32 %val, %val2
ret i32 %sum
; GD: test_localdynamic_deduplicate:
; GD: leaq _TLS_MODULE_BASE_@tlsdesc(%rip), [[REG:%.*]]
; GD-NEXT: callq *_TLS_MODULE_BASE_@tlscall([[REG]])
; GD-NEXT: movl %fs:local_dynamic_var@DTPOFF(%rax)
; GD-NEXT: addl %fs:local_dynamic_var2@DTPOFF(%rax)

; GD-RELOC: R_X86_64_GOTPC32_TLSDESC _TLS_MODULE_BASE_
; GD-RELOC: R_X86_64_TLSDESC_CALL _TLS_MODULE_BASE_
; GD-RELOC: R_X86_64_DTPOFF32 local_dynamic_var2
}

Loading