-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Add Support for X86 TLSDESC Relocations #83136
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang-driver
Author: Phoebe Wang (phoebewang)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/83136.diff
7 Files Affected:
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index faceee85a2f8dc..c66e3ee12e50c4 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -740,7 +740,8 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC,
SupportedArgument = V == "desc" || V == "trad";
EnableTLSDESC = V == "desc";
} else if (Triple.isX86()) {
- SupportedArgument = V == "gnu";
+ SupportedArgument = V == "gnu" || V == "gnu2";
+ EnableTLSDESC = V == "gnu2";
} else {
Unsupported = true;
}
diff --git a/clang/test/Driver/tls-dialect.c b/clang/test/Driver/tls-dialect.c
index f73915b28ec2a3..a808dd81531ce7 100644
--- a/clang/test/Driver/tls-dialect.c
+++ b/clang/test/Driver/tls-dialect.c
@@ -2,6 +2,7 @@
// RUN: %clang -### --target=riscv64-linux -mtls-dialect=trad %s 2>&1 | FileCheck --check-prefix=NODESC %s
// RUN: %clang -### --target=riscv64-linux %s 2>&1 | FileCheck --check-prefix=NODESC %s
// RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu %s 2>&1 | FileCheck --check-prefix=NODESC %s
+// RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=DESC %s
/// Android supports TLSDESC by default on RISC-V
/// TLSDESC is not on by default in Linux, even on RISC-V, and is covered above
@@ -18,7 +19,6 @@
/// Unsupported argument
// RUN: not %clang -### --target=riscv64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
-// RUN: not %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
// DESC: "-cc1" {{.*}}"-enable-tlsdesc"
// NODESC-NOT: "-enable-tlsdesc"
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 4442b80861b61a..1877550f8c40bb 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -431,6 +431,20 @@ enum TOF {
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSLDM
MO_TLSLDM,
+ /// MO_TLSCALL - On a symbol operand this indicates that the immediate is
+ /// the index of the TLS descriptor function for the symbol. Used in the
+ /// general dynamic and local dynamic TLS access models on both IA32 and
+ /// x86-64 when TLS descriptors (TLSDESC) are enabled.
+ /// See 'RFC-TLSDESC-x86' for more details.
+ /// SYMBOL_LABEL @TLSCALL
+ MO_TLSCALL,
+ /// MO_TLSDESC - On a symbol operand this indicates that the immediate is
+ /// the index of the TLS descriptor argument for the symbol. When this
+ /// argument is passed to the function called through index@TLSCALL, that
+ /// function returns the offset for the symbol. Used in the general dynamic
+ /// and local dynamic TLS access models on both IA32 and x86-64 when TLS
+ /// descriptors (TLSDESC) are enabled.
+ /// See 'RFC-TLSDESC-x86' for more details.
+ /// SYMBOL_LABEL @TLSDESC
+ MO_TLSDESC,
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the thread-pointer offset for the
/// symbol. Used in the x86-64 initial exec TLS access model.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 3395a13545e454..d8e111db1cec42 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -271,6 +271,8 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_TLSLD: O << "@TLSLD"; break;
case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
+ case X86II::MO_TLSDESC: O << "@TLSDESC"; break;
+ case X86II::MO_TLSCALL: O << "@TLSCALL"; break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
case X86II::MO_TPOFF: O << "@TPOFF"; break;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a86f13135173b0..88314bcf510e9a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18515,17 +18515,17 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}
-static SDValue
-GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
- SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
- unsigned char OperandFlags, bool LocalDynamic = false) {
+static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain,
+ GlobalAddressSDNode *GA, SDValue *InGlue,
+ const EVT PtrVT, unsigned ReturnReg,
+ unsigned char OperandFlags, bool UseTLSDESC = false,
+ bool LocalDynamic = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(),
- OperandFlags);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(),
+ UseTLSDESC ? X86II::MO_TLSDESC : OperandFlags);
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
@@ -18543,13 +18543,27 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
MFI.setHasCalls(true);
SDValue Glue = Chain.getValue(1);
- return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+ SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+
+ if (!UseTLSDESC)
+ return Ret;
+
+ const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
+ MVT VT = Subtarget.isTarget64BitLP64() ? MVT::i64 : MVT::i32;
+ unsigned Seg = Subtarget.is64Bit() ? X86AS::FS : X86AS::GS;
+
+ Value *Ptr = Constant::getNullValue(PointerType::get(*DAG.getContext(), Seg));
+ SDValue Offset =
+ DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
+ MachinePointerInfo(Ptr));
+ return DAG.getNode(ISD::ADD, dl, VT, Ret, Offset);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
-static SDValue
-LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool UseTLSDESC) {
SDValue InGlue;
SDLoc dl(GA); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
@@ -18557,23 +18571,26 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);
- return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
+ return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD,
+ UseTLSDESC);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
-static SDValue
-LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
- X86::RAX, X86II::MO_TLSGD);
+static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool UseTLSDESC) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
+ X86II::MO_TLSGD, UseTLSDESC);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
-static SDValue
-LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
- X86::EAX, X86II::MO_TLSGD);
+static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool UseTLSDESC) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX,
+ X86II::MO_TLSGD, UseTLSDESC);
}
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
@@ -18590,14 +18607,16 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
if (Is64Bit) {
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
- X86II::MO_TLSLD, /*LocalDynamic=*/true);
+ X86II::MO_TLSLD, /*UseTLSDESC=*/false,
+ /*LocalDynamic=*/true);
} else {
SDValue InGlue;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);
Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
- X86II::MO_TLSLDM, /*LocalDynamic=*/true);
+ X86II::MO_TLSLDM, /*UseTLSDESC=*/false,
+ /*LocalDynamic=*/true);
}
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -18684,21 +18703,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget.isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
+ bool UseTLSDESC = DAG.getTarget().useTLSDESC();
switch (model) {
- case TLSModel::GeneralDynamic:
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTarget64BitLP64())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
- return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
- }
- return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
- case TLSModel::LocalDynamic:
+ case TLSModel::LocalDynamic:
+ if (!UseTLSDESC)
return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
Subtarget.isTarget64BitLP64());
- case TLSModel::InitialExec:
- case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
- PositionIndependent);
+ [[fallthrough]];
+ case TLSModel::GeneralDynamic:
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTarget64BitLP64()) {
+ // auto PtrVT = getPointerTy(DAG.getDataLayout(), X86AS::FS);
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC);
+ }
+ return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC);
+ }
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC);
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
+ PositionIndependent);
}
llvm_unreachable("Unknown TLS model.");
}
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index d3b7d97a83caf0..e447e17c2d7d09 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -257,6 +257,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLSLDM:
RefKind = MCSymbolRefExpr::VK_TLSLDM;
break;
+ case X86II::MO_TLSDESC:
+ RefKind = MCSymbolRefExpr::VK_TLSDESC;
+ break;
+ case X86II::MO_TLSCALL:
+ RefKind = MCSymbolRefExpr::VK_TLSCALL;
+ break;
case X86II::MO_GOTTPOFF:
RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
break;
@@ -524,13 +530,14 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
MI.getOpcode() == X86::TLS_base_addr64;
MCContext &Ctx = OutStreamer->getContext();
+ bool isTLSDESC = MI.getOperand(3).getTargetFlags() == X86II::MO_TLSDESC;
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
case X86::TLS_addr32:
case X86::TLS_addr64:
case X86::TLS_addrX32:
- SRVK = MCSymbolRefExpr::VK_TLSGD;
+ SRVK = isTLSDESC ? MCSymbolRefExpr::VK_TLSDESC : MCSymbolRefExpr::VK_TLSGD;
break;
case X86::TLS_base_addr32:
SRVK = MCSymbolRefExpr::VK_TLSLDM;
@@ -554,7 +561,26 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
Ctx.getAsmInfo()->canRelaxRelocations();
- if (Is64Bits) {
+ if (isTLSDESC) {
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
+ MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)),
+ MCSymbolRefExpr::VK_TLSCALL, Ctx);
+ EmitAndCountInstruction(
+ MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
+ .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+ .addReg(Is64Bits ? X86::RIP : X86::EBX)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Sym)
+ .addReg(0));
+ EmitAndCountInstruction(
+ MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
+ .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Expr)
+ .addReg(0));
+ } else if (Is64Bits) {
bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
if (NeedsPadding && Is64BitsLP64)
EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
new file mode 100644
index 00000000000000..00d8a3533a74d0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
+
+@x = thread_local global i32 0, align 4
+@y = internal thread_local global i32 0, align 4
+
+define ptr @f1() nounwind {
+; X86-LABEL: f1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: calll .L0$pb
+; X86-NEXT: .L0$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp0:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: addl %gs:0, %eax
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f1:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: # kill: def $eax killed $eax def $rax
+; X32-NEXT: addl %fs:0, %eax
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f1:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: addq %fs:0, %rax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ ret ptr %1
+}
+
+define i32 @f2() nounwind {
+; X86-LABEL: f2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %esi
+; X86-NEXT: calll .L1$pb
+; X86-NEXT: .L1$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp1:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx
+; X86-NEXT: movl %gs:0, %esi
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: movl (%eax,%esi), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f2:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rbx
+; X32-NEXT: movl %fs:0, %ebx
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: movl (%eax,%ebx), %eax
+; X32-NEXT: popq %rbx
+; X32-NEXT: retq
+;
+; X64-LABEL: f2:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movq %fs:0, %rbx
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: movl (%rax,%rbx), %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %2 = load i32, ptr %1
+ ret i32 %2
+}
+
+define ptr @f3() nounwind {
+; X86-LABEL: f3:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: calll .L2$pb
+; X86-NEXT: .L2$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp2:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L2$pb), %ebx
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: addl %gs:0, %eax
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f3:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: # kill: def $eax killed $eax def $rax
+; X32-NEXT: addl %fs:0, %eax
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f3:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: addq %fs:0, %rax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ ret ptr %1
+}
+
+define i32 @f4() nounwind {
+; X86-LABEL: f4:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %esi
+; X86-NEXT: calll .L3$pb
+; X86-NEXT: .L3$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp3:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
+; X86-NEXT: movl %gs:0, %esi
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: movl (%eax,%esi), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f4:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rbx
+; X32-NEXT: movl %fs:0, %ebx
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: movl (%eax,%ebx), %eax
+; X32-NEXT: popq %rbx
+; X32-NEXT: retq
+;
+; X64-LABEL: f4:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movq %fs:0, %rbx
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: movl (%rax,%rbx), %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %2 = load i32, ptr %1
+ ret i32 %2
+}
|
@llvm/pr-subscribers-clang
Author: Phoebe Wang (phoebewang)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/83136.diff
7 Files Affected:
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index faceee85a2f8dc..c66e3ee12e50c4 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -740,7 +740,8 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC,
SupportedArgument = V == "desc" || V == "trad";
EnableTLSDESC = V == "desc";
} else if (Triple.isX86()) {
- SupportedArgument = V == "gnu";
+ SupportedArgument = V == "gnu" || V == "gnu2";
+ EnableTLSDESC = V == "gnu2";
} else {
Unsupported = true;
}
diff --git a/clang/test/Driver/tls-dialect.c b/clang/test/Driver/tls-dialect.c
index f73915b28ec2a3..a808dd81531ce7 100644
--- a/clang/test/Driver/tls-dialect.c
+++ b/clang/test/Driver/tls-dialect.c
@@ -2,6 +2,7 @@
// RUN: %clang -### --target=riscv64-linux -mtls-dialect=trad %s 2>&1 | FileCheck --check-prefix=NODESC %s
// RUN: %clang -### --target=riscv64-linux %s 2>&1 | FileCheck --check-prefix=NODESC %s
// RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu %s 2>&1 | FileCheck --check-prefix=NODESC %s
+// RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=DESC %s
/// Android supports TLSDESC by default on RISC-V
/// TLSDESC is not on by default in Linux, even on RISC-V, and is covered above
@@ -18,7 +19,6 @@
/// Unsupported argument
// RUN: not %clang -### --target=riscv64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
-// RUN: not %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
// DESC: "-cc1" {{.*}}"-enable-tlsdesc"
// NODESC-NOT: "-enable-tlsdesc"
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 4442b80861b61a..1877550f8c40bb 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -431,6 +431,20 @@ enum TOF {
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSLDM
MO_TLSLDM,
+ /// MO_TLSCALL - On a symbol operand this indicates that the immediate is
+ /// the index of the TLS descriptor function for the symbol. Used in the
+ /// general dynamic and local dynamic TLS access models on both IA32 and
+ /// x86-64 when TLS descriptors (TLSDESC) are enabled.
+ /// See 'RFC-TLSDESC-x86' for more details.
+ /// SYMBOL_LABEL @TLSCALL
+ MO_TLSCALL,
+ /// MO_TLSDESC - On a symbol operand this indicates that the immediate is
+ /// the index of the TLS descriptor argument for the symbol. When this
+ /// argument is passed to the function called through index@TLSCALL, that
+ /// function returns the offset for the symbol. Used in the general dynamic
+ /// and local dynamic TLS access models on both IA32 and x86-64 when TLS
+ /// descriptors (TLSDESC) are enabled.
+ /// See 'RFC-TLSDESC-x86' for more details.
+ /// SYMBOL_LABEL @TLSDESC
+ MO_TLSDESC,
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the thread-pointer offset for the
/// symbol. Used in the x86-64 initial exec TLS access model.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 3395a13545e454..d8e111db1cec42 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -271,6 +271,8 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_TLSLD: O << "@TLSLD"; break;
case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
+ case X86II::MO_TLSDESC: O << "@TLSDESC"; break;
+ case X86II::MO_TLSCALL: O << "@TLSCALL"; break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
case X86II::MO_TPOFF: O << "@TPOFF"; break;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a86f13135173b0..88314bcf510e9a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18515,17 +18515,17 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}
-static SDValue
-GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
- SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
- unsigned char OperandFlags, bool LocalDynamic = false) {
+static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain,
+ GlobalAddressSDNode *GA, SDValue *InGlue,
+ const EVT PtrVT, unsigned ReturnReg,
+ unsigned char OperandFlags, bool UseTLSDESC = false,
+ bool LocalDynamic = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(),
- OperandFlags);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(),
+ UseTLSDESC ? X86II::MO_TLSDESC : OperandFlags);
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
@@ -18543,13 +18543,27 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
MFI.setHasCalls(true);
SDValue Glue = Chain.getValue(1);
- return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+ SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+
+ if (!UseTLSDESC)
+ return Ret;
+
+ const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
+ MVT VT = Subtarget.isTarget64BitLP64() ? MVT::i64 : MVT::i32;
+ unsigned Seg = Subtarget.is64Bit() ? X86AS::FS : X86AS::GS;
+
+ Value *Ptr = Constant::getNullValue(PointerType::get(*DAG.getContext(), Seg));
+ SDValue Offset =
+ DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
+ MachinePointerInfo(Ptr));
+ return DAG.getNode(ISD::ADD, dl, VT, Ret, Offset);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
-static SDValue
-LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool UseTLSDESC) {
SDValue InGlue;
SDLoc dl(GA); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
@@ -18557,23 +18571,26 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);
- return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
+ return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD,
+ UseTLSDESC);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
-static SDValue
-LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
- X86::RAX, X86II::MO_TLSGD);
+static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool UseTLSDESC) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
+ X86II::MO_TLSGD, UseTLSDESC);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
-static SDValue
-LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
- const EVT PtrVT) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
- X86::EAX, X86II::MO_TLSGD);
+static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ const EVT PtrVT,
+ bool UseTLSDESC) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX,
+ X86II::MO_TLSGD, UseTLSDESC);
}
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
@@ -18590,14 +18607,16 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
if (Is64Bit) {
unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
- X86II::MO_TLSLD, /*LocalDynamic=*/true);
+ X86II::MO_TLSLD, /*UseTLSDESC=*/false,
+ /*LocalDynamic=*/true);
} else {
SDValue InGlue;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
InGlue = Chain.getValue(1);
Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
- X86II::MO_TLSLDM, /*LocalDynamic=*/true);
+ X86II::MO_TLSLDM, /*UseTLSDESC=*/false,
+ /*LocalDynamic=*/true);
}
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -18684,21 +18703,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget.isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
+ bool UseTLSDESC = DAG.getTarget().useTLSDESC();
switch (model) {
- case TLSModel::GeneralDynamic:
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTarget64BitLP64())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
- return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
- }
- return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
- case TLSModel::LocalDynamic:
+ case TLSModel::LocalDynamic:
+ if (!UseTLSDESC)
return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
Subtarget.isTarget64BitLP64());
- case TLSModel::InitialExec:
- case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
- PositionIndependent);
+ [[fallthrough]];
+ case TLSModel::GeneralDynamic:
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTarget64BitLP64()) {
+ // auto PtrVT = getPointerTy(DAG.getDataLayout(), X86AS::FS);
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC);
+ }
+ return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC);
+ }
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC);
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
+ PositionIndependent);
}
llvm_unreachable("Unknown TLS model.");
}
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index d3b7d97a83caf0..e447e17c2d7d09 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -257,6 +257,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLSLDM:
RefKind = MCSymbolRefExpr::VK_TLSLDM;
break;
+ case X86II::MO_TLSDESC:
+ RefKind = MCSymbolRefExpr::VK_TLSDESC;
+ break;
+ case X86II::MO_TLSCALL:
+ RefKind = MCSymbolRefExpr::VK_TLSCALL;
+ break;
case X86II::MO_GOTTPOFF:
RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
break;
@@ -524,13 +530,14 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
MI.getOpcode() == X86::TLS_base_addr64;
MCContext &Ctx = OutStreamer->getContext();
+ bool isTLSDESC = MI.getOperand(3).getTargetFlags() == X86II::MO_TLSDESC;
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
case X86::TLS_addr32:
case X86::TLS_addr64:
case X86::TLS_addrX32:
- SRVK = MCSymbolRefExpr::VK_TLSGD;
+ SRVK = isTLSDESC ? MCSymbolRefExpr::VK_TLSDESC : MCSymbolRefExpr::VK_TLSGD;
break;
case X86::TLS_base_addr32:
SRVK = MCSymbolRefExpr::VK_TLSLDM;
@@ -554,7 +561,26 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
Ctx.getAsmInfo()->canRelaxRelocations();
- if (Is64Bits) {
+ if (isTLSDESC) {
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
+ MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)),
+ MCSymbolRefExpr::VK_TLSCALL, Ctx);
+ EmitAndCountInstruction(
+ MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
+ .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+ .addReg(Is64Bits ? X86::RIP : X86::EBX)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Sym)
+ .addReg(0));
+ EmitAndCountInstruction(
+ MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
+ .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Expr)
+ .addReg(0));
+ } else if (Is64Bits) {
bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
if (NeedsPadding && Is64BitsLP64)
EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
new file mode 100644
index 00000000000000..00d8a3533a74d0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
+
+@x = thread_local global i32 0, align 4
+@y = internal thread_local global i32 0, align 4
+
+define ptr @f1() nounwind {
+; X86-LABEL: f1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: calll .L0$pb
+; X86-NEXT: .L0$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp0:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: addl %gs:0, %eax
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f1:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: # kill: def $eax killed $eax def $rax
+; X32-NEXT: addl %fs:0, %eax
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f1:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: addq %fs:0, %rax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ ret ptr %1
+}
+
+define i32 @f2() nounwind {
+; X86-LABEL: f2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %esi
+; X86-NEXT: calll .L1$pb
+; X86-NEXT: .L1$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp1:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx
+; X86-NEXT: movl %gs:0, %esi
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: movl (%eax,%esi), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f2:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rbx
+; X32-NEXT: movl %fs:0, %ebx
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: movl (%eax,%ebx), %eax
+; X32-NEXT: popq %rbx
+; X32-NEXT: retq
+;
+; X64-LABEL: f2:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movq %fs:0, %rbx
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: movl (%rax,%rbx), %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %2 = load i32, ptr %1
+ ret i32 %2
+}
+
+define ptr @f3() nounwind {
+; X86-LABEL: f3:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: calll .L2$pb
+; X86-NEXT: .L2$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp2:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L2$pb), %ebx
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: addl %gs:0, %eax
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f3:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rax
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: # kill: def $eax killed $eax def $rax
+; X32-NEXT: addl %fs:0, %eax
+; X32-NEXT: popq %rcx
+; X32-NEXT: retq
+;
+; X64-LABEL: f3:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: addq %fs:0, %rax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ ret ptr %1
+}
+
+define i32 @f4() nounwind {
+; X86-LABEL: f4:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %esi
+; X86-NEXT: calll .L3$pb
+; X86-NEXT: .L3$pb:
+; X86-NEXT: popl %ebx
+; X86-NEXT: .Ltmp3:
+; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
+; X86-NEXT: movl %gs:0, %esi
+; X86-NEXT: leal x@tlsdesc(%ebx), %eax
+; X86-NEXT: calll *x@tlscall(%eax)
+; X86-NEXT: movl (%eax,%esi), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X32-LABEL: f4:
+; X32: # %bb.0:
+; X32-NEXT: pushq %rbx
+; X32-NEXT: movl %fs:0, %ebx
+; X32-NEXT: leal x@tlsdesc(%rip), %eax
+; X32-NEXT: callq *x@tlscall(%eax)
+; X32-NEXT: movl (%eax,%ebx), %eax
+; X32-NEXT: popq %rbx
+; X32-NEXT: retq
+;
+; X64-LABEL: f4:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movq %fs:0, %rbx
+; X64-NEXT: leaq x@tlsdesc(%rip), %rax
+; X64-NEXT: callq *x@tlscall(%rax)
+; X64-NEXT: movl (%rax,%rbx), %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %2 = load i32, ptr %1
+ ret i32 %2
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
421a5e4
to
cdc9ee6
Compare
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, | ||
SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg, | ||
unsigned char OperandFlags, bool LocalDynamic = false) { | ||
static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While changing signatures, rename this to `getTLSADDR` (lowerCase)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
llvm/test/CodeGen/X86/tls-desc.ll
Outdated
@@ -0,0 +1,165 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | |||
; RUN: llc < %s -mtriple=i686-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`-unknown-unknown` can be removed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
llvm/test/CodeGen/X86/tls-desc.ll
Outdated
; RUN: llc < %s -mtriple=x86_64-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64 | ||
|
||
@x = thread_local global i32 0, align 4 | ||
@y = internal thread_local global i32 0, align 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GCC's x86-64 port assumes that FLAGS_REG and RAX are changed while all other registers are preserved.
Ideally, show that some registers are not clobbered.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the info! In that case, we need to define a new pseudo instruction that only clobbers those two registers.
I'll try to do it in the next revision.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the update. We need at least 2 internal `thread_local` variables to trigger the general dynamic => local dynamic optimization, and to demonstrate that TLSDESC optimizes local dynamic as well. The internal `thread_local` variables should be modified to prevent a future smarter Clang from optimizing out the TLS access.
GCC will generate `leaq _TLS_MODULE_BASE_@TLSDESC(%rip), %rax`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
I don't see GCC generate `leaq _TLS_MODULE_BASE_@TLSDESC(%rip), %rax`; instead, it optimized them away: https://godbolt.org/z/Ksv43bsq6
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That example allows the variables to be optimized out entirely because there's no way they can ever be modified. https://godbolt.org/z/63oz4xrj1 shows an example where it's generating LD model accesses.
Note that in some cases I've seen GCC generate a local anchor symbol instead of using `_TLS_MODULE_BASE_`. That is less optimal than using `_TLS_MODULE_BASE_`, because multiple uses of `_TLS_MODULE_BASE_` across multiple TUs can be resolved by the linker to a single GOT slot (pair), whereas a local anchor symbol will always have its own GOT slot (pair) that won't be shared with any other TU.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's worthwhile to have test cases that are `dso_local` but not internal linkage as well, and mixing the two. These can and should also use `_TLS_MODULE_BASE_` with the secondary `dtpoff` offset after the TLSDESC call returns, e.g.:
extern __thread int x __attribute__((visibility("hidden")));
static __thread int y;
int foo() { return ++x + ++y; }
The ideal thing is probably not to use `_TLS_MODULE_BASE_` when its result is used only once in the function, regardless of the linkage details of the referenced TLS symbol. That's because a second addition with the `dtpoff` value will always be needed for that. If it's a case where that addition is "free", e.g. because it can be rolled into the displacement of a load you're doing anyway, then using `_TLS_MODULE_BASE_` is ideal even if it's the only use in the function (because the GOT slot & relocs will be shared with any other such use in the whole link). However, if it requires an extra instruction to do the add after the TLSDESC call returns, then it's probably better overall to have the separate reloc for just that access.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added in `f5`.
> The ideal thing is probably not to use `_TLS_MODULE_BASE_` when its result is used only once in the function
I think it is demonstrated by `f3`?
Ping~ |
static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain, | ||
GlobalAddressSDNode *GA, SDValue *InGlue, | ||
const EVT PtrVT, unsigned ReturnReg, | ||
unsigned char OperandFlags, bool UseTLSDESC = false, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need a default arg for UseTLSDESC - all the calls to getTLSADDR seem to set it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. We don't need this arg now.
SupportedArgument = V == "gnu"; | ||
SupportedArgument = V == "gnu" || V == "gnu2"; | ||
EnableTLSDESC = V == "gnu2"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to make this option work uniformly with `trad` and `desc` too? The `gnu`/`gnu2` spelling is something we need to support for compatibility, but it would be good if we can get this to be consistent in clang. I'm hoping we can get GCC to accept the same set of options.
@MaskRay does that sound reasonable to you?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If GCC is happy to accept `trad` and `desc` aliases, then we can accept the aliases; otherwise, I do not want to introduce another way to express the same thing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume that what you're doing in the place where I asked for a comment is trying to avoid redundantly generating TLSDESC accesses for TLS_MODULE_BASE. I see that we don't generate multiple accesses for TLS_MODULE_BASE in the test cases, but I thought that was only safe to do when an earlier access dominates a later access.
Is my understanding here correct, or is there an interaction with
llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
Line 10245 in 04bbbba
I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg); |
if (UI != TGA->use_end()) | ||
return SDValue(*UI->use_begin()->use_begin(), 0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we get a comment here about why we're returning this value?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
Yes, you are correct. I think SelectionDAG takes care of it. I checked it locally, it will generate two pairs of I admit I didn't know the code here when adding it to GetTLSADDR. But I think not to create the nodes at the beginning is slightly better than to remove it later if they have the same output. Not to mention dominator iteration is expensive. |
|
||
Value *Ptr = Constant::getNullValue(PointerType::get(*DAG.getContext(), Seg)); | ||
SDValue Offset = | ||
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why add `0`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Quote from https://www.fsfla.org/~lxoliva/writeups/TLS/RFC-TLSDESC-x86.txt
An alternate design in which the function called through the TLS
descriptor returns not the TP offset, but rather the address of the
variable of interest, could refrain from adding %gs:0 to the value
returned by the call to compute the address of a symbol, and from
using the %gs: prefix when accessing the variable, but it would
require the use of a longer call instruction to enable proper
relaxation. The call instruction would have to be 7, instead of 2
bytes long, such that the linker could relax it to `addl %gs:0, %eax'.
This would make code that accesses the variable 4 bytes longer on
average (5 bytes minus one used by the %gs prefix), whereas code that
computes its address would be shorter by only two bytes. It's not
clear such a change would be profitable.
Gentle ping~ |
llvm/test/CodeGen/X86/tls-desc.ll
Outdated
ret i32 %3 | ||
} | ||
|
||
define i32 @f5() nounwind { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems that f4 and f5 can be combined? Both y and z are of internal linkage, so there isn't any nuance. You could change one to protected visibility.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
No description provided.