Skip to content

Commit 09d51a8

Browse files
Felix (Ting Wang) and amy-kwan authored
[PowerPC][AIX] Enable aix-small-local-dynamic-tls target attribute (#86641)
Following the aix-small-local-exec-tls target attribute, this patch adds a target attribute for an AIX-specific option in llc that informs the compiler that it can use a faster access sequence for the local-dynamic TLS model (formally named aix-small-local-dynamic-tls) when TLS variables are less than ~32KB in size. The patch either produces an addi/la with a displacement off of the module handle (the return value from .__tls_get_mod) when the address is calculated, or it produces an addi/la followed by a load/store when the address is calculated and used for further accesses. --------- Co-authored-by: Amy Kwan <[email protected]>
1 parent bf1d7b8 commit 09d51a8

10 files changed

+1530
-396
lines changed

llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
7171
return {XCOFF::RelocationType::R_TOCL, SignAndSizeForHalf16};
7272
case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
7373
return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForHalf16};
74+
case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
75+
return {XCOFF::RelocationType::R_TLS_LD, SignAndSizeForHalf16};
7476
}
7577
} break;
7678
case PPC::fixup_ppc_half16ds:
@@ -86,6 +88,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
8688
return {XCOFF::RelocationType::R_TOCL, 15};
8789
case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
8890
return {XCOFF::RelocationType::R_TLS_LE, 15};
91+
case MCSymbolRefExpr::VK_PPC_AIX_TLSLD:
92+
return {XCOFF::RelocationType::R_TLS_LD, 15};
8993
}
9094
} break;
9195
case PPC::fixup_ppc_br24:

llvm/lib/Target/PowerPC/PPC.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,15 @@ def FeatureAIXLocalExecTLS :
329329
"Produce a TOC-free local-exec TLS sequence for this function "
330330
"for 64-bit AIX">;
331331

332+
// Specifies that local-dynamic TLS accesses in any function with this target
333+
// attribute should use the optimized sequence (where the offset is an immediate
334+
// off the module-handle for which the linker might add fix-up code if the
335+
// immediate is too large).
336+
def FeatureAIXLocalDynamicTLS :
337+
SubtargetFeature<"aix-small-local-dynamic-tls", "HasAIXSmallLocalDynamicTLS",
338+
"true", "Produce a faster local-dynamic TLS sequence for this "
339+
"function for 64-bit AIX">;
340+
332341
def FeaturePredictableSelectIsExpensive :
333342
SubtargetFeature<"predictable-select-expensive",
334343
"PredictableSelectIsExpensive",

llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -803,7 +803,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
803803
MCInst TmpInst;
804804
const bool IsPPC64 = Subtarget->isPPC64();
805805
const bool IsAIX = Subtarget->isAIXABI();
806-
const bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
806+
const bool HasAIXSmallLocalTLS = Subtarget->hasAIXSmallLocalExecTLS() ||
807+
Subtarget->hasAIXSmallLocalDynamicTLS();
807808
const Module *M = MF->getFunction().getParent();
808809
PICLevel::Level PL = M->getPICLevel();
809810

@@ -1612,19 +1613,19 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
16121613
case PPC::LFD:
16131614
case PPC::STFD:
16141615
case PPC::ADDI8: {
1615-
// A faster non-TOC-based local-exec sequence is represented by `addi`
1616-
// or a load/store instruction (that directly loads or stores off of the
1617-
// thread pointer) with an immediate operand having the MO_TPREL_FLAG.
1616+
// A faster non-TOC-based local-[exec|dynamic] sequence is represented by
1617+
// `addi` or a load/store instruction (that directly loads or stores off of
1618+
// the thread pointer) with an immediate operand having the MO_TPREL_FLAG.
16181619
// Such instructions do not otherwise arise.
1619-
if (!HasAIXSmallLocalExecTLS)
1620+
if (!HasAIXSmallLocalTLS)
16201621
break;
16211622
bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;
16221623
unsigned OpNum = IsMIADDI8 ? 2 : 1;
16231624
const MachineOperand &MO = MI->getOperand(OpNum);
16241625
unsigned Flag = MO.getTargetFlags();
16251626
if (Flag == PPCII::MO_TPREL_FLAG ||
16261627
Flag == PPCII::MO_GOT_TPREL_PCREL_FLAG ||
1627-
Flag == PPCII::MO_TPREL_PCREL_FLAG) {
1628+
Flag == PPCII::MO_TPREL_PCREL_FLAG || Flag == PPCII::MO_TLSLD_FLAG) {
16281629
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
16291630

16301631
const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset());
@@ -1672,7 +1673,12 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
16721673

16731674
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
16741675
const GlobalValue *GValue = MO.getGlobal();
1675-
assert(TM.getTLSModel(GValue) == TLSModel::LocalExec &&
1676+
// TODO: Handle the aix-small-local-dynamic-tls non-zero offset case.
1677+
TLSModel::Model Model = TM.getTLSModel(GValue);
1678+
if (Model == TLSModel::LocalDynamic) {
1679+
return nullptr;
1680+
}
1681+
assert(Model == TLSModel::LocalExec &&
16761682
"Only local-exec accesses are handled!");
16771683

16781684
bool IsGlobalADeclaration = GValue->isDeclarationForLinker();

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,10 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
153153

154154
static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
155155

156-
// A faster local-exec TLS access sequence (enabled with the
157-
// -maix-small-local-exec-tls option) can be produced for TLS variables;
158-
// consistent with the IBM XL compiler, we apply a max size of slightly under
159-
// 32KB.
156+
// A faster local-[exec|dynamic] TLS access sequence (enabled with the
157+
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
158+
// variables; consistent with the IBM XL compiler, we apply a max size of
159+
// slightly under 32KB.
160160
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
161161

162162
// FIXME: Remove this once the bug has been fixed!
@@ -3434,6 +3434,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
34343434
}
34353435

34363436
if (Model == TLSModel::LocalDynamic) {
3437+
bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3438+
3439+
// We do not implement the 32-bit version of the faster access sequence
3440+
// for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3441+
if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3442+
report_fatal_error("The small-local-dynamic TLS access sequence is "
3443+
"currently only supported on AIX (64-bit mode).");
3444+
34373445
// For local-dynamic on AIX, we need to generate one TOC entry for each
34383446
// variable offset, and a single module-handle TOC entry for the entire
34393447
// file.
@@ -3454,6 +3462,22 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
34543462
SDValue ModuleHandle =
34553463
DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
34563464

3465+
// With the -maix-small-local-dynamic-tls option, produce a faster access
3466+
// sequence for local-dynamic TLS variables where the offset from the
3467+
// module-handle is encoded as an immediate operand.
3468+
//
3469+
// We only utilize the faster local-dynamic access sequence when the TLS
3470+
// variable has a size within the policy limit. We treat types that are
3471+
// not sized or are empty as being over the policy size limit.
3472+
if (HasAIXSmallLocalDynamicTLS) {
3473+
Type *GVType = GV->getValueType();
3474+
if (GVType->isSized() && !GVType->isEmptyTy() &&
3475+
GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
3476+
AIXSmallTlsPolicySizeLimit)
3477+
return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3478+
ModuleHandle);
3479+
}
3480+
34573481
return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
34583482
}
34593483

llvm/lib/Target/PowerPC/PPCMCInstLower.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,18 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
9696
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_PCREL;
9797
else if (MO.getTargetFlags() == PPCII::MO_GOT_TPREL_PCREL_FLAG)
9898
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL_PCREL;
99-
else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG) {
99+
else if (MO.getTargetFlags() == PPCII::MO_TPREL_FLAG ||
100+
MO.getTargetFlags() == PPCII::MO_TLSLD_FLAG) {
100101
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
101102
TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
102-
// For the local-exec TLS model, we may generate the offset from the TLS
103-
// base as an immediate operand (instead of using a TOC entry).
104-
// Set the relocation type in case the result is used for purposes other
105-
// than a TOC reference. In TOC reference cases, this result is discarded.
103+
// For the local-[exec|dynamic] TLS model, we may generate the offset from
104+
// the TLS base as an immediate operand (instead of using a TOC entry). Set
105+
// the relocation type in case the result is used for purposes other than a
106+
// TOC reference. In TOC reference cases, this result is discarded.
106107
if (Model == TLSModel::LocalExec)
107108
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
109+
else if (Model == TLSModel::LocalDynamic)
110+
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLD;
108111
}
109112

110113
const MachineInstr *MI = MO.getParent();

llvm/lib/Target/PowerPC/PPCSubtarget.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -124,22 +124,22 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
124124
// Determine endianness.
125125
IsLittleEndian = TM.isLittleEndian();
126126

127-
if (HasAIXSmallLocalExecTLS) {
127+
if (HasAIXSmallLocalExecTLS || HasAIXSmallLocalDynamicTLS) {
128128
if (!TargetTriple.isOSAIX() || !IsPPC64)
129-
report_fatal_error(
130-
"The aix-small-local-exec-tls attribute is only supported on AIX in "
131-
"64-bit mode.\n",
132-
false);
133-
// The aix-small-local-exec-tls attribute should only be used with
129+
report_fatal_error("The aix-small-local-[exec|dynamic]-tls attribute is "
130+
"only supported on AIX in "
131+
"64-bit mode.\n",
132+
false);
133+
// The aix-small-local-[exec|dynamic]-tls attribute should only be used with
134134
// -data-sections, as having data sections turned off with this option
135-
// is not ideal for performance. Moreover, the small-local-exec-tls region
136-
// is a limited resource, and should not be used for variables that may
137-
// be replaced.
135+
// is not ideal for performance. Moreover, the
136+
// small-local-[exec|dynamic]-tls region is a limited resource, and should
137+
// not be used for variables that may be replaced.
138138
if (!TM.getDataSections())
139-
report_fatal_error(
140-
"The aix-small-local-exec-tls attribute can only be specified with "
141-
"-data-sections.\n",
142-
false);
139+
report_fatal_error("The aix-small-local-[exec|dynamic]-tls attribute can "
140+
"only be specified with "
141+
"-data-sections.\n",
142+
false);
143143
}
144144
}
145145

0 commit comments

Comments
 (0)