Skip to content

Commit ea126ae

Browse files
author
Felix (Ting Wang)
authored
[PowerPC] Tune AIX shared library TLS model at function level (llvm#84132)
Under some circumstances (library loaded with the main program), the TLS initial-exec model can be applied to local-dynamic access(es). We can use a simple heuristic to decide the update at function level: * If the number of TLS local-dynamic access(es) in the function is less than or equal to a threshold, use the TLS initial-exec model. (The threshold, which defaults to 1, is controlled by a hidden option.)
1 parent 51f178d commit ea126ae

13 files changed

+859
-3
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5077,6 +5077,10 @@ def maix_small_local_dynamic_tls : Flag<["-"], "maix-small-local-dynamic-tls">,
50775077
"where the offset from the TLS base is encoded as an "
50785078
"immediate operand (AIX 64-bit only). "
50795079
"This access sequence is not used for variables larger than 32KB.">;
5080+
def maix_shared_lib_tls_model_opt : Flag<["-"], "maix-shared-lib-tls-model-opt">,
5081+
Group<m_ppc_Features_Group>,
5082+
HelpText<"For shared library loaded with the main program, change local-dynamic access(es) "
5083+
"to initial-exec access(es) at the function level (AIX 64-bit only).">;
50805084
def maix_struct_return : Flag<["-"], "maix-struct-return">,
50815085
Group<m_Group>, Visibility<[ClangOption, CC1Option]>,
50825086
HelpText<"Return all structs in memory (PPC32 only)">,

clang/lib/Basic/Targets/PPC.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
9191
IsISA3_1 = true;
9292
} else if (Feature == "+quadword-atomics") {
9393
HasQuadwordAtomics = true;
94+
} else if (Feature == "+aix-shared-lib-tls-model-opt") {
95+
HasAIXShLibTLSModelOpt = true;
9496
}
9597
// TODO: Finish this list and add an assert that we've handled them
9698
// all.
@@ -580,6 +582,9 @@ bool PPCTargetInfo::initFeatureMap(
580582
Features["aix-small-local-exec-tls"] = false;
581583
Features["aix-small-local-dynamic-tls"] = false;
582584

585+
// Turn off TLS model opt by default.
586+
Features["aix-shared-lib-tls-model-opt"] = false;
587+
583588
Features["spe"] = llvm::StringSwitch<bool>(CPU)
584589
.Case("8548", true)
585590
.Case("e500", true)
@@ -722,6 +727,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
722727
.Case("isa-v30-instructions", IsISA3_0)
723728
.Case("isa-v31-instructions", IsISA3_1)
724729
.Case("quadword-atomics", HasQuadwordAtomics)
730+
.Case("aix-shared-lib-tls-model-opt", HasAIXShLibTLSModelOpt)
725731
.Default(false);
726732
}
727733

clang/lib/Basic/Targets/PPC.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
8181
bool IsISA3_0 = false;
8282
bool IsISA3_1 = false;
8383
bool HasQuadwordAtomics = false;
84+
bool HasAIXShLibTLSModelOpt = false;
8485

8586
protected:
8687
std::string ABI;

llvm/lib/Target/PowerPC/PPC.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,12 @@ def FeatureAIXLocalDynamicTLS :
338338
"true", "Produce a faster local-dynamic TLS sequence for this "
339339
"function for 64-bit AIX">;
340340

341+
def FeatureAIXSharedLibTLSModelOpt :
342+
SubtargetFeature<"aix-shared-lib-tls-model-opt",
343+
"HasAIXShLibTLSModelOpt", "true",
344+
"Tune TLS model at function level in shared library loaded "
345+
"with the main program (for 64-bit AIX only)">;
346+
341347
def FeaturePredictableSelectIsExpensive :
342348
SubtargetFeature<"predictable-select-expensive",
343349
"PredictableSelectIsExpensive",

llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
878878
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE;
879879
if (Model == TLSModel::InitialExec)
880880
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE;
881+
// On AIX, TLS model opt may have turned local-dynamic accesses into
882+
// initial-exec accesses.
883+
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
884+
if (Model == TLSModel::LocalDynamic &&
885+
FuncInfo->isAIXFuncUseTLSIEForLD()) {
886+
LLVM_DEBUG(
887+
dbgs() << "Current function uses IE access for default LD vars.\n");
888+
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE;
889+
}
881890
llvm_unreachable("Only expecting local-exec or initial-exec accesses!");
882891
}
883892
// For GD TLS access on AIX, we have two TOC entries for the symbol (one for
@@ -2950,7 +2959,11 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
29502959
// Setup the csect for the current TC entry. If the variant kind is
29512960
// VK_PPC_AIX_TLSGDM the entry represents the region handle, we create a
29522961
// new symbol to prefix the name with a dot.
2953-
if (I.first.second == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM) {
2962+
// If TLS model opt is turned on, create a new symbol to prefix the name
2963+
// with a dot.
2964+
if (I.first.second == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
2965+
(Subtarget->hasAIXShLibTLSModelOpt() &&
2966+
I.first.second == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD)) {
29542967
SmallString<128> Name;
29552968
StringRef Prefix = ".";
29562969
Name += Prefix;

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,11 @@ static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
141141
"ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
142142
cl::desc("max depth when checking alias info in GatherAllAliases()"));
143143

144+
static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
145+
"ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
146+
cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
147+
"function to use initial-exec"));
148+
144149
STATISTIC(NumTailCalls, "Number of tail calls");
145150
STATISTIC(NumSiblingCalls, "Number of sibling calls");
146151
STATISTIC(ShufflesHandledWithVPERM,
@@ -3362,6 +3367,54 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
33623367
return LowerGlobalTLSAddressLinux(Op, DAG);
33633368
}
33643369

3370+
/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3371+
/// and then apply the update.
3372+
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3373+
SelectionDAG &DAG,
3374+
const TargetMachine &TM) {
3375+
// Initialize TLS model opt setting lazily:
3376+
// (1) Use initial-exec for single TLS var references within current function.
3377+
// (2) Use local-dynamic for multiple TLS var references within current
3378+
// function.
3379+
PPCFunctionInfo *FuncInfo =
3380+
DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3381+
if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3382+
SmallPtrSet<const GlobalValue *, 8> TLSGV;
3383+
// Iterate over all instructions within current function, collect all TLS
3384+
// global variables (global variables taken as the first parameter to
3385+
// Intrinsic::threadlocal_address).
3386+
const Function &Func = DAG.getMachineFunction().getFunction();
3387+
for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
3388+
++BI)
3389+
for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
3390+
II != IE; ++II)
3391+
if (II->getOpcode() == Instruction::Call)
3392+
if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
3393+
if (Function *CF = CI->getCalledFunction())
3394+
if (CF->isDeclaration() &&
3395+
CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3396+
if (const GlobalValue *GV =
3397+
dyn_cast<GlobalValue>(II->getOperand(0))) {
3398+
TLSModel::Model GVModel = TM.getTLSModel(GV);
3399+
if (GVModel == TLSModel::LocalDynamic)
3400+
TLSGV.insert(GV);
3401+
}
3402+
3403+
unsigned TLSGVCnt = TLSGV.size();
3404+
LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3405+
if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3406+
FuncInfo->setAIXFuncUseTLSIEForLD();
3407+
FuncInfo->setAIXFuncTLSModelOptInitDone();
3408+
}
3409+
3410+
if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3411+
LLVM_DEBUG(
3412+
dbgs() << DAG.getMachineFunction().getName()
3413+
<< " function is using the TLS-IE model for TLS-LD access.\n");
3414+
Model = TLSModel::InitialExec;
3415+
}
3416+
}
3417+
33653418
SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
33663419
SelectionDAG &DAG) const {
33673420
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
@@ -3374,6 +3427,11 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
33743427
EVT PtrVT = getPointerTy(DAG.getDataLayout());
33753428
bool Is64Bit = Subtarget.isPPC64();
33763429
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3430+
3431+
// Apply update to the TLS model.
3432+
if (Subtarget.hasAIXShLibTLSModelOpt())
3433+
updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3434+
33773435
bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
33783436

33793437
if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {

llvm/lib/Target/PowerPC/PPCMCInstLower.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "MCTargetDesc/PPCMCExpr.h"
1515
#include "PPC.h"
16+
#include "PPCMachineFunctionInfo.h"
1617
#include "PPCSubtarget.h"
1718
#include "llvm/ADT/SmallString.h"
1819
#include "llvm/ADT/Twine.h"
@@ -81,6 +82,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
8182
}
8283

8384
const TargetMachine &TM = Printer.TM;
85+
const MachineInstr *MI = MO.getParent();
86+
const MachineFunction *MF = MI->getMF();
8487

8588
if (MO.getTargetFlags() == PPCII::MO_PLT)
8689
RefKind = MCSymbolRefExpr::VK_PLT;
@@ -100,18 +103,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
100103
MO.getTargetFlags() == PPCII::MO_TLSLD_FLAG) {
101104
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
102105
TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
106+
const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
103107
// For the local-[exec|dynamic] TLS model, we may generate the offset from
104108
// the TLS base as an immediate operand (instead of using a TOC entry). Set
105109
// the relocation type in case the result is used for purposes other than a
106110
// TOC reference. In TOC reference cases, this result is discarded.
107111
if (Model == TLSModel::LocalExec)
108112
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
113+
else if (Model == TLSModel::LocalDynamic &&
114+
FuncInfo->isAIXFuncUseTLSIEForLD())
115+
// On AIX, TLS model opt may have turned local-dynamic accesses into
116+
// initial-exec accesses.
117+
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSIE;
109118
else if (Model == TLSModel::LocalDynamic)
110119
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLD;
111120
}
112121

113-
const MachineInstr *MI = MO.getParent();
114-
const MachineFunction *MF = MI->getMF();
115122
const Module *M = MF->getFunction().getParent();
116123
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
117124

llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,11 @@ class PPCFunctionInfo : public MachineFunctionInfo {
150150
/// to use SExt/ZExt flags in later optimization.
151151
std::vector<std::pair<Register, ISD::ArgFlagsTy>> LiveInAttrs;
152152

153+
/// Flags for aix-shared-lib-tls-model-opt, will be lazily initialized for
154+
/// each function.
155+
bool AIXFuncUseTLSIEForLD = false;
156+
bool AIXFuncTLSModelOptInitDone = false;
157+
153158
public:
154159
explicit PPCFunctionInfo(const Function &F, const TargetSubtargetInfo *STI);
155160

@@ -221,6 +226,13 @@ class PPCFunctionInfo : public MachineFunctionInfo {
221226
void setHasFastCall() { HasFastCall = true; }
222227
bool hasFastCall() const { return HasFastCall;}
223228

229+
void setAIXFuncTLSModelOptInitDone() { AIXFuncTLSModelOptInitDone = true; }
230+
bool isAIXFuncTLSModelOptInitDone() const {
231+
return AIXFuncTLSModelOptInitDone;
232+
}
233+
void setAIXFuncUseTLSIEForLD() { AIXFuncUseTLSIEForLD = true; }
234+
bool isAIXFuncUseTLSIEForLD() const { return AIXFuncUseTLSIEForLD; }
235+
224236
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
225237
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
226238

llvm/lib/Target/PowerPC/PPCSubtarget.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,11 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
141141
"-data-sections.\n",
142142
false);
143143
}
144+
145+
if (HasAIXShLibTLSModelOpt && (!TargetTriple.isOSAIX() || !IsPPC64))
146+
report_fatal_error("The aix-shared-lib-tls-model-opt attribute "
147+
"is only supported on AIX in 64-bit mode.\n",
148+
false);
144149
}
145150

146151
bool PPCSubtarget::enableMachineScheduler() const { return true; }
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
3+
; RUN: -mattr=+aix-shared-lib-tls-model-opt --code-model=large < %s | FileCheck %s --check-prefixes=OPT
4+
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
5+
; RUN: -mattr=+aix-small-local-dynamic-tls --code-model=large < %s | FileCheck %s --check-prefixes=SMALL
6+
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
7+
; RUN: -mattr=+aix-shared-lib-tls-model-opt -mattr=+aix-small-local-dynamic-tls \
8+
; RUN: --code-model=large < %s | FileCheck %s --check-prefixes=BOTH
9+
10+
@VarTLSLD1 = internal thread_local(localdynamic) global i32 42, align 4
11+
12+
define i32 @Single_LD(i32 %P, i32 %Q) {
13+
; OPT-LABEL: Single_LD:
14+
; OPT: # %bb.0: # %entry
15+
; OPT-NEXT: and 4, 3, 4
16+
; OPT-NEXT: addis 3, L..C0@u(2)
17+
; OPT-NEXT: ld 3, L..C0@l(3)
18+
; OPT-NEXT: cmpwi 4, -1
19+
; OPT-NEXT: lwzx 3, 13, 3
20+
; OPT-NEXT: blr
21+
;
22+
; SMALL-LABEL: Single_LD:
23+
; SMALL: # %bb.0: # %entry
24+
; SMALL-NEXT: mflr 0
25+
; SMALL-NEXT: stdu 1, -48(1)
26+
; SMALL-NEXT: and 6, 3, 4
27+
; SMALL-NEXT: addis 3, L..C0@u(2)
28+
; SMALL-NEXT: std 0, 64(1)
29+
; SMALL-NEXT: ld 3, L..C0@l(3)
30+
; SMALL-NEXT: bla .__tls_get_mod[PR]
31+
; SMALL-NEXT: cmpwi 6, -1
32+
; SMALL-NEXT: lwz 3, VarTLSLD1[TL]@ld(3)
33+
; SMALL-NEXT: addi 1, 1, 48
34+
; SMALL-NEXT: ld 0, 16(1)
35+
; SMALL-NEXT: mtlr 0
36+
; SMALL-NEXT: blr
37+
;
38+
; BOTH-LABEL: Single_LD:
39+
; BOTH: # %bb.0: # %entry
40+
; BOTH-NEXT: and 4, 3, 4
41+
; BOTH-NEXT: addis 3, L..C0@u(2)
42+
; BOTH-NEXT: ld 3, L..C0@l(3)
43+
; BOTH-NEXT: cmpwi 4, -1
44+
; BOTH-NEXT: lwzx 3, 13, 3
45+
; BOTH-NEXT: blr
46+
entry:
47+
%a = icmp slt i32 %P, 0
48+
%b = icmp slt i32 %Q, 0
49+
%c = and i1 %a, %b
50+
%tls1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @VarTLSLD1)
51+
%load1 = load i32, ptr %tls1, align 4
52+
br i1 %c, label %bb1, label %return
53+
54+
bb1:
55+
%tls2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @VarTLSLD1)
56+
%load2 = load i32, ptr %tls2, align 4
57+
ret i32 %load2
58+
59+
return:
60+
ret i32 %load1
61+
}
62+
63+
; OPT-LABEL: .toc
64+
; OPT-LABEL: L..C0:
65+
; OPT-NEXT: .tc VarTLSLD1[TE],VarTLSLD1[TL]@ie
66+
67+
; SMALL-LABEL: .toc
68+
; SMALL-LABEL: L..C0:
69+
; SMALL-NEXT: .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
70+
; SMALL-NEXT: .rename _Renamed..5f24__TLSML[TC],"_$TLSML"
71+
72+
; BOTH-LABEL: .toc
73+
; BOTH-LABEL: L..C0:
74+
; BOTH-NEXT: .tc VarTLSLD1[TE],VarTLSLD1[TL]@ie

0 commit comments

Comments
 (0)