Skip to content

[PowerPC] Tune AIX shared library TLS model at function level #84132

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5077,6 +5077,10 @@ def maix_small_local_dynamic_tls : Flag<["-"], "maix-small-local-dynamic-tls">,
"where the offset from the TLS base is encoded as an "
"immediate operand (AIX 64-bit only). "
"This access sequence is not used for variables larger than 32KB.">;
// Driver flag "-maix-shared-lib-tls-model-opt", placed in the PowerPC feature
// option group. Per its help text it requests that local-dynamic TLS accesses
// be switched to initial-exec on a per-function basis (AIX 64-bit only).
// NOTE(review): presumably forwarded to the backend as the
// "aix-shared-lib-tls-model-opt" target feature via the group's generic
// handling -- confirm against the driver's PPC feature translation.
def maix_shared_lib_tls_model_opt : Flag<["-"], "maix-shared-lib-tls-model-opt">,
Group<m_ppc_Features_Group>,
HelpText<"For shared library loaded with the main program, change local-dynamic access(es) "
"to initial-exec access(es) at the function level (AIX 64-bit only).">;
def maix_struct_return : Flag<["-"], "maix-struct-return">,
Group<m_Group>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Return all structs in memory (PPC32 only)">,
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Basic/Targets/PPC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
IsISA3_1 = true;
} else if (Feature == "+quadword-atomics") {
HasQuadwordAtomics = true;
} else if (Feature == "+aix-shared-lib-tls-model-opt") {
HasAIXShLibTLSModelOpt = true;
}
// TODO: Finish this list and add an assert that we've handled them
// all.
Expand Down Expand Up @@ -580,6 +582,9 @@ bool PPCTargetInfo::initFeatureMap(
Features["aix-small-local-exec-tls"] = false;
Features["aix-small-local-dynamic-tls"] = false;

// Turn off TLS model opt by default.
Features["aix-shared-lib-tls-model-opt"] = false;

Features["spe"] = llvm::StringSwitch<bool>(CPU)
.Case("8548", true)
.Case("e500", true)
Expand Down Expand Up @@ -722,6 +727,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
.Case("isa-v30-instructions", IsISA3_0)
.Case("isa-v31-instructions", IsISA3_1)
.Case("quadword-atomics", HasQuadwordAtomics)
.Case("aix-shared-lib-tls-model-opt", HasAIXShLibTLSModelOpt)
.Default(false);
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/PPC.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool IsISA3_0 = false;
bool IsISA3_1 = false;
bool HasQuadwordAtomics = false;
bool HasAIXShLibTLSModelOpt = false;

protected:
std::string ABI;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/PowerPC/PPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,12 @@ def FeatureAIXLocalDynamicTLS :
"true", "Produce a faster local-dynamic TLS sequence for this "
"function for 64-bit AIX">;

// Subtarget feature "aix-shared-lib-tls-model-opt". Enabling it sets the
// subtarget field HasAIXShLibTLSModelOpt to "true"; the feature is off unless
// explicitly requested.
def FeatureAIXSharedLibTLSModelOpt :
SubtargetFeature<"aix-shared-lib-tls-model-opt",
"HasAIXShLibTLSModelOpt", "true",
"Tune TLS model at function level in shared library loaded "
"with the main program (for 64-bit AIX only)">;

def FeaturePredictableSelectIsExpensive :
SubtargetFeature<"predictable-select-expensive",
"PredictableSelectIsExpensive",
Expand Down
15 changes: 14 additions & 1 deletion llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE;
if (Model == TLSModel::InitialExec)
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE;
// On AIX, TLS model opt may have turned local-dynamic accesses into
// initial-exec accesses.
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
if (Model == TLSModel::LocalDynamic &&
FuncInfo->isAIXFuncUseTLSIEForLD()) {
LLVM_DEBUG(
dbgs() << "Current function uses IE access for default LD vars.\n");
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE;
}
llvm_unreachable("Only expecting local-exec or initial-exec accesses!");
}
// For GD TLS access on AIX, we have two TOC entries for the symbol (one for
Expand Down Expand Up @@ -2950,7 +2959,11 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
// Setup the csect for the current TC entry. If the variant kind is
// VK_PPC_AIX_TLSGDM the entry represents the region handle, we create a
// new symbol to prefix the name with a dot.
if (I.first.second == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM) {
// If TLS model opt is turned on, create a new symbol to prefix the name
// with a dot.
if (I.first.second == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
(Subtarget->hasAIXShLibTLSModelOpt() &&
I.first.second == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD)) {
SmallString<128> Name;
StringRef Prefix = ".";
Name += Prefix;
Expand Down
58 changes: 58 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
"ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
cl::desc("max depth when checking alias info in GatherAllAliases()"));

// Hidden tuning knob for the aix-shared-lib-tls-model-opt feature: the
// inclusive upper bound on the number of distinct local-dynamic TLS variables
// a function may reference and still have its local-dynamic accesses lowered
// as initial-exec. The default of 1 limits the conversion to functions that
// reference at most one such variable.
static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
"ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
"function to use initial-exec"));

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
Expand Down Expand Up @@ -3362,6 +3367,54 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
return LowerGlobalTLSAddressLinux(Op, DAG);
}

/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
/// and then apply the update.
///
/// The per-function decision is computed on the first call for a function and
/// cached in PPCFunctionInfo: the function's IR is scanned for calls to
/// Intrinsic::threadlocal_address, and the distinct local-dynamic TLS globals
/// passed to them are counted. If that count does not exceed
/// -ppc-aix-shared-lib-tls-model-opt-limit, the function is marked as using
/// initial-exec in place of local-dynamic.
///
/// \param Model [in,out] TLS model of the variable being lowered. It is
///        rewritten to TLSModel::InitialExec only when it is
///        TLSModel::LocalDynamic and the current function has been marked as
///        using IE for LD; every other model is left untouched.
/// \param DAG   Selection DAG of the function currently being lowered.
/// \param TM    Target machine used to query the TLS model of each global.
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
                                         SelectionDAG &DAG,
                                         const TargetMachine &TM) {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  // Initialize TLS model opt setting lazily:
  // (1) Use initial-exec for single TLS var references within current function.
  // (2) Use local-dynamic for multiple TLS var references within current
  // function.
  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
    SmallPtrSet<const GlobalValue *, 8> TLSGV;
    // Iterate over all instructions within the current function and collect
    // all TLS global variables (global variables taken as the first parameter
    // to Intrinsic::threadlocal_address).
    for (const BasicBlock &BB : MF.getFunction())
      for (const Instruction &I : BB) {
        // Only call instructions are of interest; dyn_cast<CallInst> already
        // rejects every other opcode.
        const auto *CI = dyn_cast<CallInst>(&I);
        if (!CI)
          continue;
        // Indirect calls have no statically known callee.
        const Function *CF = CI->getCalledFunction();
        if (!CF || !CF->isDeclaration() ||
            CF->getIntrinsicID() != Intrinsic::threadlocal_address)
          continue;
        // The set de-duplicates repeated accesses to the same variable, so
        // the count below is of distinct variables, not of access sites.
        const auto *GV = dyn_cast<GlobalValue>(CI->getArgOperand(0));
        if (GV && TM.getTLSModel(GV) == TLSModel::LocalDynamic)
          TLSGV.insert(GV);
      }

    unsigned TLSGVCnt = TLSGV.size();
    // Stream the count directly rather than through a printf-style "%d",
    // which does not match an unsigned argument.
    LLVM_DEBUG(dbgs() << "LocalDynamic TLSGV count:" << TLSGVCnt << "\n");
    if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
      FuncInfo->setAIXFuncUseTLSIEForLD();
    FuncInfo->setAIXFuncTLSModelOptInitDone();
  }

  // Only local-dynamic accesses are converted. The asm printer and the MC
  // lowering map a symbol to the initial-exec relocation only when its own
  // model is local-dynamic, so rewriting any other model here (for example
  // general-dynamic) would make lowering and emission disagree.
  if (Model == TLSModel::LocalDynamic && FuncInfo->isAIXFuncUseTLSIEForLD()) {
    LLVM_DEBUG(
        dbgs() << MF.getName()
               << " function is using the TLS-IE model for TLS-LD access.\n");
    Model = TLSModel::InitialExec;
  }
}

SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
Expand All @@ -3374,6 +3427,11 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool Is64Bit = Subtarget.isPPC64();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

// Apply update to the TLS model.
if (Subtarget.hasAIXShLibTLSModelOpt())
updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());

bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;

if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "MCTargetDesc/PPCMCExpr.h"
#include "PPC.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
Expand Down Expand Up @@ -81,6 +82,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
}

const TargetMachine &TM = Printer.TM;
const MachineInstr *MI = MO.getParent();
const MachineFunction *MF = MI->getMF();

if (MO.getTargetFlags() == PPCII::MO_PLT)
RefKind = MCSymbolRefExpr::VK_PLT;
Expand All @@ -100,18 +103,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
MO.getTargetFlags() == PPCII::MO_TLSLD_FLAG) {
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
// For the local-[exec|dynamic] TLS model, we may generate the offset from
// the TLS base as an immediate operand (instead of using a TOC entry). Set
// the relocation type in case the result is used for purposes other than a
// TOC reference. In TOC reference cases, this result is discarded.
if (Model == TLSModel::LocalExec)
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
else if (Model == TLSModel::LocalDynamic &&
FuncInfo->isAIXFuncUseTLSIEForLD())
// On AIX, TLS model opt may have turned local-dynamic accesses into
// initial-exec accesses.
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSIE;
else if (Model == TLSModel::LocalDynamic)
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLD;
}

const MachineInstr *MI = MO.getParent();
const MachineFunction *MF = MI->getMF();
const Module *M = MF->getFunction().getParent();
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());

Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// to use SExt/ZExt flags in later optimization.
std::vector<std::pair<Register, ISD::ArgFlagsTy>> LiveInAttrs;

// Flags for aix-shared-lib-tls-model-opt, will be lazily initialized for
// each function.

/// True when this function's local-dynamic TLS accesses are to be treated as
/// initial-exec accesses. Starts false; set by setAIXFuncUseTLSIEForLD().
bool AIXFuncUseTLSIEForLD = false;
/// True once the TLS model opt decision for this function has been computed,
/// so that it is not recomputed. Starts false; set by
/// setAIXFuncTLSModelOptInitDone().
bool AIXFuncTLSModelOptInitDone = false;

public:
explicit PPCFunctionInfo(const Function &F, const TargetSubtargetInfo *STI);

Expand Down Expand Up @@ -221,6 +226,13 @@ class PPCFunctionInfo : public MachineFunctionInfo {
void setHasFastCall() { HasFastCall = true; }
bool hasFastCall() const { return HasFastCall;}

/// Mark the aix-shared-lib-tls-model-opt decision for this function as
/// computed. The flag is one-way: there is no way to clear it again.
void setAIXFuncTLSModelOptInitDone() {
  AIXFuncTLSModelOptInitDone = true;
}
/// Returns true if the aix-shared-lib-tls-model-opt decision for this
/// function has already been computed.
bool isAIXFuncTLSModelOptInitDone() const {
return AIXFuncTLSModelOptInitDone;
}
/// Record that this function's local-dynamic TLS accesses are to be emitted
/// as initial-exec accesses. The flag is one-way: it cannot be cleared.
void setAIXFuncUseTLSIEForLD() {
  AIXFuncUseTLSIEForLD = true;
}
/// Returns true if this function's local-dynamic TLS accesses are to be
/// emitted as initial-exec accesses.
bool isAIXFuncUseTLSIEForLD() const {
  return AIXFuncUseTLSIEForLD;
}

int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/PowerPC/PPCSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
"-data-sections.\n",
false);
}

if (HasAIXShLibTLSModelOpt && (!TargetTriple.isOSAIX() || !IsPPC64))
report_fatal_error("The aix-shared-lib-tls-model-opt attribute "
"is only supported on AIX in 64-bit mode.\n",
false);
}

bool PPCSubtarget::enableMachineScheduler() const { return true; }
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
; RUN: -mattr=+aix-shared-lib-tls-model-opt --code-model=large < %s | FileCheck %s --check-prefixes=OPT
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
; RUN: -mattr=+aix-small-local-dynamic-tls --code-model=large < %s | FileCheck %s --check-prefixes=SMALL
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
; RUN: -mattr=+aix-shared-lib-tls-model-opt -mattr=+aix-small-local-dynamic-tls \
; RUN: --code-model=large < %s | FileCheck %s --check-prefixes=BOTH

@VarTLSLD1 = internal thread_local(localdynamic) global i32 42, align 4

; Single_LD accesses one local-dynamic TLS variable (@VarTLSLD1) from two
; different basic blocks. Both accesses name the same variable, so the function
; references a single distinct local-dynamic variable. With
; +aix-shared-lib-tls-model-opt (OPT and BOTH prefixes) the expected code loads
; the variable's offset from the TOC and indexes off r13, with no call to
; .__tls_get_mod; without the feature (SMALL prefix) the local-dynamic sequence
; with the .__tls_get_mod call is expected.
define i32 @Single_LD(i32 %P, i32 %Q) {
; OPT-LABEL: Single_LD:
; OPT: # %bb.0: # %entry
; OPT-NEXT: and 4, 3, 4
; OPT-NEXT: addis 3, L..C0@u(2)
; OPT-NEXT: ld 3, L..C0@l(3)
; OPT-NEXT: cmpwi 4, -1
; OPT-NEXT: lwzx 3, 13, 3
; OPT-NEXT: blr
;
; SMALL-LABEL: Single_LD:
; SMALL: # %bb.0: # %entry
; SMALL-NEXT: mflr 0
; SMALL-NEXT: stdu 1, -48(1)
; SMALL-NEXT: and 6, 3, 4
; SMALL-NEXT: addis 3, L..C0@u(2)
; SMALL-NEXT: std 0, 64(1)
; SMALL-NEXT: ld 3, L..C0@l(3)
; SMALL-NEXT: bla .__tls_get_mod[PR]
; SMALL-NEXT: cmpwi 6, -1
; SMALL-NEXT: lwz 3, VarTLSLD1[TL]@ld(3)
; SMALL-NEXT: addi 1, 1, 48
; SMALL-NEXT: ld 0, 16(1)
; SMALL-NEXT: mtlr 0
; SMALL-NEXT: blr
;
; BOTH-LABEL: Single_LD:
; BOTH: # %bb.0: # %entry
; BOTH-NEXT: and 4, 3, 4
; BOTH-NEXT: addis 3, L..C0@u(2)
; BOTH-NEXT: ld 3, L..C0@l(3)
; BOTH-NEXT: cmpwi 4, -1
; BOTH-NEXT: lwzx 3, 13, 3
; BOTH-NEXT: blr
entry:
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = and i1 %a, %b
%tls1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @VarTLSLD1)
%load1 = load i32, ptr %tls1, align 4
br i1 %c, label %bb1, label %return

bb1:
%tls2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @VarTLSLD1)
%load2 = load i32, ptr %tls2, align 4
ret i32 %load2

return:
ret i32 %load1
}

; OPT-LABEL: .toc
; OPT-LABEL: L..C0:
; OPT-NEXT: .tc VarTLSLD1[TE],VarTLSLD1[TL]@ie

; SMALL-LABEL: .toc
; SMALL-LABEL: L..C0:
; SMALL-NEXT: .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
; SMALL-NEXT: .rename _Renamed..5f24__TLSML[TC],"_$TLSML"

; BOTH-LABEL: .toc
; BOTH-LABEL: L..C0:
; BOTH-NEXT: .tc VarTLSLD1[TE],VarTLSLD1[TL]@ie
Loading