Skip to content

Commit 1922011

Browse files
author
Felix (Ting Wang)
authored
[PowerPC][AIX] Refactor existing logic to handle non-zero offsets for aix-small-local-dynamic-tls (#89182)
To enable the optimized small local-dynamic access sequence for non-zero offsets, this patch refactors the existing logic from 2a50921.
1 parent 927913f commit 1922011

File tree

4 files changed

+129
-159
lines changed

4 files changed

+129
-159
lines changed

llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ class PPCAsmPrinter : public AsmPrinter {
205205
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
206206
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
207207
void EmitAIXTlsCallHelper(const MachineInstr *MI);
208-
const MCExpr *getAdjustedLocalExecExpr(const MachineOperand &MO,
209-
int64_t Offset);
208+
const MCExpr *getAdjustedFasterLocalExpr(const MachineOperand &MO,
209+
int64_t Offset);
210210
bool runOnMachineFunction(MachineFunction &MF) override {
211211
Subtarget = &MF.getSubtarget<PPCSubtarget>();
212212
bool Changed = AsmPrinter::runOnMachineFunction(MF);
@@ -1598,7 +1598,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
15981598
// machine operand (which is a TargetGlobalTLSAddress) is expected to be
15991599
// the same operand for both loads and stores.
16001600
for (const MachineOperand &TempMO : MI->operands()) {
1601-
if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG)) &&
1601+
if (((TempMO.getTargetFlags() == PPCII::MO_TPREL_FLAG ||
1602+
TempMO.getTargetFlags() == PPCII::MO_TLSLD_FLAG)) &&
16021603
TempMO.getOperandNo() == 1)
16031604
OpNum = 1;
16041605
}
@@ -1634,8 +1635,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
16341635
case PPC::ADDI8: {
16351636
// A faster non-TOC-based local-[exec|dynamic] sequence is represented by
16361637
// `addi` or a load/store instruction (that directly loads or stores off of
1637-
// the thread pointer) with an immediate operand having the MO_TPREL_FLAG.
1638-
// Such instructions do not otherwise arise.
1638+
// the thread pointer) with an immediate operand having the
1639+
// [MO_TPREL_FLAG|MO_TLSLD_FLAG]. Such instructions do not otherwise arise.
16391640
if (!HasAIXSmallLocalTLS)
16401641
break;
16411642
bool IsMIADDI8 = MI->getOpcode() == PPC::ADDI8;
@@ -1647,7 +1648,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
16471648
Flag == PPCII::MO_TPREL_PCREL_FLAG || Flag == PPCII::MO_TLSLD_FLAG) {
16481649
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
16491650

1650-
const MCExpr *Expr = getAdjustedLocalExecExpr(MO, MO.getOffset());
1651+
const MCExpr *Expr = getAdjustedFasterLocalExpr(MO, MO.getOffset());
16511652
if (Expr)
16521653
TmpInst.getOperand(OpNum) = MCOperand::createExpr(Expr);
16531654

@@ -1677,28 +1678,25 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
16771678
EmitToStreamer(*OutStreamer, TmpInst);
16781679
}
16791680

1680-
// For non-TOC-based local-exec variables that have a non-zero offset,
1681+
// For non-TOC-based local-[exec|dynamic] variables that have a non-zero offset,
16811682
// we need to create a new MCExpr that adds the non-zero offset to the address
1682-
// of the local-exec variable that will be used in either an addi, load or
1683-
// store. However, the final displacement for these instructions must be
1683+
// of the local-[exec|dynamic] variable that will be used in either an addi,
1684+
// load or store. However, the final displacement for these instructions must be
16841685
// between [-32768, 32768), so if the TLS address + its non-zero offset is
16851686
// greater than or equal to 32KB, a new MCExpr is produced to accommodate this situation.
1686-
const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
1687-
int64_t Offset) {
1687+
const MCExpr *
1688+
PPCAsmPrinter::getAdjustedFasterLocalExpr(const MachineOperand &MO,
1689+
int64_t Offset) {
16881690
// Non-zero offsets (for loads, stores or `addi`) require additional handling.
16891691
// When the offset is zero, there is no need to create an adjusted MCExpr.
16901692
if (!Offset)
16911693
return nullptr;
16921694

16931695
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
16941696
const GlobalValue *GValue = MO.getGlobal();
1695-
// TODO: Handle the aix-small-local-dynamic-tls non-zero offset case.
16961697
TLSModel::Model Model = TM.getTLSModel(GValue);
1697-
if (Model == TLSModel::LocalDynamic) {
1698-
return nullptr;
1699-
}
1700-
assert(Model == TLSModel::LocalExec &&
1701-
"Only local-exec accesses are handled!");
1698+
assert((Model == TLSModel::LocalExec || Model == TLSModel::LocalDynamic) &&
1699+
"Only local-[exec|dynamic] accesses are handled!");
17021700

17031701
bool IsGlobalADeclaration = GValue->isDeclarationForLinker();
17041702
// Find the GlobalVariable that corresponds to the particular TLS variable
@@ -1719,7 +1717,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
17191717
// For when TLS variables are extern, this is safe to do because we can
17201718
// assume that the addresses of extern TLS variables are zero.
17211719
const MCExpr *Expr = MCSymbolRefExpr::create(
1722-
getSymbol(GValue), MCSymbolRefExpr::VK_PPC_AIX_TLSLE, OutContext);
1720+
getSymbol(GValue),
1721+
Model == TLSModel::LocalExec ? MCSymbolRefExpr::VK_PPC_AIX_TLSLE
1722+
: MCSymbolRefExpr::VK_PPC_AIX_TLSLD,
1723+
OutContext);
17231724
Expr = MCBinaryExpr::createAdd(
17241725
Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
17251726
if (FinalAddress >= 32768) {
@@ -1732,10 +1733,10 @@ const MCExpr *PPCAsmPrinter::getAdjustedLocalExecExpr(const MachineOperand &MO,
17321733
ptrdiff_t Delta = ((FinalAddress + 32768) & ~0xFFFF);
17331734
// Check that the total instruction displacement fits within [-32768,32768).
17341735
[[maybe_unused]] ptrdiff_t InstDisp = TLSVarAddress + Offset - Delta;
1735-
assert(((InstDisp < 32768) &&
1736-
(InstDisp >= -32768)) &&
1737-
"Expecting the instruction displacement for local-exec TLS "
1738-
"variables to be between [-32768, 32768)!");
1736+
assert(
1737+
((InstDisp < 32768) && (InstDisp >= -32768)) &&
1738+
"Expecting the instruction displacement for local-[exec|dynamic] TLS "
1739+
"variables to be between [-32768, 32768)!");
17391740
Expr = MCBinaryExpr::createAdd(
17401741
Expr, MCConstantExpr::create(-Delta, OutContext), OutContext);
17411742
}

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 44 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -7587,29 +7587,23 @@ static bool hasAIXSmallTLSAttr(SDValue Val) {
75877587
return false;
75887588
}
75897589

7590-
// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
7591-
static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
7592-
SDValue ADDIToFold) {
7590+
// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
7591+
// accesses?
7592+
static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG,
7593+
SDValue ADDIToFold) {
75937594
// Check if ADDIToFold (the ADDI that we want to fold into local-[exec|dynamic]
75947595
// accesses), is truly an ADDI.
75957596
if (!ADDIToFold.isMachineOpcode() ||
75967597
(ADDIToFold.getMachineOpcode() != PPC::ADDI8))
75977598
return false;
75987599

7599-
// Folding is only allowed for the AIX small-local-exec TLS target attribute
7600-
// or when the 'aix-small-tls' global variable attribute is present.
7600+
// Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
7601+
// attribute or when the 'aix-small-tls' global variable attribute is present.
76017602
const PPCSubtarget &Subtarget =
76027603
DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
76037604
SDValue TLSVarNode = ADDIToFold.getOperand(1);
7604-
if (!(Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
7605-
return false;
7606-
7607-
// The first operand of the ADDIToFold should be the thread pointer.
7608-
// This transformation is only performed if the first operand of the
7609-
// addi is the thread pointer.
7610-
SDValue TPRegNode = ADDIToFold.getOperand(0);
7611-
RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7612-
if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7605+
if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
7606+
Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
76137607
return false;
76147608

76157609
// The second operand of the ADDIToFold should be the global TLS address
@@ -7619,52 +7613,54 @@ static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
76197613
if (!GA)
76207614
return false;
76217615

7622-
// The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
7623-
// so this optimization is not performed otherwise if the flag is not set.
7616+
if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
7617+
// The first operand of the ADDIToFold should be the thread pointer.
7618+
// This transformation is only performed if the first operand of the
7619+
// addi is the thread pointer.
7620+
SDValue TPRegNode = ADDIToFold.getOperand(0);
7621+
RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7622+
if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7623+
return false;
7624+
}
7625+
7626+
// The local-[exec|dynamic] TLS variable should only have the
7627+
// [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
7628+
// performed otherwise if the flag is not set.
76247629
unsigned TargetFlags = GA->getTargetFlags();
7625-
if (TargetFlags != PPCII::MO_TPREL_FLAG)
7630+
if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
7631+
TargetFlags == PPCII::MO_TLSLD_FLAG))
76267632
return false;
76277633

76287634
// If all conditions are satisfied, the ADDI is valid for folding.
76297635
return true;
76307636
}
76317637

7632-
// For non-TOC-based local-exec access where an addi is feeding into another
7633-
// addi, fold this sequence into a single addi if possible.
7634-
// Before this optimization, the sequence appears as:
7635-
// addi rN, r13, sym@le
7638+
// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
7639+
// another addi, fold this sequence into a single addi if possible. Before this
7640+
// optimization, the sequence appears as:
7641+
// addi rN, r13, sym@[le|ld]
76367642
// addi rM, rN, imm
76377643
// After this optimization, we can fold the two addi into a single one:
7638-
// addi rM, r13, sym@le + imm
7639-
static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
7644+
// addi rM, r13, sym@[le|ld] + imm
7645+
static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG) {
76407646
if (N->getMachineOpcode() != PPC::ADDI8)
76417647
return;
76427648

76437649
// InitialADDI is the addi feeding into N (also an addi), and the addi that
76447650
// we want optimized out.
76457651
SDValue InitialADDI = N->getOperand(0);
76467652

7647-
if (!isEligibleToFoldADDIForLocalExecAccesses(DAG, InitialADDI))
7653+
if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
76487654
return;
76497655

7650-
// At this point, InitialADDI can be folded into a non-TOC-based local-exec
7651-
// access. The first operand of InitialADDI should be the thread pointer,
7652-
// which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7653-
SDValue TPRegNode = InitialADDI.getOperand(0);
7654-
[[maybe_unused]] RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7655-
[[maybe_unused]] const PPCSubtarget &Subtarget =
7656-
DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
7657-
assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
7658-
"Expecting the first operand to be a thread pointer for folding addi "
7659-
"in local-exec accesses!");
7660-
76617656
// The second operand of the InitialADDI should be the global TLS address
7662-
// (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
7663-
// This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7657+
// (the local-[exec|dynamic] TLS variable), with the
7658+
// [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
7659+
// isEligibleToFoldADDIForFasterLocalAccesses().
76647660
SDValue TLSVarNode = InitialADDI.getOperand(1);
76657661
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
76667662
assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7667-
"local-exec accesses!");
7663+
"local-[exec|dynamic] accesses!");
76687664
unsigned TargetFlags = GA->getTargetFlags();
76697665

76707666
// The second operand of the addi that we want to preserve will be an
@@ -7676,7 +7672,7 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) {
76767672
TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
76777673
Offset, TargetFlags);
76787674

7679-
(void)DAG->UpdateNodeOperands(N, TPRegNode, TLSVarNode);
7675+
(void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
76807676
if (InitialADDI.getNode()->use_empty())
76817677
DAG->RemoveDeadNode(InitialADDI.getNode());
76827678
}
@@ -7693,8 +7689,9 @@ void PPCDAGToDAGISel::PeepholePPC64() {
76937689
if (isVSXSwap(SDValue(N, 0)))
76947690
reduceVSXSwap(N, CurDAG);
76957691

7696-
// This optimization is performed for non-TOC-based local-exec accesses.
7697-
foldADDIForLocalExecAccesses(N, CurDAG);
7692+
// This optimization is performed for non-TOC-based local-[exec|dynamic]
7693+
// accesses.
7694+
foldADDIForFasterLocalAccesses(N, CurDAG);
76987695

76997696
unsigned FirstOp;
77007697
unsigned StorageOpcode = N->getMachineOpcode();
@@ -7852,13 +7849,15 @@ void PPCDAGToDAGISel::PeepholePPC64() {
78527849
ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
78537850
ImmOpnd.getValueType());
78547851
} else if (Offset != 0) {
7855-
// This optimization is performed for non-TOC-based local-exec accesses.
7856-
if (isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) {
7852+
// This optimization is performed for non-TOC-based local-[exec|dynamic]
7853+
// accesses.
7854+
if (isEligibleToFoldADDIForFasterLocalAccesses(CurDAG, Base)) {
78577855
// Add the non-zero offset information into the load or store
7858-
// instruction to be used for non-TOC-based local-exec accesses.
7856+
// instruction to be used for non-TOC-based local-[exec|dynamic]
7857+
// accesses.
78597858
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
78607859
assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7861-
"addi into local-exec accesses!");
7860+
"addi into local-[exec|dynamic] accesses!");
78627861
ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
78637862
MVT::i64, Offset,
78647863
GA->getTargetFlags());

0 commit comments

Comments
 (0)