Skip to content

Commit 0e4827a

Browse files
committed
[ARM][MachineOutliner] Add Machine Outliner support for ARM.
Enables Machine Outlining for ARM and Thumb2 modes. This is the first patch of the series which adds all the basic logic for the support, and only handles tail-calls and thunks. The outliner can be turned on by using clang -moutline option or -mllvm -enable-machine-outliner one (like AArch64). Differential Revision: https://reviews.llvm.org/D76066
1 parent f83d66c commit 0e4827a

File tree

8 files changed

+842
-5
lines changed

8 files changed

+842
-5
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6149,11 +6149,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
61496149
if (Arg *A = Args.getLastArg(options::OPT_moutline,
61506150
options::OPT_mno_outline)) {
61516151
if (A->getOption().matches(options::OPT_moutline)) {
6152-
// We only support -moutline in AArch64 right now. If we're not compiling
6153-
// for AArch64, emit a warning and ignore the flag. Otherwise, add the
6154-
// proper mllvm flags.
6155-
if (Triple.getArch() != llvm::Triple::aarch64 &&
6156-
Triple.getArch() != llvm::Triple::aarch64_32) {
6152+
// We only support -moutline in AArch64 and ARM targets right now. If
6153+
// we're not compiling for these, emit a warning and ignore the flag.
6154+
// Otherwise, add the proper mllvm flags.
6155+
if (!(Triple.isARM() || Triple.isThumb() ||
6156+
Triple.getArch() == llvm::Triple::aarch64 ||
6157+
Triple.getArch() == llvm::Triple::aarch64_32)) {
61576158
D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName();
61586159
} else {
61596160
CmdArgs.push_back("-mllvm");

llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/CodeGen/MachineInstr.h"
3333
#include "llvm/CodeGen/MachineInstrBuilder.h"
3434
#include "llvm/CodeGen/MachineMemOperand.h"
35+
#include "llvm/CodeGen/MachineModuleInfo.h"
3536
#include "llvm/CodeGen/MachineOperand.h"
3637
#include "llvm/CodeGen/MachineRegisterInfo.h"
3738
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
@@ -5517,3 +5518,372 @@ bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
55175518
return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
55185519
ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
55195520
}
5521+
5522+
/// Constants defining how certain sequences should be outlined.
5523+
/// This encompasses how an outlined function should be called, and what kind of
5524+
/// frame should be emitted for that outlined function.
5525+
///
5526+
/// \p MachineOutlinerTailCall implies that the function is being created from
5527+
/// a sequence of instructions ending in a return.
5528+
///
5529+
/// That is,
5530+
///
5531+
/// I1 OUTLINED_FUNCTION:
5532+
/// I2 --> B OUTLINED_FUNCTION I1
5533+
/// BX LR I2
5534+
/// BX LR
5535+
///
5536+
/// +-------------------------+--------+-----+
5537+
/// | | Thumb2 | ARM |
5538+
/// +-------------------------+--------+-----+
5539+
/// | Call overhead in Bytes | 4 | 4 |
5540+
/// | Frame overhead in Bytes | 0 | 0 |
5541+
/// | Stack fixup required | No | No |
5542+
/// +-------------------------+--------+-----+
5543+
///
5544+
/// \p MachineOutlinerThunk implies that the function is being created from
5545+
/// a sequence of instructions ending in a call. The outlined function is
5546+
/// called with a BL instruction, and the outlined function tail-calls the
5547+
/// original call destination.
5548+
///
5549+
/// That is,
5550+
///
5551+
/// I1 OUTLINED_FUNCTION:
5552+
/// I2 --> BL OUTLINED_FUNCTION I1
5553+
/// BL f I2
5554+
/// B f
5555+
///
5556+
/// +-------------------------+--------+-----+
5557+
/// | | Thumb2 | ARM |
5558+
/// +-------------------------+--------+-----+
5559+
/// | Call overhead in Bytes | 4 | 4 |
5560+
/// | Frame overhead in Bytes | 0 | 0 |
5561+
/// | Stack fixup required | No | No |
5562+
/// +-------------------------+--------+-----+
5563+
5564+
// Identifies how an outlined function is called and which frame it receives.
enum MachineOutlinerClass {
  // The outlined sequence ends in a return and is reached with a tail call.
  MachineOutlinerTailCall = 0,
  // The outlined sequence ends in a call; it is reached with BL and
  // tail-calls the original call destination.
  MachineOutlinerThunk = 1
};
5565+
5566+
/// Bit flags describing a basic block, filled in by isMBBSafeToOutlineFrom and
/// later AND-ed across candidates in getOutliningCandidateInfo.
enum MachineOutlinerMBBFlags {
  /// LR is not available once the block's uses and live-outs are accounted for.
  LRUnavailableSomewhere = 1 << 1,
  /// The block contains at least one call instruction.
  HasCalls = 1 << 2,
  /// Neither R12 nor CPSR is touched by any instruction in the block.
  UnsafeRegsDead = 1 << 3
};
5571+
5572+
struct OutlinerCosts {
5573+
const int CallTailCall;
5574+
const int FrameTailCall;
5575+
const int CallThunk;
5576+
const int FrameThunk;
5577+
5578+
OutlinerCosts(const ARMSubtarget &target)
5579+
: CallTailCall(target.isThumb() ? 4 : 4),
5580+
FrameTailCall(target.isThumb() ? 0 : 0),
5581+
CallThunk(target.isThumb() ? 4 : 4),
5582+
FrameThunk(target.isThumb() ? 0 : 0) {}
5583+
};
5584+
5585+
/// Decide how a group of repeated instruction sequences should be outlined.
///
/// \param RepeatedSequenceLocs Candidate occurrences of the sequence. When not
///        every candidate's block has the UnsafeRegsDead flag, candidates for
///        which R12 or CPSR is not available are erased from this vector.
/// \returns An OutlinedFunction carrying the surviving candidates, the
///          sequence size in bytes, the frame cost and the frame kind
///          (tail call or thunk). A default-constructed OutlinedFunction is
///          returned when fewer than two candidates survive, or when the
///          sequence ends in neither a terminator nor a recognised call.
outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
  outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];

  // Size of the sequence in bytes. The accumulator is seeded with an unsigned
  // zero so the running sum is not narrowed through int on every step.
  unsigned SequenceSize =
      std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0u,
                      [this](unsigned Sum, const MachineInstr &MI) {
                        return Sum + getInstSizeInBytes(MI);
                      });

  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;
  for (const outliner::Candidate &C : RepeatedSequenceLocs)
    FlagsSetInAll &= C.Flags;

  const TargetRegisterInfo &TRI = getRegisterInfo();

  // According to the ARM Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Register R12(IP),
  // * Condition codes (and thus the CPSR register)
  //
  // Since we control the instructions which are part of the outlined regions
  // we don't need to be fully compliant with the AAPCS, but we have to
  // guarantee that if a veneer is inserted at link time the code is still
  // correct. Because of this, we can't outline any sequence of instructions
  // where one of these registers is live into/across it. Thus, we need to
  // delete those candidates.
  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
    // If the unsafe registers in this block are all dead, then we don't need
    // to compute liveness here.
    if (C.Flags & UnsafeRegsDead)
      return false;
    C.initLRU(TRI);
    // Query the candidate's own register-unit set; no copy is needed.
    return !C.LRU.available(ARM::R12) || !C.LRU.available(ARM::CPSR);
  };

  // Are there any candidates where those registers are live?
  if (!(FlagsSetInAll & UnsafeRegsDead)) {
    // Erase every candidate that violates the restrictions above. (It could be
    // true that we have viable candidates, so it's not worth bailing out in
    // the case that, say, 1 out of 20 candidates violate the restrictions.)
    RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
                                              RepeatedSequenceLocs.end(),
                                              CantGuaranteeValueAcrossCall),
                               RepeatedSequenceLocs.end());

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < 2)
      return outliner::OutlinedFunction();
  }

  // At this point, we have only "safe" candidates to outline. Figure out
  // frame + call instruction information.
  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  // The cost table lives on the stack; allocating it with `new` and never
  // deleting it leaked one object per invocation.
  const OutlinerCosts Costs(Subtarget);
  unsigned FrameID = 0;
  unsigned NumBytesToCreateFrame = 0;

  // Only the first candidate's last instruction is inspected; the candidates
  // are expected to be occurrences of the same sequence. If it is a
  // terminator, tail call all of the candidates.
  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
    FrameID = MachineOutlinerTailCall;
    NumBytesToCreateFrame = Costs.FrameTailCall;
    SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
  } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
             LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr ||
             LastInstrOpcode == ARM::tBLXi) {
    // The sequence ends in a call: outline it as a thunk.
    FrameID = MachineOutlinerThunk;
    NumBytesToCreateFrame = Costs.FrameThunk;
    SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
  } else
    return outliner::OutlinedFunction();

  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
                                    NumBytesToCreateFrame, FrameID);
}
5674+
5675+
/// Report whether the outliner may take instruction sequences from \p MF.
///
/// \param MF The machine function being considered.
/// \param OutlineFromLinkOnceODRs When false, functions with linkonce_odr
///        linkage are rejected.
/// \returns true when none of the exclusions below applies.
bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function &Fn = MF.getFunction();

  // A linkonce_odr function can be deduplicated by the linker, so by default
  // nothing is outlined from it.
  const bool MayBeDeduplicated =
      !OutlineFromLinkOnceODRs && Fn.hasLinkOnceODRLinkage();

  // A function with a section marking may be expected to keep all of its code
  // in the named section.
  // FIXME: Allow outlining from multiple functions with the same section
  // marking.
  if (MayBeDeduplicated || Fn.hasSection())
    return false;

  // FIXME: Thumb1 outlining is not handled, so only non-Thumb1 functions are
  // safe to outline from.
  return !MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction();
}
5697+
5698+
/// Report whether sequences may be outlined from \p MBB and record facts about
/// the block in \p Flags.
///
/// \param MBB   The basic block to inspect.
/// \param Flags In/out set of MachineOutlinerMBBFlags bits; bits are only ever
///              added here.
/// \returns false when R12 or CPSR is untouched inside the block but live out
///          of it, true otherwise.
bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                              unsigned &Flags) const {
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
         "Suitable Machine Function for outlining must track liveness");

  // Accumulate the register units touched by every instruction, walking the
  // block from its last instruction to its first.
  LiveRegUnits UsedUnits(getRegisterInfo());
  for (auto RevIt = MBB.rbegin(), RevEnd = MBB.rend(); RevIt != RevEnd; ++RevIt)
    UsedUnits.accumulate(*RevIt);

  // Availability of the unsafe registers considering only the block's own
  // instructions.
  const bool R12FreeInBlock = UsedUnits.available(ARM::R12);
  const bool CPSRFreeInBlock = UsedUnits.available(ARM::CPSR);

  // When both are free inside the block, later per-candidate checks can be
  // skipped; remember that.
  if (R12FreeInBlock && CPSRFreeInBlock)
    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;

  // From here on the set also contains the block's live-outs.
  UsedUnits.addLiveOuts(MBB);

  // A register that no instruction in the block touches but that is live out
  // of the block makes outlining unsafe.
  const bool R12OnlyLiveOut = R12FreeInBlock && !UsedUnits.available(ARM::R12);
  const bool CPSROnlyLiveOut =
      CPSRFreeInBlock && !UsedUnits.available(ARM::CPSR);
  if (R12OnlyLiveOut || CPSROnlyLiveOut)
    return false;

  // Record whether the block contains any call.
  for (const MachineInstr &MI : MBB) {
    if (MI.isCall()) {
      Flags |= MachineOutlinerMBBFlags::HasCalls;
      break;
    }
  }

  // Record whether LR is unavailable (used in the block or live out of it).
  if (!UsedUnits.available(ARM::LR))
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;

  return true;
}
5739+
5740+
/// Classify the instruction at \p MIT for the machine outliner.
///
/// \param MIT   Iterator to the instruction being classified.
/// \param Flags Block flags; not consulted by this implementation.
/// \returns Invisible for debug, KILL and IMPLICIT_DEF instructions;
///          LegalTerminator for the whitelisted call opcodes; Legal for
///          unpredicated terminators of blocks without successors and for
///          ordinary instructions that pass every check; Illegal otherwise.
outliner::InstrType
ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
                                   unsigned Flags) const {
  using outliner::InstrType;

  MachineInstr &MI = *MIT;
  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // Be conservative with inline ASM.
  if (MI.isInlineAsm())
    return InstrType::Illegal;

  // Debug instructions must not influence outlining, and KILL / IMPLICIT_DEF
  // carry no useful information at this point, so all of them are skipped.
  if (MI.isDebugInstr() || MI.isIndirectDebugValue() || MI.isKill() ||
      MI.isImplicitDef())
    return InstrType::Invisible;

  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  // PIC instructions contain labels; outlining them would break offset
  // computation.
  case ARM::tPICADD:
  case ARM::PICADD:
  case ARM::PICSTR:
  case ARM::PICSTRB:
  case ARM::PICSTRH:
  case ARM::PICLDR:
  case ARM::PICLDRB:
  case ARM::PICLDRH:
  case ARM::PICLDRSB:
  case ARM::PICLDRSH:
  case ARM::t2LDRpci_pic:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
  case ARM::t2MOV_ga_pcrel:
  // Be conservative with these ARMv8.1 opcodes.
  case ARM::t2BF_LabelPseudo:
  case ARM::t2DoLoopStart:
  case ARM::t2WhileLoopStart:
  case ARM::t2LoopDec:
  case ARM::t2LoopEnd:
    return InstrType::Illegal;
  default:
    break;
  }

  // Likewise reject anything in the MVE execution domain.
  const uint64_t TSFlags = MI.getDesc().TSFlags;
  if ((TSFlags & ARMII::DomainMask) == ARMII::DomainMVE)
    return InstrType::Illegal;

  // A terminator may only be outlined when it is unconditional and its block
  // has no successors, i.e. it ends the function.
  if (MI.isTerminator()) {
    const bool EndsFunction =
        !isPredicated(MI) && MI.getParent()->succ_empty();
    return EndsFunction ? InstrType::Legal : InstrType::Illegal;
  }

  // Operands referring to constant pools, jump tables, CFI indices, frame
  // indices or target indices cannot be outlined.
  for (const MachineOperand &Operand : MI.operands()) {
    if (Operand.isCPI() || Operand.isJTI() || Operand.isCFIIndex() ||
        Operand.isFI() || Operand.isTargetIndex())
      return InstrType::Illegal;
  }

  // Don't outline if the link register or program counter value is used.
  if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
    return InstrType::Illegal;

  if (MI.isCall()) {
    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Only the call opcodes listed here are accepted, so call
    // pseudo-instructions do not produce unexpected results.
    switch (Opcode) {
    case ARM::BL:
    case ARM::tBL:
    case ARM::BLX:
    case ARM::tBLXr:
    case ARM::tBLXi:
      return InstrType::LegalTerminator;
    default:
      return InstrType::Illegal;
    }
  }

  // Calls were handled above; nothing else may write LR or PC.
  if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
    return InstrType::Illegal;

  // Be conservative with IT blocks.
  if (MI.readsRegister(ARM::ITSTATE, TRI) ||
      MI.modifiesRegister(ARM::ITSTATE, TRI))
    return InstrType::Illegal;

  // Don't outline positions.
  if (MI.isPosition())
    return InstrType::Illegal;

  return InstrType::Legal;
}
5834+
5835+
/// Finish the body of an outlined function.
///
/// For thunk outlining the last instruction of \p MBB, a call, is replaced by
/// the matching tail-jump to the same callee operand. Other frame kinds are
/// left untouched.
///
/// \param MBB The block holding the outlined instructions.
/// \param MF  The outlined function; not consulted by this implementation.
/// \param OF  Outlining decision; only FrameConstructionID is read.
void ARMBaseInstrInfo::buildOutlinedFrame(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const outliner::OutlinedFunction &OF) const {
  // Only thunks need their trailing call rewritten.
  if (OF.FrameConstructionID != MachineOutlinerThunk)
    return;

  MachineInstr &Call = *--MBB.instr_end();
  const bool isThumb = Subtarget.isThumb();

  // The callee is operand 2 in Thumb mode and operand 0 in ARM mode.
  const unsigned FuncOp = isThumb ? 2 : 0;
  const bool CalleeIsReg = Call.getOperand(FuncOp).isReg();

  // Choose the tail-jump opcode matching the callee kind and instruction set.
  unsigned Opc;
  if (CalleeIsReg)
    Opc = isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr;
  else if (!isThumb)
    Opc = ARM::TAILJMPd;
  else if (Subtarget.isTargetMachO())
    Opc = ARM::tTAILJMPd;
  else
    Opc = ARM::tTAILJMPdND;

  MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
                                .add(Call.getOperand(FuncOp));

  // Direct Thumb tail-jumps additionally take "always" predicate operands.
  if (isThumb && !CalleeIsReg)
    MIB.add(predOps(ARMCC::AL));

  Call.eraseFromParent();
}
5856+
5857+
/// Insert the instruction that transfers control to an outlined function.
///
/// \param M   Module used to look up the global value named after \p MF.
/// \param MBB Block receiving the new instruction.
/// \param It  Insertion point; updated to refer to the inserted instruction.
/// \param MF  The outlined function being targeted.
/// \param C   Candidate being replaced; CallConstructionID selects between a
///            tail-jump and a BL.
/// \returns Iterator to the inserted instruction (same value as \p It).
MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, const outliner::Candidate &C) const {
  const bool isThumb = Subtarget.isThumb();

  // Are we tail calling?
  if (C.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    const unsigned Opc =
        isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
                : ARM::TAILJMPd;
    MachineInstrBuilder MIB =
        BuildMI(MF, DebugLoc(), get(Opc))
            .addGlobalAddress(M.getNamedValue(MF.getName()));
    // Thumb tail-jumps take the predicate operands after the target.
    if (isThumb)
      MIB.add(predOps(ARMCC::AL));
    It = MBB.insert(It, MIB);
    return It;
  }

  // Otherwise create a regular call instruction.
  const unsigned Opc = isThumb ? ARM::tBL : ARM::BL;
  MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
  // For the Thumb BL the predicate operands precede the target.
  if (isThumb)
    CallMIB.add(predOps(ARMCC::AL));
  CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));

  // Insert the call.
  It = MBB.insert(It, CallMIB);
  return It;
}

0 commit comments

Comments
 (0)