Skip to content

[llvm][AsmPrinter] Add an option to print instruction latencies #113243

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Nov 6, 2024
Merged
65 changes: 63 additions & 2 deletions llvm/include/llvm/MC/MCSchedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
#ifndef LLVM_MC_MCSCHEDULE_H
#define LLVM_MC_MCSCHEDULE_H

#include "llvm/Config/llvm-config.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <optional>

namespace llvm {

Expand All @@ -25,6 +27,7 @@ struct InstrItinerary;
class MCSubtargetInfo;
class MCInstrInfo;
class MCInst;
class MCInstrDesc;
class InstrItineraryData;

/// Define a kind of processor resource that will be modeled by the scheduler.
Expand Down Expand Up @@ -369,9 +372,19 @@ struct MCSchedModel {
const MCSchedClassDesc &SCDesc);

int computeInstrLatency(const MCSubtargetInfo &STI, unsigned SClass) const;

/// Computes the latency of the MCInst \p Inst, looking up its scheduling
/// class through \p MCII. Variant scheduling classes are resolved via
/// \p STI (MCSubtargetInfo::resolveVariantSchedClass) before the latency is
/// computed.
int computeInstrLatency(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
const MCInst &Inst) const;

/// Generic latency query usable with either an MCInst or a MachineInstr
/// (anything providing getOpcode() and getNumOperands()).
///
/// When this model has an instruction scheduling table, the scheduling class
/// descriptor of \p Inst is passed through \p ResolveVariantSchedClass and the
/// latency of the returned descriptor is computed; the callback may return
/// nullptr to signal that no information is available. Without a scheduling
/// table the itinerary data for the CPU reported by \p STI is consulted
/// instead.
///
/// \param ResolveVariantSchedClass maps a scheduling class descriptor to the
/// descriptor that should actually be used. Defaults to the identity.
/// \return the computed latency, or -1 if no information is available.
template <typename MCSubtargetInfo, typename MCInstrInfo,
typename InstrItineraryData, typename MCInstOrMachineInstr>
int computeInstrLatency(
const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
const MCInstOrMachineInstr &Inst,
llvm::function_ref<const MCSchedClassDesc *(const MCSchedClassDesc *)>
ResolveVariantSchedClass =
[](const MCSchedClassDesc *SCDesc) { return SCDesc; }) const;

// Returns the reciprocal throughput information from a MCSchedClassDesc.
static double
getReciprocalThroughput(const MCSubtargetInfo &STI,
Expand All @@ -393,6 +406,54 @@ struct MCSchedModel {
static const MCSchedModel Default;
};

// The first three are only template'd arguments so we can get away with leaving
// them as incomplete types below. The third is a template over
// MCInst/MachineInstr so as to avoid a layering violation here that would make
// the MC layer depend on CodeGen.
template <typename MCSubtargetInfo, typename MCInstrInfo,
typename InstrItineraryData, typename MCInstOrMachineInstr>
int MCSchedModel::computeInstrLatency(
const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
const MCInstOrMachineInstr &Inst,
llvm::function_ref<const MCSchedClassDesc *(const MCSchedClassDesc *)>
ResolveVariantSchedClass) const {
static const int NoInformationAvailable = -1;
// Check if we have a scheduling model for instructions.
if (!hasInstrSchedModel()) {
// Try to fall back to the itinerary model if the scheduling model doesn't
// have a scheduling table. Note the default does not have a table.

llvm::StringRef CPU = STI.getCPU();

// Check if we have a CPU to get the itinerary information.
if (CPU.empty())
return NoInformationAvailable;

// Get itinerary information.
InstrItineraryData IID = STI.getInstrItineraryForCPU(CPU);
// Get the scheduling class of the requested instruction.
const MCInstrDesc &Desc = MCII.get(Inst.getOpcode());
unsigned SCClass = Desc.getSchedClass();

unsigned Latency = 0;

for (unsigned Idx = 0, IdxEnd = Inst.getNumOperands(); Idx != IdxEnd; ++Idx)
if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx))
Latency = std::max(Latency, *OperCycle);

return int(Latency);
}

unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
SCDesc = ResolveVariantSchedClass(SCDesc);

if (!SCDesc || !SCDesc->isValid())
return NoInformationAvailable;

return MCSchedModel::computeInstrLatency(STI, *SCDesc);
}

} // namespace llvm

#endif
30 changes: 28 additions & 2 deletions llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
Expand Down Expand Up @@ -166,6 +167,13 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));

// This isn't turned on by default, since several of the scheduling models are
// not completely accurate, and we don't want to be misleading.
// The latency is emitted from emitComments(), which is only invoked for
// verbose asm output, and only latencies greater than 1 are reported.
static cl::opt<bool> PrintLatency(
"asm-print-latency",
cl::desc("Print instruction latencies as verbose asm comments"), cl::Hidden,
cl::init(false));

STATISTIC(EmittedInsts, "Number of machine instrs printed");

char AsmPrinter::ID = 0;
Expand Down Expand Up @@ -1085,7 +1093,8 @@ void AsmPrinter::emitFunctionEntryLabel() {
}

/// emitComments - Pretty-print comments for instructions.
static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
static void emitComments(const MachineInstr &MI, const MCSubtargetInfo *STI,
raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

Expand Down Expand Up @@ -1113,6 +1122,17 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// Check for spill-induced copies
if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
CommentOS << " Reload Reuse\n";

if (PrintLatency) {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const MCSchedModel &SCModel = STI->getSchedModel();
int Latency = SCModel.computeInstrLatency<MCSubtargetInfo, MCInstrInfo,
InstrItineraryData, MachineInstr>(
*STI, *TII, MI);
// Report only interesting latencies.
if (1 < Latency)
CommentOS << " Latency: " << Latency << "\n";
}
}

/// emitImplicitDef - This method emits the specified machine instruction
Expand Down Expand Up @@ -1763,6 +1783,12 @@ void AsmPrinter::emitFunctionBody() {
int NumInstsInFunction = 0;
bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");

const MCSubtargetInfo *STI = nullptr;
if (this->MF)
STI = &getSubtargetInfo();
else
STI = TM.getMCSubtargetInfo();

bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
for (auto &MBB : *MF) {
// Print a label for the basic block.
Expand All @@ -1786,7 +1812,7 @@ void AsmPrinter::emitFunctionBody() {
Handler->beginInstruction(&MI);

if (isVerbose())
emitComments(MI, OutStreamer->getCommentOS());
emitComments(MI, STI, OutStreamer->getCommentOS());

switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
Expand Down
69 changes: 4 additions & 65 deletions llvm/lib/MC/MCDisassembler/Disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,74 +162,13 @@ static void emitComments(LLVMDisasmContext *DC,
DC->CommentsToEmit.clear();
}

/// Looks up the latency of \p Inst in the itinerary scheduling model of the
/// CPU configured in \p DC.
/// \return The largest operand cycle reported for any operand of \p Inst, or
/// -1 if \p DC has no CPU and therefore no itinerary can be selected.
static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
  // The itinerary lookup below needs a CPU name.
  if (DC->getCPU().empty())
    return -1;

  // Fetch the itinerary data for the CPU.
  const MCSubtargetInfo *SubtargetInfo = DC->getSubtargetInfo();
  InstrItineraryData Itineraries =
      SubtargetInfo->getInstrItineraryForCPU(DC->getCPU());

  // Scheduling class of the instruction being queried.
  const unsigned ItinClass =
      DC->getInstrInfo()->get(Inst.getOpcode()).getSchedClass();

  // Take the maximum over every operand that has a cycle entry.
  unsigned MaxCycle = 0;
  const unsigned NumOperands = Inst.getNumOperands();
  for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
    std::optional<unsigned> Cycle =
        Itineraries.getOperandCycle(ItinClass, OpIdx);
    if (Cycle)
      MaxCycle = std::max(MaxCycle, *Cycle);
  }

  return static_cast<int>(MaxCycle);
}

/// Gets latency information for \p Inst, based on \p DC information.
///
/// Uses the instruction scheduling model of the subtarget when it has one and
/// falls back to the itinerary model (getItineraryLatency) otherwise.
/// \return The maximum expected latency over all the definitions or -1
/// if no information is available (including when the scheduling class is
/// invalid or variant).
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
  const int NoInformationAvailable = -1;

  // Try to compute scheduling information.
  const MCSubtargetInfo *STI = DC->getSubtargetInfo();
  // Bind by reference instead of copying the whole scheduling model on every
  // call; the other call sites in this change do the same.
  const MCSchedModel &SCModel = STI->getSchedModel();

  // Check if we have a scheduling model for instructions.
  if (!SCModel.hasInstrSchedModel())
    // Try to fall back to the itinerary model if the scheduling model doesn't
    // have a scheduling table. Note the default does not have a table.
    return getItineraryLatency(DC, Inst);

  // Get the scheduling class of the requested instruction.
  const MCInstrDesc &Desc = DC->getInstrInfo()->get(Inst.getOpcode());
  unsigned SCClass = Desc.getSchedClass();
  const MCSchedClassDesc *SCDesc = SCModel.getSchedClassDesc(SCClass);
  // Resolving the variant SchedClass requires an MI to pass to
  // SubTargetInfo::resolveSchedClass.
  if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant())
    return NoInformationAvailable;

  // Compute output latency: the maximum write latency over all definitions.
  int16_t Latency = 0;
  for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
       DefIdx != DefEnd; ++DefIdx) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
        STI->getWriteLatencyEntry(SCDesc, DefIdx);
    Latency = std::max(Latency, WLEntry->Cycles);
  }

  return Latency;
}

/// Emits latency information in DC->CommentStream for \p Inst, based
/// on the information available in \p DC.
static void emitLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
int Latency = getLatency(DC, Inst);
const MCSubtargetInfo *STI = DC->getSubtargetInfo();
const MCInstrInfo *MCII = DC->getInstrInfo();
const MCSchedModel &SCModel = STI->getSchedModel();
int Latency = SCModel.computeInstrLatency(*STI, *MCII, Inst);

// Report only interesting latencies.
if (Latency < 2)
Expand Down
37 changes: 22 additions & 15 deletions llvm/lib/MC/MCSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,21 +69,28 @@ int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
/// Computes the latency of the MCInst \p Inst.
///
/// Delegates to the templated computeInstrLatency and supplies a resolver
/// that walks variant scheduling classes through
/// MCSubtargetInfo::resolveVariantSchedClass until a non-variant class is
/// reached. The resolver returns nullptr (which the template reports as "no
/// information available") for an invalid class or when resolution ends on
/// scheduling class 0.
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
                                      const MCInstrInfo &MCII,
                                      const MCInst &Inst) const {
  return MCSchedModel::computeInstrLatency<MCSubtargetInfo, MCInstrInfo,
                                           InstrItineraryData, MCInst>(
      STI, MCII, Inst,
      [&](const MCSchedClassDesc *SCDesc) -> const MCSchedClassDesc * {
        if (!SCDesc->isValid())
          return nullptr;

        unsigned CPUID = getProcessorID();
        // Start from the instruction's own scheduling class, not 0: it is the
        // class handed to resolveVariantSchedClass, and a non-variant class
        // must keep its non-zero index so the check below does not reject it.
        unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
        while (SCDesc->isVariant()) {
          SchedClass =
              STI.resolveVariantSchedClass(SchedClass, &Inst, &MCII, CPUID);
          SCDesc = getSchedClassDesc(SchedClass);
        }

        if (!SchedClass) {
          assert(false && "unsupported variant scheduling class");
          return nullptr;
        }

        return SCDesc;
      });
}

double
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/CodeGen/AArch64/latency.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=1 | FileCheck %s --match-full-lines --check-prefix=ON
; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=0 | FileCheck %s --match-full-lines --check-prefix=OFF
; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone | FileCheck %s --match-full-lines --check-prefix=OFF

define <4 x i64> @load_v4i64(ptr %ptr){
; ON: ldp q0, q1, [x0] ; Latency: 4
; OFF: ldp q0, q1, [x0]
%a = load <4 x i64>, ptr %ptr
ret <4 x i64> %a
}
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/ARM/latency.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
; RUN: llc -mtriple=thumb-none-eabi %s -o - -mcpu=cortex-m0 -asm-print-latency=1 | FileCheck %s --match-full-lines --check-prefix=ON
; RUN: llc -mtriple=thumb-none-eabi %s -o - -mcpu=cortex-m0 -asm-print-latency=0 | FileCheck %s --match-full-lines --check-prefix=OFF
; RUN: llc -mtriple=thumb-none-eabi %s -o - -mcpu=cortex-m0 | FileCheck %s --match-full-lines --check-prefix=OFF

define i64 @load_i64(ptr %ptr){
; ON: ldr r2, [r0] @ Latency: 4
; ON: ldr r1, [r0, #4] @ Latency: 4
; ON: mov r0, r2 @ Latency: 2
; ON: bx lr
; OFF: ldr r2, [r0]
; OFF: ldr r1, [r0, #4]
; OFF: mov r0, r2
; OFF: bx lr
%a = load i64, ptr %ptr
ret i64 %a
}