Skip to content

Commit 4c3e1e3

Browse files
authored
[llvm][AsmPrinter] Add an option to print instruction latencies (llvm#113243)
... matching what we have in the disassembler. This isn't turned on by default since several of the scheduling models are not completely accurate, and we don't want to be misleading.
1 parent 92be2cb commit 4c3e1e3

File tree

6 files changed

+143
-84
lines changed

6 files changed

+143
-84
lines changed

llvm/include/llvm/MC/MCSchedule.h

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
#ifndef LLVM_MC_MCSCHEDULE_H
1515
#define LLVM_MC_MCSCHEDULE_H
1616

17-
#include "llvm/Config/llvm-config.h"
18-
#include "llvm/Support/DataTypes.h"
17+
#include "llvm/ADT/StringRef.h"
18+
#include "llvm/MC/MCInstrDesc.h"
19+
#include "llvm/Support/ErrorHandling.h"
1920
#include <cassert>
21+
#include <optional>
2022

2123
namespace llvm {
2224

@@ -25,6 +27,7 @@ struct InstrItinerary;
2527
class MCSubtargetInfo;
2628
class MCInstrInfo;
2729
class MCInst;
30+
class MCInstrDesc;
2831
class InstrItineraryData;
2932

3033
/// Define a kind of processor resource that will be modeled by the scheduler.
@@ -369,9 +372,19 @@ struct MCSchedModel {
369372
const MCSchedClassDesc &SCDesc);
370373

371374
int computeInstrLatency(const MCSubtargetInfo &STI, unsigned SClass) const;
375+
372376
int computeInstrLatency(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
373377
const MCInst &Inst) const;
374378

379+
template <typename MCSubtargetInfo, typename MCInstrInfo,
380+
typename InstrItineraryData, typename MCInstOrMachineInstr>
381+
int computeInstrLatency(
382+
const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
383+
const MCInstOrMachineInstr &Inst,
384+
llvm::function_ref<const MCSchedClassDesc *(const MCSchedClassDesc *)>
385+
ResolveVariantSchedClass =
386+
[](const MCSchedClassDesc *SCDesc) { return SCDesc; }) const;
387+
375388
// Returns the reciprocal throughput information from a MCSchedClassDesc.
376389
static double
377390
getReciprocalThroughput(const MCSubtargetInfo &STI,
@@ -393,6 +406,54 @@ struct MCSchedModel {
393406
static const MCSchedModel Default;
394407
};
395408

409+
// The first three are only template'd arguments so we can get away with leaving
410+
// them as incomplete types below. The third is a template over
411+
// MCInst/MachineInstr so as to avoid a layering violation here that would make
412+
// the MC layer depend on CodeGen.
413+
template <typename MCSubtargetInfo, typename MCInstrInfo,
414+
typename InstrItineraryData, typename MCInstOrMachineInstr>
415+
int MCSchedModel::computeInstrLatency(
416+
const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
417+
const MCInstOrMachineInstr &Inst,
418+
llvm::function_ref<const MCSchedClassDesc *(const MCSchedClassDesc *)>
419+
ResolveVariantSchedClass) const {
420+
static const int NoInformationAvailable = -1;
421+
// Check if we have a scheduling model for instructions.
422+
if (!hasInstrSchedModel()) {
423+
// Try to fall back to the itinerary model if the scheduling model doesn't
424+
// have a scheduling table. Note the default does not have a table.
425+
426+
llvm::StringRef CPU = STI.getCPU();
427+
428+
// Check if we have a CPU to get the itinerary information.
429+
if (CPU.empty())
430+
return NoInformationAvailable;
431+
432+
// Get itinerary information.
433+
InstrItineraryData IID = STI.getInstrItineraryForCPU(CPU);
434+
// Get the scheduling class of the requested instruction.
435+
const MCInstrDesc &Desc = MCII.get(Inst.getOpcode());
436+
unsigned SCClass = Desc.getSchedClass();
437+
438+
unsigned Latency = 0;
439+
440+
for (unsigned Idx = 0, IdxEnd = Inst.getNumOperands(); Idx != IdxEnd; ++Idx)
441+
if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx))
442+
Latency = std::max(Latency, *OperCycle);
443+
444+
return int(Latency);
445+
}
446+
447+
unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
448+
const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
449+
SCDesc = ResolveVariantSchedClass(SCDesc);
450+
451+
if (!SCDesc || !SCDesc->isValid())
452+
return NoInformationAvailable;
453+
454+
return MCSchedModel::computeInstrLatency(STI, *SCDesc);
455+
}
456+
396457
} // namespace llvm
397458

398459
#endif

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
#include "llvm/MC/MCDirectives.h"
9292
#include "llvm/MC/MCExpr.h"
9393
#include "llvm/MC/MCInst.h"
94+
#include "llvm/MC/MCSchedule.h"
9495
#include "llvm/MC/MCSection.h"
9596
#include "llvm/MC/MCSectionCOFF.h"
9697
#include "llvm/MC/MCSectionELF.h"
@@ -165,6 +166,13 @@ static cl::opt<bool> EmitJumpTableSizesSection(
165166
cl::desc("Emit a section containing jump table addresses and sizes"),
166167
cl::Hidden, cl::init(false));
167168

169+
// This isn't turned on by default, since several of the scheduling models are
170+
// not completely accurate, and we don't want to be misleading.
171+
static cl::opt<bool> PrintLatency(
172+
"asm-print-latency",
173+
cl::desc("Print instruction latencies as verbose asm comments"), cl::Hidden,
174+
cl::init(false));
175+
168176
STATISTIC(EmittedInsts, "Number of machine instrs printed");
169177

170178
char AsmPrinter::ID = 0;
@@ -1084,7 +1092,8 @@ void AsmPrinter::emitFunctionEntryLabel() {
10841092
}
10851093

10861094
/// emitComments - Pretty-print comments for instructions.
1087-
static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
1095+
static void emitComments(const MachineInstr &MI, const MCSubtargetInfo *STI,
1096+
raw_ostream &CommentOS) {
10881097
const MachineFunction *MF = MI.getMF();
10891098
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
10901099

@@ -1112,6 +1121,17 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
11121121
// Check for spill-induced copies
11131122
if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
11141123
CommentOS << " Reload Reuse\n";
1124+
1125+
if (PrintLatency) {
1126+
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1127+
const MCSchedModel &SCModel = STI->getSchedModel();
1128+
int Latency = SCModel.computeInstrLatency<MCSubtargetInfo, MCInstrInfo,
1129+
InstrItineraryData, MachineInstr>(
1130+
*STI, *TII, MI);
1131+
// Report only interesting latencies.
1132+
if (1 < Latency)
1133+
CommentOS << " Latency: " << Latency << "\n";
1134+
}
11151135
}
11161136

11171137
/// emitImplicitDef - This method emits the specified machine instruction
@@ -1772,6 +1792,12 @@ void AsmPrinter::emitFunctionBody() {
17721792
int NumInstsInFunction = 0;
17731793
bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
17741794

1795+
const MCSubtargetInfo *STI = nullptr;
1796+
if (this->MF)
1797+
STI = &getSubtargetInfo();
1798+
else
1799+
STI = TM.getMCSubtargetInfo();
1800+
17751801
bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
17761802
// Create a slot for the entry basic block section so that the section
17771803
// order is preserved when iterating over MBBSectionRanges.
@@ -1801,7 +1827,7 @@ void AsmPrinter::emitFunctionBody() {
18011827
Handler->beginInstruction(&MI);
18021828

18031829
if (isVerbose())
1804-
emitComments(MI, OutStreamer->getCommentOS());
1830+
emitComments(MI, STI, OutStreamer->getCommentOS());
18051831

18061832
switch (MI.getOpcode()) {
18071833
case TargetOpcode::CFI_INSTRUCTION:

llvm/lib/MC/MCDisassembler/Disassembler.cpp

Lines changed: 4 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -162,74 +162,13 @@ static void emitComments(LLVMDisasmContext *DC,
162162
DC->CommentsToEmit.clear();
163163
}
164164

165-
/// Gets latency information for \p Inst from the itinerary
166-
/// scheduling model, based on \p DC information.
167-
/// \return The maximum expected latency over all the operands or -1
168-
/// if no information is available.
169-
static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
170-
const int NoInformationAvailable = -1;
171-
172-
// Check if we have a CPU to get the itinerary information.
173-
if (DC->getCPU().empty())
174-
return NoInformationAvailable;
175-
176-
// Get itinerary information.
177-
const MCSubtargetInfo *STI = DC->getSubtargetInfo();
178-
InstrItineraryData IID = STI->getInstrItineraryForCPU(DC->getCPU());
179-
// Get the scheduling class of the requested instruction.
180-
const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode());
181-
unsigned SCClass = Desc.getSchedClass();
182-
183-
unsigned Latency = 0;
184-
185-
for (unsigned Idx = 0, IdxEnd = Inst.getNumOperands(); Idx != IdxEnd; ++Idx)
186-
if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx))
187-
Latency = std::max(Latency, *OperCycle);
188-
189-
return (int)Latency;
190-
}
191-
192-
/// Gets latency information for \p Inst, based on \p DC information.
193-
/// \return The maximum expected latency over all the definitions or -1
194-
/// if no information is available.
195-
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
196-
// Try to compute scheduling information.
197-
const MCSubtargetInfo *STI = DC->getSubtargetInfo();
198-
const MCSchedModel SCModel = STI->getSchedModel();
199-
const int NoInformationAvailable = -1;
200-
201-
// Check if we have a scheduling model for instructions.
202-
if (!SCModel.hasInstrSchedModel())
203-
// Try to fall back to the itinerary model if the scheduling model doesn't
204-
// have a scheduling table. Note the default does not have a table.
205-
return getItineraryLatency(DC, Inst);
206-
207-
// Get the scheduling class of the requested instruction.
208-
const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode());
209-
unsigned SCClass = Desc.getSchedClass();
210-
const MCSchedClassDesc *SCDesc = SCModel.getSchedClassDesc(SCClass);
211-
// Resolving the variant SchedClass requires an MI to pass to
212-
// SubTargetInfo::resolveSchedClass.
213-
if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant())
214-
return NoInformationAvailable;
215-
216-
// Compute output latency.
217-
int16_t Latency = 0;
218-
for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
219-
DefIdx != DefEnd; ++DefIdx) {
220-
// Lookup the definition's write latency in SubtargetInfo.
221-
const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc,
222-
DefIdx);
223-
Latency = std::max(Latency, WLEntry->Cycles);
224-
}
225-
226-
return Latency;
227-
}
228-
229165
/// Emits latency information in DC->CommentStream for \p Inst, based
230166
/// on the information available in \p DC.
231167
static void emitLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
232-
int Latency = getLatency(DC, Inst);
168+
const MCSubtargetInfo *STI = DC->getSubtargetInfo();
169+
const MCInstrInfo *MCII = DC->getInstrInfo();
170+
const MCSchedModel &SCModel = STI->getSchedModel();
171+
int Latency = SCModel.computeInstrLatency(*STI, *MCII, Inst);
233172

234173
// Report only interesting latencies.
235174
if (Latency < 2)

llvm/lib/MC/MCSchedule.cpp

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,21 +69,28 @@ int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
6969
int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
7070
const MCInstrInfo &MCII,
7171
const MCInst &Inst) const {
72-
unsigned SchedClass = MCII.get(Inst.getOpcode()).getSchedClass();
73-
const MCSchedClassDesc *SCDesc = getSchedClassDesc(SchedClass);
74-
if (!SCDesc->isValid())
75-
return 0;
76-
77-
unsigned CPUID = getProcessorID();
78-
while (SCDesc->isVariant()) {
79-
SchedClass = STI.resolveVariantSchedClass(SchedClass, &Inst, &MCII, CPUID);
80-
SCDesc = getSchedClassDesc(SchedClass);
81-
}
82-
83-
if (SchedClass)
84-
return MCSchedModel::computeInstrLatency(STI, *SCDesc);
85-
86-
llvm_unreachable("unsupported variant scheduling class");
72+
return MCSchedModel::computeInstrLatency<MCSubtargetInfo, MCInstrInfo,
73+
InstrItineraryData, MCInst>(
74+
STI, MCII, Inst,
75+
[&](const MCSchedClassDesc *SCDesc) -> const MCSchedClassDesc * {
76+
if (!SCDesc->isValid())
77+
return nullptr;
78+
79+
unsigned CPUID = getProcessorID();
80+
unsigned SchedClass = 0;
81+
while (SCDesc->isVariant()) {
82+
SchedClass =
83+
STI.resolveVariantSchedClass(SchedClass, &Inst, &MCII, CPUID);
84+
SCDesc = getSchedClassDesc(SchedClass);
85+
}
86+
87+
if (!SchedClass) {
88+
assert(false && "unsupported variant scheduling class");
89+
return nullptr;
90+
}
91+
92+
return SCDesc;
93+
});
8794
}
8895

8996
double

llvm/test/CodeGen/AArch64/latency.ll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=1 | FileCheck %s --match-full-lines --check-prefix=ON
2+
; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=0 | FileCheck %s --match-full-lines --check-prefix=OFF
3+
; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone | FileCheck %s --match-full-lines --check-prefix=OFF
4+
5+
define <4 x i64> @load_v4i64(ptr %ptr){
6+
; ON: ldp q0, q1, [x0] ; Latency: 4
7+
; OFF: ldp q0, q1, [x0]
8+
%a = load <4 x i64>, ptr %ptr
9+
ret <4 x i64> %a
10+
}

llvm/test/CodeGen/ARM/latency.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; RUN: llc -mtriple=thumb-none-eabi %s -o - -mcpu=cortex-m0 -asm-print-latency=1 | FileCheck %s --match-full-lines --check-prefix=ON
2+
; RUN: llc -mtriple=thumb-none-eabi %s -o - -mcpu=cortex-m0 -asm-print-latency=0 | FileCheck %s --match-full-lines --check-prefix=OFF
3+
; RUN: llc -mtriple=thumb-none-eabi %s -o - -mcpu=cortex-m0 | FileCheck %s --match-full-lines --check-prefix=OFF
4+
5+
define i64 @load_i64(ptr %ptr){
6+
; ON: ldr r2, [r0] @ Latency: 4
7+
; ON: ldr r1, [r0, #4] @ Latency: 4
8+
; ON: mov r0, r2 @ Latency: 2
9+
; ON: bx lr
10+
; OFF: ldr r2, [r0]
11+
; OFF: ldr r1, [r0, #4]
12+
; OFF: mov r0, r2
13+
; OFF: bx lr
14+
%a = load i64, ptr %ptr
15+
ret i64 %a
16+
}

0 commit comments

Comments
 (0)