-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[llvm][AsmPrinter] Add an option to print instruction latencies #113243
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-aarch64 Author: Jon Roelofs (jroelofs) Changes... matching what we have in the disassembler. This isn't turned on by default since several of the scheduling models are not completely accurate, and we don't want to be misleading. Full diff: https://github.com/llvm/llvm-project/pull/113243.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 327e7f7f8a1ed8..015c4cc3d4b721 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -162,6 +162,13 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));
+// This isn't turned on by default, since several of the scheduling models are
+// not completely accurate, and we don't want to be misleading.
+static cl::opt<bool> PrintLatency(
+ "asm-print-latency",
+ cl::desc("Print instruction latencies as verbose asm comments."),
+ cl::Hidden, cl::init(false));
+
STATISTIC(EmittedInsts, "Number of machine instrs printed");
char AsmPrinter::ID = 0;
@@ -1080,8 +1087,78 @@ void AsmPrinter::emitFunctionEntryLabel() {
}
}
+/// Gets latency information for \p Inst from the itinerary
+/// scheduling model.
+/// \return The maximum expected latency over all the operands or -1
+/// if no information is available.
+static int getItineraryLatency(const MachineInstr &MI,
+ const MachineFunction *MF,
+ const MCSubtargetInfo *STI) {
+ const int NoInformationAvailable = -1;
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ // Check if we have a CPU to get the itinerary information.
+ if (STI->getCPU().empty())
+ return NoInformationAvailable;
+
+ // Get itinerary information.
+ InstrItineraryData IID = STI->getInstrItineraryForCPU(STI->getCPU());
+ // Get the scheduling class of the requested instruction.
+ const MCInstrDesc &Desc = TII->get(MI.getOpcode());
+ unsigned SCClass = Desc.getSchedClass();
+
+ unsigned Latency = 0;
+
+ for (unsigned Idx = 0, IdxEnd = MI.getNumOperands(); Idx != IdxEnd; ++Idx)
+ if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx))
+ Latency = std::max(Latency, *OperCycle);
+
+ return (int)Latency;
+}
+
+/// Gets latency information for \p Inst.
+/// \return The maximum expected latency over all the definitions or -1
+/// if no information is available.
+static int getLatency(const MachineInstr &MI, const MCSubtargetInfo *STI) {
+ const MCSchedModel SCModel = STI->getSchedModel();
+ const int NoInformationAvailable = -1;
+
+ const MachineFunction *MF = MI.getMF();
+ if (!MF)
+ return NoInformationAvailable;
+
+ // Check if we have a scheduling model for instructions.
+ if (!SCModel.hasInstrSchedModel())
+ // Try to fall back to the itinerary model if the scheduling model doesn't
+ // have a scheduling table. Note the default does not have a table.
+ return getItineraryLatency(MI, MF, STI);
+
+ // Get the scheduling class of the requested instruction.
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const MCInstrDesc &Desc = TII->get(MI.getOpcode());
+ unsigned SCClass = Desc.getSchedClass();
+ const MCSchedClassDesc *SCDesc = SCModel.getSchedClassDesc(SCClass);
+ // Resolving the variant SchedClass requires an MI to pass to
+ // SubTargetInfo::resolveSchedClass.
+ if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant())
+ return NoInformationAvailable;
+
+ // Compute output latency.
+ int16_t Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, WLEntry->Cycles);
+ }
+
+ return Latency;
+}
+
/// emitComments - Pretty-print comments for instructions.
-static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+static void emitComments(const MachineInstr &MI, const MCSubtargetInfo *STI,
+ raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -1109,6 +1186,13 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// Check for spill-induced copies
if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
CommentOS << " Reload Reuse\n";
+
+ if (PrintLatency) {
+ int Latency = getLatency(MI, STI);
+ // Report only interesting latencies.
+ if (1 < Latency)
+ CommentOS << " Latency: " << Latency << "\n";
+ }
}
/// emitImplicitDef - This method emits the specified machine instruction
@@ -1750,6 +1834,12 @@ void AsmPrinter::emitFunctionBody() {
int NumInstsInFunction = 0;
bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
+ const MCSubtargetInfo *STI = nullptr;
+ if (this->MF)
+ STI = &getSubtargetInfo();
+ else
+ STI = TM.getMCSubtargetInfo();
+
bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
for (auto &MBB : *MF) {
// Print a label for the basic block.
@@ -1773,7 +1863,7 @@ void AsmPrinter::emitFunctionBody() {
Handler->beginInstruction(&MI);
if (isVerbose())
- emitComments(MI, OutStreamer->getCommentOS());
+ emitComments(MI, STI, OutStreamer->getCommentOS());
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
diff --git a/llvm/test/CodeGen/AArch64/latency.ll b/llvm/test/CodeGen/AArch64/latency.ll
new file mode 100644
index 00000000000000..b722eec3e2571a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/latency.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=1 | FileCheck %s --match-full-lines --check-prefix=ON
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=0 | FileCheck %s --match-full-lines --check-prefix=OFF
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone | FileCheck %s --match-full-lines --check-prefix=OFF
+
+define <4 x i64> @load_v4i64(ptr %ptr){
+; ON: ldp q0, q1, [x0] ; Latency: 4
+; OFF: ldp q0, q1, [x0]
+ %a = load <4 x i64>, ptr %ptr
+ ret <4 x i64> %a
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible to share the code with the existing implementation in Disassembler.cpp?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 on this
E.g., templatize the getLatency function on MachineInstr/MCInst and provide a functor to call SubTargetInfo::resolveSchedClass
.
I am not sure where to put the common thing after adding overloads for that: |
✅ With the latest revision this PR passed the C/C++ code formatter. |
The instantiation of the template can be in the right library, no? |
... matching what we have in the disassembler. This isn't turned on by default since several of the scheduling models are not completely accurate, and we don't want to be misleading.
7622e08
to
4482142
Compare
Ok, that works. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From what I can tell this LGTM. Cheers
... matching what we have in the disassembler. This isn't turned on by default since several of the scheduling models are not completely accurate, and we don't want to be misleading.