[llvm][AsmPrinter] Add an option to print instruction latencies #113243

jroelofs · 2024-10-22T00:57:16Z

... matching what we have in the disassembler. This isn't turned on by default since several of the scheduling models are not completely accurate, and we don't want to be misleading.

llvmbot · 2024-10-22T00:57:51Z

@llvm/pr-subscribers-mc
@llvm/pr-subscribers-backend-arm

@llvm/pr-subscribers-backend-aarch64

Author: Jon Roelofs (jroelofs)

Changes

... matching what we have in the disassembler. This isn't turned on by default since several of the scheduling models are not completely accurate, and we don't want to be misleading.

Full diff: https://github.com/llvm/llvm-project/pull/113243.diff

2 Files Affected:

(modified) llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (+92-2)
(added) llvm/test/CodeGen/AArch64/latency.ll (+10)

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 327e7f7f8a1ed8..015c4cc3d4b721 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -162,6 +162,13 @@ static cl::opt<bool> EmitJumpTableSizesSection(
     cl::desc("Emit a section containing jump table addresses and sizes"),
     cl::Hidden, cl::init(false));
 
+// This isn't turned on by default, since several of the scheduling models are
+// not completely accurate, and we don't want to be misleading.
+static cl::opt<bool> PrintLatency(
+    "asm-print-latency",
+    cl::desc("Print instruction latencies as verbose asm comments."),
+    cl::Hidden, cl::init(false));
+
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
 char AsmPrinter::ID = 0;
@@ -1080,8 +1087,78 @@ void AsmPrinter::emitFunctionEntryLabel() {
   }
 }
 
+/// Gets latency information for \p MI from the itinerary
+/// scheduling model.
+/// \return The maximum operand cycle over all operands of \p MI, or -1 if
+/// \p STI has no CPU name to look up an itinerary for.
+static int getItineraryLatency(const MachineInstr &MI,
+                               const MachineFunction *MF,
+                               const MCSubtargetInfo *STI) {
+  const int NoInformationAvailable = -1;
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+  // Check if we have a CPU to get the itinerary information.
+  if (STI->getCPU().empty())
+    return NoInformationAvailable;
+
+  // Get itinerary information.
+  InstrItineraryData IID = STI->getInstrItineraryForCPU(STI->getCPU());
+  // Get the scheduling class of the requested instruction.
+  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
+  unsigned SCClass = Desc.getSchedClass();
+
+  unsigned Latency = 0;
+
+  for (unsigned Idx = 0, IdxEnd = MI.getNumOperands(); Idx != IdxEnd; ++Idx)
+    if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx))
+      Latency = std::max(Latency, *OperCycle);
+
+  return (int)Latency;
+}
+
+/// Gets latency information for \p MI.
+/// \return The maximum expected latency over all the definitions or -1
+/// if no information is available.
+static int getLatency(const MachineInstr &MI, const MCSubtargetInfo *STI) {
+  const MCSchedModel SCModel = STI->getSchedModel();
+  const int NoInformationAvailable = -1;
+
+  const MachineFunction *MF = MI.getMF();
+  if (!MF)
+    return NoInformationAvailable;
+
+  // Check if we have a scheduling model for instructions.
+  if (!SCModel.hasInstrSchedModel())
+    // Try to fall back to the itinerary model if the scheduling model doesn't
+    // have a scheduling table.  Note the default does not have a table.
+    return getItineraryLatency(MI, MF, STI);
+
+  // Get the scheduling class of the requested instruction.
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
+  unsigned SCClass = Desc.getSchedClass();
+  const MCSchedClassDesc *SCDesc = SCModel.getSchedClassDesc(SCClass);
+  // Resolving the variant SchedClass requires an MI to pass to
+  // SubTargetInfo::resolveSchedClass.
+  if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant())
+    return NoInformationAvailable;
+
+  // Compute output latency.
+  int16_t Latency = 0;
+  for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+       DefIdx != DefEnd; ++DefIdx) {
+    // Lookup the definition's write latency in SubtargetInfo.
+    const MCWriteLatencyEntry *WLEntry =
+        STI->getWriteLatencyEntry(SCDesc, DefIdx);
+    Latency = std::max(Latency, WLEntry->Cycles);
+  }
+
+  return Latency;
+}
+
 /// emitComments - Pretty-print comments for instructions.
-static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+static void emitComments(const MachineInstr &MI, const MCSubtargetInfo *STI,
+                         raw_ostream &CommentOS) {
   const MachineFunction *MF = MI.getMF();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 
@@ -1109,6 +1186,13 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
   // Check for spill-induced copies
   if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
     CommentOS << " Reload Reuse\n";
+
+  if (PrintLatency) {
+    int Latency = getLatency(MI, STI);
+    // Report only interesting latencies.
+    if (1 < Latency)
+      CommentOS << " Latency: " << Latency << "\n";
+  }
 }
 
 /// emitImplicitDef - This method emits the specified machine instruction
@@ -1750,6 +1834,12 @@ void AsmPrinter::emitFunctionBody() {
   int NumInstsInFunction = 0;
   bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
 
+  const MCSubtargetInfo *STI = nullptr;
+  if (this->MF)
+    STI = &getSubtargetInfo();
+  else
+    STI = TM.getMCSubtargetInfo();
+
   bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
   for (auto &MBB : *MF) {
     // Print a label for the basic block.
@@ -1773,7 +1863,7 @@ void AsmPrinter::emitFunctionBody() {
         Handler->beginInstruction(&MI);
 
       if (isVerbose())
-        emitComments(MI, OutStreamer->getCommentOS());
+        emitComments(MI, STI, OutStreamer->getCommentOS());
 
       switch (MI.getOpcode()) {
       case TargetOpcode::CFI_INSTRUCTION:
diff --git a/llvm/test/CodeGen/AArch64/latency.ll b/llvm/test/CodeGen/AArch64/latency.ll
new file mode 100644
index 00000000000000..b722eec3e2571a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/latency.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=1 | FileCheck %s --match-full-lines --check-prefix=ON
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone -asm-print-latency=0 | FileCheck %s --match-full-lines --check-prefix=OFF
+; RUN: llc -mtriple=arm64-apple-ios %s -o - -mcpu=cyclone                      | FileCheck %s --match-full-lines --check-prefix=OFF
+
+define <4 x i64> @load_v4i64(ptr %ptr){
+; ON:     ldp q0, q1, [x0] ; Latency: 4
+; OFF:    ldp q0, q1, [x0]
+  %a = load <4 x i64>, ptr %ptr
+  ret <4 x i64> %a
+}

davemgreen

Would it be possible to share the code with the existing implementation in Disassembler.cpp?

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

qcolombet

#113243 (review)

+1 on this

E.g., templatize the getLatency function on MachineInstr/MCInst and provide a functor to call SubTargetInfo::resolveSchedClass.

jroelofs · 2024-10-23T02:00:54Z

#113243 (review)

> +1 on this
>
> E.g., templatize the getLatency function on MachineInstr/MCInst and provide a functor to call SubTargetInfo::resolveSchedClass.

I am not sure where to put the common thing after adding overloads for that: MCSchedModel seems like the obvious place, but putting it there would be a layering violation, since it would cause MC to depend on CodeGen.

github-actions · 2024-10-23T02:27:38Z

✅ With the latest revision this PR passed the C/C++ code formatter.

qcolombet · 2024-10-23T11:39:26Z

#113243 (review)

> > +1 on this
> > E.g., templatize the getLatency function on MachineInstr/MCInst and provide a functor to call SubTargetInfo::resolveSchedClass.
>
> I am not sure where to put the common thing after adding overloads for that: MCSchedModel seems like the obvious place, but putting it there would be a layering violation, since it would cause MC to depend on CodeGen.

The instantiation of the template can be in the right library, no?
And the functor I was mentioning would be nullptr by default and only implemented in the CodeGen library.

... matching what we have in the disassembler. This isn't turned on by default since several of the scheduling models are not completely accurate, and we don't want to be misleading.

jroelofs · 2024-10-26T16:03:12Z

> The instantiation of the template can be in the right library, no?
> And the functor I was mentioning would be nullptr by default and only implemented in the CodeGen library.

Ok, that works.

davemgreen

From what I can tell this LGTM. Cheers

jroelofs requested review from qcolombet, cachemeifyoucan and dtellenbach October 22, 2024 00:57

llvmbot added the backend:AArch64 label Oct 22, 2024

davemgreen reviewed Oct 22, 2024

View reviewed changes

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Outdated Show resolved Hide resolved

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Outdated Show resolved Hide resolved

qcolombet reviewed Oct 22, 2024

View reviewed changes

llvmbot added backend:ARM mc Machine (object) code labels Oct 23, 2024

jroelofs added 10 commits October 26, 2024 08:18

[llvm][AsmPrinter] Add an option to print instruction latencies

49ed93d

... matching what we have in the disassembler. This isn't turned on by default since several of the scheduling models are not completely accurate, and we don't want to be misleading.

add a test for a cpu with itineraries

78da0e3

use MCSchedModel::computeInstrLatency

90cb604

simplify both implementations

071fa71

clang-format

3be57dd

make the two impls even closer to 'the same'

5af5faf

move disassembler's latency calculation into MCSchedule

5750339

unify getLatency between Disassembler / AsmPrinter

5f3369e

clang-format

8d2c37d

move variant sched class resolution into callback, per quentin's idea

4482142

jroelofs force-pushed the jroelofs/latencies branch from 7622e08 to 4482142 Compare October 26, 2024 15:58

cl::desc's shouldn't have full stops

bd1e4e8

clang-format

e0d27fa

davemgreen approved these changes Nov 4, 2024

View reviewed changes

jroelofs merged commit 4c3e1e3 into llvm:main Nov 6, 2024
6 of 8 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[llvm][AsmPrinter] Add an option to print instruction latencies #113243

[llvm][AsmPrinter] Add an option to print instruction latencies #113243

Uh oh!

jroelofs commented Oct 22, 2024

Uh oh!

llvmbot commented Oct 22, 2024 •

edited

Loading

Uh oh!

davemgreen left a comment

Uh oh!

Uh oh!

Uh oh!

qcolombet left a comment •

edited

Loading

Uh oh!

jroelofs commented Oct 23, 2024

Uh oh!

github-actions bot commented Oct 23, 2024 •

edited

Loading

Uh oh!

qcolombet commented Oct 23, 2024

Uh oh!

jroelofs commented Oct 26, 2024

Uh oh!

davemgreen left a comment

Uh oh!

Uh oh!

Uh oh!

[llvm][AsmPrinter] Add an option to print instruction latencies #113243

[llvm][AsmPrinter] Add an option to print instruction latencies #113243

Uh oh!

Conversation

jroelofs commented Oct 22, 2024

Uh oh!

llvmbot commented Oct 22, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

qcolombet left a comment • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

jroelofs commented Oct 23, 2024

Uh oh!

github-actions bot commented Oct 23, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

qcolombet commented Oct 23, 2024

Uh oh!

jroelofs commented Oct 26, 2024

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Oct 22, 2024 •

edited

Loading

qcolombet left a comment •

edited

Loading

github-actions bot commented Oct 23, 2024 •

edited

Loading