[LLVM][DecoderEmitter] Add option to use function table in decodeToMCInst (#144814)

jurahul · web-flow · commit ed5f8f238d68 · 2025-06-24T18:49:05.000-07:00
Add option `use-fn-table-in-decode-to-mcinst` to use a table of function
pointers instead of a switch case in the generated `decodeToMCInst`
function.

When the number of switch cases in this function is large, the generated
code takes a long time to compile in release builds. Using a table of
function pointers instead improves the compile time significantly (~3x
speedup in compiling the code in a downstream target). This option will
allow targets to opt into this mode if they desire for better build
times.

Tested with `check-llvm-mc` with the option enabled by default.
diff --git a/llvm/test/TableGen/DecoderEmitterFnTable.td b/llvm/test/TableGen/DecoderEmitterFnTable.td
@@ -0,0 +1,84 @@
+// RUN: llvm-tblgen -gen-disassembler -use-fn-table-in-decode-to-mcinst -I %p/../../include %s | FileCheck %s
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+  let InstructionSet = archInstrInfo;
+}
+
+let Namespace = "arch" in {
+  def R0 : Register<"r0">;
+  def R1 : Register<"r1">;
+  def R2 : Register<"r2">;
+  def R3 : Register<"r3">;
+}
+def Regs : RegisterClass<"Regs", [i32], 32, (add R0, R1, R2, R3)>;
+
+class TestInstruction : Instruction {
+  let Size = 1;
+  let OutOperandList = (outs);
+  field bits<8> Inst;
+  field bits<8> SoftFail = 0;
+}
+
+// Define instructions to generate 4 cases in decodeToMCInst.
+// Lower 2 bits define the number of operands. Each register operand
+// needs 2 bits to encode.
+
+// An instruction with no inputs. Encoded with lower 2 bits = 0 and upper
+// 6 bits = 0 as well.
+def Inst0 : TestInstruction {
+  let Inst = 0x0;
+  let InOperandList = (ins);
+  let AsmString = "Inst0";
+}
+
+// An instruction with a single input. Encoded with lower 2 bits = 1 and the
+// single input in bits 2-3.
+def Inst1 : TestInstruction {
+  bits<2> r0;
+  let Inst{1-0} = 1;
+  let Inst{3-2} = r0;
+  let InOperandList = (ins Regs:$r0);
+  let AsmString = "Inst1";
+}
+
+// An instruction with two inputs. Encoded with lower 2 bits = 2 and the
+// inputs in bits 2-3 and 4-5.
+def Inst2 : TestInstruction {
+  bits<2> r0;
+  bits<2> r1;
+  let Inst{1-0} = 2;
+  let Inst{3-2} = r0;
+  let Inst{5-4} = r1;
+  let InOperandList = (ins Regs:$r0, Regs:$r1);
+  let AsmString = "Inst2";
+}
+
+// An instruction with three inputs. Encoded with lower 2 bits = 3 and the
+// inputs in bits 2-3 and 4-5 and 6-7.
+def Inst3 : TestInstruction {
+  bits<2> r0;
+  bits<2> r1;
+  bits<2> r2;
+  let Inst{1-0} = 3;
+  let Inst{3-2} = r0;
+  let Inst{5-4} = r1;
+  let Inst{7-6} = r2;
+  let InOperandList = (ins Regs:$r0, Regs:$r1, Regs:$r2);
+  let AsmString = "Inst3";
+}
+
+// CHECK-LABEL: DecodeStatus decodeFn0(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK-LABEL: DecodeStatus decodeFn1(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK-LABEL: DecodeStatus decodeFn2(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK-LABEL: DecodeStatus decodeFn3(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK-LABEL: decodeToMCInst(unsigned Idx, DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK: static constexpr DecodeFnTy decodeFnTable[]
+// CHECK-NEXT: decodeFn0,
+// CHECK-NEXT: decodeFn1,
+// CHECK-NEXT: decodeFn2,
+// CHECK-NEXT: decodeFn3,
+// CHECK: return decodeFnTable[Idx](S, insn, MI, Address, Decoder, DecodeComplete)
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -83,6 +83,14 @@ static cl::opt<bool> LargeTable(
              "in the table instead of the default 16 bits."),
     cl::init(false), cl::cat(DisassemblerEmitterCat));
 
+static cl::opt<bool> UseFnTableInDecodetoMCInst(
+    "use-fn-table-in-decode-to-mcinst",
+    cl::desc(
+        "Use a table of function pointers instead of a switch case in the\n"
+        "generated `decodeToMCInst` function. Helps improve compile time\n"
+        "of the generated code."),
+    cl::init(false), cl::cat(DisassemblerEmitterCat));
+
 STATISTIC(NumEncodings, "Number of encodings considered");
 STATISTIC(NumEncodingsLackingDisasm,
           "Number of encodings without disassembler info");
@@ -1066,31 +1074,67 @@ void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
                                          DecoderSet &Decoders,
                                          indent Indent) const {
-  // The decoder function is just a big switch statement based on the
-  // input decoder index.
-  OS << Indent << "template <typename InsnType>\n";
-  OS << Indent << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
-     << " unsigned Idx, InsnType insn, MCInst &MI,\n";
-  OS << Indent << "                                   uint64_t "
-     << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n";
-  Indent += 2;
-  OS << Indent << "DecodeComplete = true;\n";
+  // The decoder function is just a big switch statement or a table of function
+  // pointers based on the input decoder index.
+
   // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
   // It would be better for emitBinaryParser to use a 64-bit tmp whenever
   // possible but fall back to an InsnType-sized tmp for truly large fields.
-  OS << Indent
-     << "using TmpType = "
-        "std::conditional_t<std::is_integral<InsnType>::"
-        "value, InsnType, uint64_t>;\n";
-  OS << Indent << "TmpType tmp;\n";
-  OS << Indent << "switch (Idx) {\n";
-  OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
-  for (const auto &[Index, Decoder] : enumerate(Decoders)) {
-    OS << Indent << "case " << Index << ":\n";
-    OS << Decoder;
-    OS << Indent + 2 << "return S;\n";
+  StringRef TmpTypeDecl =
+      "using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
+      "InsnType, uint64_t>;\n";
+  StringRef DecodeParams =
+      "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
+      "MCDisassembler *Decoder, bool &DecodeComplete";
+
+  if (UseFnTableInDecodetoMCInst) {
+    // Emit a function for each case first.
+    for (const auto &[Index, Decoder] : enumerate(Decoders)) {
+      OS << Indent << "template <typename InsnType>\n";
+      OS << Indent << "DecodeStatus decodeFn" << Index << "(" << DecodeParams
+         << ") {\n";
+      Indent += 2;
+      OS << Indent << TmpTypeDecl;
+      OS << Indent << "[[maybe_unused]] TmpType tmp;\n";
+      OS << Decoder;
+      OS << Indent << "return S;\n";
+      Indent -= 2;
+      OS << Indent << "}\n\n";
+    }
+  }
+
+  OS << Indent << "// Handling " << Decoders.size() << " cases.\n";
+  OS << Indent << "template <typename InsnType>\n";
+  OS << Indent << "static DecodeStatus decodeToMCInst(unsigned Idx, "
+     << DecodeParams << ") {\n";
+  Indent += 2;
+  OS << Indent << "DecodeComplete = true;\n";
+
+  if (UseFnTableInDecodetoMCInst) {
+    // Build a table of function pointers.
+    OS << Indent << "using DecodeFnTy = DecodeStatus (*)(" << DecodeParams
+       << ");\n";
+    OS << Indent << "static constexpr DecodeFnTy decodeFnTable[] = {\n";
+    for (size_t Index : llvm::seq(Decoders.size()))
+      OS << Indent + 2 << "decodeFn" << Index << ",\n";
+    OS << Indent << "};\n";
+    OS << Indent << "if (Idx >= " << Decoders.size() << ")\n";
+    OS << Indent + 2 << "llvm_unreachable(\"Invalid index!\");\n";
+    OS << Indent
+       << "return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
+          "DecodeComplete);\n";
+  } else {
+    OS << Indent << TmpTypeDecl;
+    OS << Indent << "TmpType tmp;\n";
+    OS << Indent << "switch (Idx) {\n";
+    OS << Indent << "default: llvm_unreachable(\"Invalid index!\");\n";
+    for (const auto &[Index, Decoder] : enumerate(Decoders)) {
+      OS << Indent << "case " << Index << ":\n";
+      OS << Decoder;
+      OS << Indent + 2 << "return S;\n";
+    }
+    OS << Indent << "}\n";
   }
-  OS << Indent << "}\n";
   Indent -= 2;
   OS << Indent << "}\n";
 }
@@ -1267,7 +1311,8 @@ std::pair<unsigned, bool> FilterChooser::getDecoderIndex(DecoderSet &Decoders,
   // FIXME: emitDecoder() function can take a buffer directly rather than
   // a stream.
   raw_svector_ostream S(Decoder);
-  bool HasCompleteDecoder = emitDecoder(S, indent(4), Opc);
+  indent Indent(UseFnTableInDecodetoMCInst ? 2 : 4);
+  bool HasCompleteDecoder = emitDecoder(S, Indent, Opc);
 
   // Using the full decoder string as the key value here is a bit
   // heavyweight, but is effective. If the string comparisons become a
@@ -2371,7 +2416,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
        << "      makeUp(insn, Len);";
   }
   OS << R"(
-      S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete);
+      S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm, DecodeComplete);
       assert(DecodeComplete);
 
       LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
@@ -2393,7 +2438,7 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       MCInst TmpMI;
       TmpMI.setOpcode(Opc);
       bool DecodeComplete;
-      S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete);
+      S = decodeToMCInst(DecodeIdx, S, insn, TmpMI, Address, DisAsm, DecodeComplete);
       LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
                    << ", using decoder " << DecodeIdx << ": ");