Skip to content

[clang][bytecode] Print jump lines in Function::dump() #135482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 13, 2025

Conversation

tbaederr
Copy link
Contributor

E.g. for

constexpr int foo(int b) {
  int a = 1+1;

  for (int i = 0; i < b; ++i) {
    ++a;
  }
  return a;
}

we now print:

foo 0x7cc8d4bf0580
frame size: 128
arg size:   8
rvo:        0
this arg:   0
0      InitScope         0
16     ConstSint32       1
32     ConstSint32       1
48     AddSint32
56     SetLocalSint32    40
72     ConstSint32       0
88     SetLocalSint32    104
104    GetPtrLocal       104         <-+
120    LoadPopSint32                   |
128    GetPtrParam       0             |
144    LoadPopSint32                   |
152    LTSint32                        |
160    Jf                80     --+    |
176    GetPtrLocal       40       |    |
192    IncPopSint32      1        |    |
208    GetPtrLocal       104      |    |
224    IncPopSint32      1        |    |
240    Jmp               -152     |  --+
256    GetPtrLocal       40     <-+
272    LoadPopSint32
280    Destroy           0
296    RetSint32
304    Destroy           0
320    NoRet

E.g. for
```c++
constexpr int foo(int b) {
  int a = 1+1;

  for (int i = 0; i < b; ++i) {
    ++a;
  }
  return a;
}
```

we now print:
```
foo 0x7cc8d4bf0580
frame size: 128
arg size:   8
rvo:        0
this arg:   0
0      InitScope         0
16     ConstSint32       1
32     ConstSint32       1
48     AddSint32
56     SetLocalSint32    40
72     ConstSint32       0
88     SetLocalSint32    104
104    GetPtrLocal       104         <-+
120    LoadPopSint32                   |
128    GetPtrParam       0             |
144    LoadPopSint32                   |
152    LTSint32                        |
160    Jf                80     --+    |
176    GetPtrLocal       40       |    |
192    IncPopSint32      1        |    |
208    GetPtrLocal       104      |    |
224    IncPopSint32      1        |    |
240    Jmp               -152     |  --+
256    GetPtrLocal       40     <-+
272    LoadPopSint32
280    Destroy           0
296    RetSint32
304    Destroy           0
320    NoRet
```
@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Apr 12, 2025
@llvmbot
Copy link
Member

llvmbot commented Apr 12, 2025

@llvm/pr-subscribers-clang

Author: Timm Baeder (tbaederr)

Changes

E.g. for

constexpr int foo(int b) {
  int a = 1+1;

  for (int i = 0; i < b; ++i) {
    ++a;
  }
  return a;
}

we now print:

foo 0x7cc8d4bf0580
frame size: 128
arg size:   8
rvo:        0
this arg:   0
0      InitScope         0
16     ConstSint32       1
32     ConstSint32       1
48     AddSint32
56     SetLocalSint32    40
72     ConstSint32       0
88     SetLocalSint32    104
104    GetPtrLocal       104         <-+
120    LoadPopSint32                   |
128    GetPtrParam       0             |
144    LoadPopSint32                   |
152    LTSint32                        |
160    Jf                80     --+    |
176    GetPtrLocal       40       |    |
192    IncPopSint32      1        |    |
208    GetPtrLocal       104      |    |
224    IncPopSint32      1        |    |
240    Jmp               -152     |  --+
256    GetPtrLocal       40     <-+
272    LoadPopSint32
280    Destroy           0
296    RetSint32
304    Destroy           0
320    NoRet

Full diff: https://github.com/llvm/llvm-project/pull/135482.diff

2 Files Affected:

  • (modified) clang/lib/AST/ByteCode/Disasm.cpp (+152-25)
  • (modified) clang/utils/TableGen/ClangOpcodesEmitter.cpp (+5-9)
diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp
index 12c434029562d..d4c9ce6050b85 100644
--- a/clang/lib/AST/ByteCode/Disasm.cpp
+++ b/clang/lib/AST/ByteCode/Disasm.cpp
@@ -33,39 +33,74 @@
 using namespace clang;
 using namespace clang::interp;
 
-template <typename T> inline static T ReadArg(Program &P, CodePtr &OpPC) {
+template <typename T>
+inline static std::string printArg(Program &P, CodePtr &OpPC) {
   if constexpr (std::is_pointer_v<T>) {
     uint32_t ID = OpPC.read<uint32_t>();
-    return reinterpret_cast<T>(P.getNativePointer(ID));
+    std::string Result;
+    llvm::raw_string_ostream SS(Result);
+    SS << reinterpret_cast<T>(P.getNativePointer(ID));
+    return Result;
   } else {
-    return OpPC.read<T>();
+    std::string Result;
+    llvm::raw_string_ostream SS(Result);
+    auto Arg = OpPC.read<T>();
+    SS << Arg;
+    return Result;
   }
 }
 
-template <> inline Floating ReadArg<Floating>(Program &P, CodePtr &OpPC) {
-  Floating F = Floating::deserialize(*OpPC);
+template <> inline std::string printArg<Floating>(Program &P, CodePtr &OpPC) {
+  auto F = Floating::deserialize(*OpPC);
   OpPC += align(F.bytesToSerialize());
-  return F;
+
+  std::string Result;
+  llvm::raw_string_ostream SS(Result);
+  SS << F;
+  return Result;
 }
 
 template <>
-inline IntegralAP<false> ReadArg<IntegralAP<false>>(Program &P, CodePtr &OpPC) {
-  IntegralAP<false> I = IntegralAP<false>::deserialize(*OpPC);
-  OpPC += align(I.bytesToSerialize());
-  return I;
-}
+inline std::string printArg<IntegralAP<false>>(Program &P, CodePtr &OpPC) {
+  auto F = IntegralAP<false>::deserialize(*OpPC);
+  OpPC += align(F.bytesToSerialize());
 
+  std::string Result;
+  llvm::raw_string_ostream SS(Result);
+  SS << F;
+  return Result;
+}
 template <>
-inline IntegralAP<true> ReadArg<IntegralAP<true>>(Program &P, CodePtr &OpPC) {
-  IntegralAP<true> I = IntegralAP<true>::deserialize(*OpPC);
-  OpPC += align(I.bytesToSerialize());
-  return I;
+inline std::string printArg<IntegralAP<true>>(Program &P, CodePtr &OpPC) {
+  auto F = IntegralAP<true>::deserialize(*OpPC);
+  OpPC += align(F.bytesToSerialize());
+
+  std::string Result;
+  llvm::raw_string_ostream SS(Result);
+  SS << F;
+  return Result;
 }
 
-template <> inline FixedPoint ReadArg<FixedPoint>(Program &P, CodePtr &OpPC) {
-  FixedPoint I = FixedPoint::deserialize(*OpPC);
-  OpPC += align(I.bytesToSerialize());
-  return I;
+template <> inline std::string printArg<FixedPoint>(Program &P, CodePtr &OpPC) {
+  auto F = FixedPoint::deserialize(*OpPC);
+  OpPC += align(F.bytesToSerialize());
+
+  std::string Result;
+  llvm::raw_string_ostream SS(Result);
+  SS << F;
+  return Result;
+}
+
+static bool isJumpOpcode(Opcode Op) {
+  return Op == OP_Jmp || Op == OP_Jf || Op == OP_Jt;
+}
+
+static size_t getNumDisplayWidth(size_t N) {
+  unsigned L = 1u, M = 10u;
+  while (M <= N && ++L != std::numeric_limits<size_t>::digits10 + 1)
+    M *= 10u;
+
+  return L;
 }
 
 LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); }
@@ -80,23 +115,115 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
   OS << "rvo:        " << hasRVO() << "\n";
   OS << "this arg:   " << hasThisPointer() << "\n";
 
-  auto PrintName = [&OS](const char *Name) {
-    OS << Name;
-    long N = 30 - strlen(Name);
-    if (N > 0)
-      OS.indent(N);
+  struct OpText {
+    size_t Addr;
+    std::string Op;
+    bool IsJump;
+    llvm::SmallVector<std::string> Args;
   };
 
+  auto PrintName = [](const char *Name) -> std::string {
+    return std::string(Name);
+  };
+
+  llvm::SmallVector<OpText> Code;
+  size_t LongestAddr = 0;
+  size_t LongestOp = 0;
+
   for (CodePtr Start = getCodeBegin(), PC = Start; PC != getCodeEnd();) {
     size_t Addr = PC - Start;
+    OpText Text;
     auto Op = PC.read<Opcode>();
-    OS << llvm::format("%8d", Addr) << " ";
+    Text.Addr = Addr;
+    Text.IsJump = isJumpOpcode(Op);
     switch (Op) {
 #define GET_DISASM
 #include "Opcodes.inc"
 #undef GET_DISASM
     }
+    Code.push_back(Text);
+    LongestOp = std::max(Text.Op.size(), LongestOp);
+    LongestAddr = std::max(getNumDisplayWidth(Addr), LongestAddr);
   }
+
+  // Record jumps and their targets.
+  struct JmpData {
+    size_t From;
+    size_t To;
+  };
+  llvm::SmallVector<JmpData> Jumps;
+  for (auto &Text : Code) {
+    if (Text.IsJump)
+      Jumps.push_back({Text.Addr, Text.Addr + std::stoi(Text.Args[0]) +
+                                      align(sizeof(Opcode)) +
+                                      align(sizeof(int32_t))});
+  }
+
+  llvm::SmallVector<std::string> Text;
+  Text.reserve(Code.size());
+  size_t LongestLine = 0;
+  // Print code to a string, one at a time.
+  for (auto C : Code) {
+    std::string Line;
+    llvm::raw_string_ostream LS(Line);
+    LS << C.Addr;
+    LS.indent(LongestAddr - getNumDisplayWidth(C.Addr) + 4);
+    LS << C.Op;
+    LS.indent(LongestOp - C.Op.size() + 4);
+    for (auto &Arg : C.Args) {
+      LS << Arg << ' ';
+    }
+    Text.push_back(Line);
+    LongestLine = std::max(Line.size(), LongestLine);
+  }
+
+  assert(Code.size() == Text.size());
+
+  auto spaces = [](unsigned N) -> std::string {
+    std::string S;
+    for (unsigned I = 0; I != N; ++I)
+      S += ' ';
+    return S;
+  };
+
+  // Now, draw the jump lines.
+  for (auto &J : Jumps) {
+    if (J.To > J.From) {
+      bool FoundStart = false;
+      for (size_t LineIndex = 0; LineIndex != Text.size(); ++LineIndex) {
+        Text[LineIndex] += spaces(LongestLine - Text[LineIndex].size());
+
+        if (Code[LineIndex].Addr == J.From) {
+          Text[LineIndex] += "  --+";
+          FoundStart = true;
+        } else if (Code[LineIndex].Addr == J.To) {
+          Text[LineIndex] += "  <-+";
+          break;
+        } else if (FoundStart) {
+          Text[LineIndex] += "    |";
+        }
+      }
+      LongestLine += 5;
+    } else {
+      bool FoundStart = false;
+      for (ssize_t LineIndex = Text.size() - 1; LineIndex >= 0; --LineIndex) {
+        Text[LineIndex] += spaces(LongestLine - Text[LineIndex].size());
+        if (Code[LineIndex].Addr == J.From) {
+          Text[LineIndex] += "  --+";
+          FoundStart = true;
+        } else if (Code[LineIndex].Addr == J.To) {
+          Text[LineIndex] += "  <-+";
+          break;
+        } else if (FoundStart) {
+          Text[LineIndex] += "    |";
+        }
+      }
+      LongestLine += 5;
+    }
+  }
+
+  for (auto &Line : Text)
+    OS << Line << '\n';
 }
 
 LLVM_DUMP_METHOD void Program::dump() const { dump(llvm::errs()); }
diff --git a/clang/utils/TableGen/ClangOpcodesEmitter.cpp b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
index 64534a50877ec..5d6d90994cf37 100644
--- a/clang/utils/TableGen/ClangOpcodesEmitter.cpp
+++ b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
@@ -171,16 +171,12 @@ void ClangOpcodesEmitter::EmitDisasm(raw_ostream &OS, StringRef N,
   OS << "#ifdef GET_DISASM\n";
   Enumerate(R, N, [R, &OS](ArrayRef<const Record *>, const Twine &ID) {
     OS << "case OP_" << ID << ":\n";
-    OS << "  PrintName(\"" << ID << "\");\n";
-    OS << "  OS << \"\\t\"";
+    OS << "  Text.Op = PrintName(\"" << ID << "\");\n";
+    for (const auto *Arg : R->getValueAsListOfDefs("Args"))
+      OS << "  Text.Args.push_back(printArg<" << Arg->getValueAsString("Name")
+         << ">(P, PC));\n";
 
-    for (const auto *Arg : R->getValueAsListOfDefs("Args")) {
-      OS << " << ReadArg<" << Arg->getValueAsString("Name") << ">(P, PC)";
-      OS << " << \" \"";
-    }
-
-    OS << " << \"\\n\";\n";
-    OS << "  continue;\n";
+    OS << "  break;\n";
   });
   OS << "#endif\n";
 }

@tbaederr tbaederr merged commit 578ca5e into llvm:main Apr 13, 2025
14 checks passed
var-const pushed a commit to ldionne/llvm-project that referenced this pull request Apr 17, 2025
E.g. for
```c++
constexpr int foo(int b) {
  int a = 1+1;

  for (int i = 0; i < b; ++i) {
    ++a;
  }
  return a;
}
```

we now print:
```
foo 0x7cc8d4bf0580
frame size: 128
arg size:   8
rvo:        0
this arg:   0
0      InitScope         0
16     ConstSint32       1
32     ConstSint32       1
48     AddSint32
56     SetLocalSint32    40
72     ConstSint32       0
88     SetLocalSint32    104
104    GetPtrLocal       104         <-+
120    LoadPopSint32                   |
128    GetPtrParam       0             |
144    LoadPopSint32                   |
152    LTSint32                        |
160    Jf                80     --+    |
176    GetPtrLocal       40       |    |
192    IncPopSint32      1        |    |
208    GetPtrLocal       104      |    |
224    IncPopSint32      1        |    |
240    Jmp               -152     |  --+
256    GetPtrLocal       40     <-+
272    LoadPopSint32
280    Destroy           0
296    RetSint32
304    Destroy           0
320    NoRet
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants