Skip to content

Commit c6ff809

Browse files
authored
[llvm-mc] Add --hex to disassemble hex bytes
`--disassemble`/`--cdis` parses each input byte as a decimal, binary (`0b`), octal (`0o`), or hexadecimal (`0x`) literal. Although the hexadecimal form is the most commonly used, requiring a `0x` prefix on every byte (`0x48 0x29 0xc3`) is cumbersome. Tools such as `xxd -p` and `rz-asm` use a plain hex dump form without the `0x` prefix or space separators. This patch adds `--hex` to disassemble such hex bytes, with optional whitespace between them.

```
% rz-asm -a x86 -b 64 -d 4829c34829c4
sub rbx, rax
sub rsp, rax
% llvm-mc -triple=x86_64 --cdis --hex --output-asm-variant=1 <<< 4829c34829c4
        .text
        sub     rbx, rax
        sub     rsp, rax
```

Pull Request: #119992
1 parent e2a94a9 commit c6ff809

File tree

5 files changed

+98
-11
lines changed

5 files changed

+98
-11
lines changed

llvm/docs/CommandGuide/llvm-mc.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ End-user Options
9292

9393
Generate DWARF debugging info for assembly source files.
9494

95+
.. option:: --hex
96+
97+
Take raw hexadecimal bytes as input for disassembly. Whitespace is ignored.
98+
9599
.. option:: --large-code-model
96100

97101
Create CFI directives that assume the code might be more than 2 GB.
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# RUN: rm -rf %t && split-file %s %t && cd %t
2+
# RUN: llvm-mc -triple=x86_64 --disassemble --hex a.s | FileCheck %s
3+
# RUN: llvm-mc -triple=x86_64 --disassemble --hex decode1.s 2>&1 | FileCheck %s --check-prefix=DECODE1 --implicit-check-not=warning:
4+
# RUN: not llvm-mc -triple=x86_64 --disassemble --hex decode2.s 2>&1 | FileCheck %s --check-prefix=DECODE2 --implicit-check-not=warning:
5+
# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err1.s 2>&1 | FileCheck %s --check-prefix=ERR1 --implicit-check-not=error:
6+
# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err2.s 2>&1 | FileCheck %s --check-prefix=ERR2 --implicit-check-not=error:
7+
8+
#--- a.s
9+
4883ec08 31 # comment
10+
# comment
11+
ed4829 c390
12+
[c3c3][4829c3]
13+
[90]
14+
15+
# CHECK: subq $8, %rsp
16+
# CHECK-NEXT: xorl %ebp, %ebp
17+
# CHECK-NEXT: subq %rax, %rbx
18+
# CHECK-NEXT: nop
19+
# CHECK-NEXT: retq
20+
# CHECK-NEXT: retq
21+
# CHECK-NEXT: subq %rax, %rbx
22+
# CHECK-NEXT: nop
23+
# CHECK-EMPTY:
24+
25+
#--- decode1.s
26+
4889
27+
28+
# DECODE1: 1:1: warning: invalid instruction encoding
29+
30+
#--- decode2.s
31+
[4889][4889] [4889]4889c3
32+
[4889]
33+
34+
# DECODE2: 1:2: warning: invalid instruction encoding
35+
# DECODE2: 1:8: warning: invalid instruction encoding
36+
# DECODE2: 1:15: warning: invalid instruction encoding
37+
# DECODE2: 2:3: warning: invalid instruction encoding
38+
39+
#--- err1.s
40+
0x31ed
41+
0xcc
42+
g0
43+
44+
# ERR1: 1:1: error: invalid input token
45+
# ERR1: 2:1: error: invalid input token
46+
# ERR1: 3:1: error: invalid input token
47+
# ERR1: xorl %ebp, %ebp
48+
# ERR1-NEXT: int3
49+
# ERR1-EMPTY:
50+
51+
#--- err2.s
52+
g
53+
90c
54+
cc
55+
c
56+
57+
# ERR2: 1:1: error: expected two hex digits
58+
# ERR2: 2:3: error: expected two hex digits
59+
# ERR2: 4:1: error: expected two hex digits
60+
# ERR2: nop
61+
# ERR2-NEXT: int3
62+
# ERR2-EMPTY:

llvm/tools/llvm-mc/Disassembler.cpp

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "Disassembler.h"
15+
#include "llvm/ADT/StringExtras.h"
1516
#include "llvm/MC/MCAsmInfo.h"
1617
#include "llvm/MC/MCContext.h"
1718
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -94,10 +95,8 @@ static bool SkipToToken(StringRef &Str) {
9495
}
9596
}
9697

97-
98-
static bool ByteArrayFromString(ByteArrayTy &ByteArray,
99-
StringRef &Str,
100-
SourceMgr &SM) {
98+
static bool byteArrayFromString(ByteArrayTy &ByteArray, StringRef &Str,
99+
SourceMgr &SM, bool HexBytes) {
101100
while (SkipToToken(Str)) {
102101
// Handled by higher level
103102
if (Str[0] == '[' || Str[0] == ']')
@@ -109,7 +108,24 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
109108

110109
// Convert to a byte and add to the byte vector.
111110
unsigned ByteVal;
112-
if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
111+
if (HexBytes) {
112+
if (Next < 2) {
113+
SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
114+
SourceMgr::DK_Error, "expected two hex digits");
115+
Str = Str.substr(Next);
116+
return true;
117+
}
118+
Next = 2;
119+
unsigned C0 = hexDigitValue(Value[0]);
120+
unsigned C1 = hexDigitValue(Value[1]);
121+
if (C0 == -1u || C1 == -1u) {
122+
SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
123+
SourceMgr::DK_Error, "invalid input token");
124+
Str = Str.substr(Next);
125+
return true;
126+
}
127+
ByteVal = C0 * 16 + C1;
128+
} else if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
113129
// If we have an error, print it and skip to the end of line.
114130
SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
115131
"invalid input token");
@@ -130,9 +146,8 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
130146
int Disassembler::disassemble(const Target &T, const std::string &Triple,
131147
MCSubtargetInfo &STI, MCStreamer &Streamer,
132148
MemoryBuffer &Buffer, SourceMgr &SM,
133-
MCContext &Ctx,
134-
const MCTargetOptions &MCOptions) {
135-
149+
MCContext &Ctx, const MCTargetOptions &MCOptions,
150+
bool HexBytes) {
136151
std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
137152
if (!MRI) {
138153
errs() << "error: no register info for target " << Triple << "\n";
@@ -188,7 +203,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
188203
}
189204

190205
// It's a real token, get the bytes and emit them
191-
ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
206+
ErrorOccurred |= byteArrayFromString(ByteArray, Str, SM, HexBytes);
192207

193208
if (!ByteArray.first.empty())
194209
ErrorOccurred |=

llvm/tools/llvm-mc/Disassembler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class Disassembler {
3232
static int disassemble(const Target &T, const std::string &Triple,
3333
MCSubtargetInfo &STI, MCStreamer &Streamer,
3434
MemoryBuffer &Buffer, SourceMgr &SM, MCContext &Ctx,
35-
const MCTargetOptions &MCOptions);
35+
const MCTargetOptions &MCOptions, bool HexBytes);
3636
};
3737

3838
} // namespace llvm

llvm/tools/llvm-mc/llvm-mc.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ static cl::opt<bool>
9494
cl::desc("Prefer hex format for immediate values"),
9595
cl::cat(MCCategory));
9696

97+
static cl::opt<bool>
98+
HexBytes("hex",
99+
cl::desc("Take raw hexadecimal bytes as input for disassembly. "
100+
"Whitespace is ignored"),
101+
cl::cat(MCCategory));
102+
97103
static cl::list<std::string>
98104
DefineSymbol("defsym",
99105
cl::desc("Defines a symbol to be an integer constant"),
@@ -592,7 +598,7 @@ int main(int argc, char **argv) {
592598
}
593599
if (disassemble)
594600
Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, *Buffer,
595-
SrcMgr, Ctx, MCOptions);
601+
SrcMgr, Ctx, MCOptions, HexBytes);
596602

597603
// Keep output if no errors.
598604
if (Res == 0) {

0 commit comments

Comments
 (0)