Skip to content

[llvm-mc] Add --hex to disassemble hex bytes #119992

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/docs/CommandGuide/llvm-mc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ End-user Options

Generate DWARF debugging info for assembly source files.

.. option:: --hex

Take raw hexadecimal bytes as input for disassembly. Each byte must be written as exactly two hex digits, without a ``0x`` prefix. Whitespace is ignored.

.. option:: --large-code-model

Create CFI directives that assume the code might be more than 2 GB.
Expand Down
62 changes: 62 additions & 0 deletions llvm/test/MC/Disassembler/X86/hex-bytes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# RUN: rm -rf %t && split-file %s %t && cd %t
# RUN: llvm-mc -triple=x86_64 --disassemble --hex a.s | FileCheck %s
# RUN: llvm-mc -triple=x86_64 --disassemble --hex decode1.s 2>&1 | FileCheck %s --check-prefix=DECODE1 --implicit-check-not=warning:
# RUN: not llvm-mc -triple=x86_64 --disassemble --hex decode2.s 2>&1 | FileCheck %s --check-prefix=DECODE2 --implicit-check-not=warning:
# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err1.s 2>&1 | FileCheck %s --check-prefix=ERR1 --implicit-check-not=error:
# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err2.s 2>&1 | FileCheck %s --check-prefix=ERR2 --implicit-check-not=error:

#--- a.s
4883ec08 31 # comment
# comment
ed4829 c390
[c3c3][4829c3]
[90]

# CHECK: subq $8, %rsp
# CHECK-NEXT: xorl %ebp, %ebp
# CHECK-NEXT: subq %rax, %rbx
# CHECK-NEXT: nop
# CHECK-NEXT: retq
# CHECK-NEXT: retq
# CHECK-NEXT: subq %rax, %rbx
# CHECK-NEXT: nop
# CHECK-EMPTY:

#--- decode1.s
4889

# DECODE1: 1:1: warning: invalid instruction encoding

#--- decode2.s
[4889][4889] [4889]4889c3
[4889]

# DECODE2: 1:2: warning: invalid instruction encoding
# DECODE2: 1:8: warning: invalid instruction encoding
# DECODE2: 1:15: warning: invalid instruction encoding
# DECODE2: 2:3: warning: invalid instruction encoding

#--- err1.s
0x31ed
0xcc
g0

# ERR1: 1:1: error: invalid input token
# ERR1: 2:1: error: invalid input token
# ERR1: 3:1: error: invalid input token
# ERR1: xorl %ebp, %ebp
# ERR1-NEXT: int3
# ERR1-EMPTY:

#--- err2.s
g
90c
cc
c

# ERR2: 1:1: error: expected two hex digits
# ERR2: 2:3: error: expected two hex digits
# ERR2: 4:1: error: expected two hex digits
# ERR2: nop
# ERR2-NEXT: int3
# ERR2-EMPTY:
33 changes: 24 additions & 9 deletions llvm/tools/llvm-mc/Disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "Disassembler.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
Expand Down Expand Up @@ -94,10 +95,8 @@ static bool SkipToToken(StringRef &Str) {
}
}


static bool ByteArrayFromString(ByteArrayTy &ByteArray,
StringRef &Str,
SourceMgr &SM) {
static bool byteArrayFromString(ByteArrayTy &ByteArray, StringRef &Str,
SourceMgr &SM, bool HexBytes) {
while (SkipToToken(Str)) {
// Handled by higher level
if (Str[0] == '[' || Str[0] == ']')
Expand All @@ -109,7 +108,24 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,

// Convert to a byte and add to the byte vector.
unsigned ByteVal;
if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
if (HexBytes) {
if (Next < 2) {
SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
SourceMgr::DK_Error, "expected two hex digits");
Str = Str.substr(Next);
return true;
}
Next = 2;
unsigned C0 = hexDigitValue(Value[0]);
unsigned C1 = hexDigitValue(Value[1]);
if (C0 == -1u || C1 == -1u) {
SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
SourceMgr::DK_Error, "invalid input token");
Str = Str.substr(Next);
return true;
}
ByteVal = C0 * 16 + C1;
} else if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
// If we have an error, print it and skip to the end of line.
SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
"invalid input token");
Expand All @@ -130,9 +146,8 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
int Disassembler::disassemble(const Target &T, const std::string &Triple,
MCSubtargetInfo &STI, MCStreamer &Streamer,
MemoryBuffer &Buffer, SourceMgr &SM,
MCContext &Ctx,
const MCTargetOptions &MCOptions) {

MCContext &Ctx, const MCTargetOptions &MCOptions,
bool HexBytes) {
std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
if (!MRI) {
errs() << "error: no register info for target " << Triple << "\n";
Expand Down Expand Up @@ -188,7 +203,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
}

// It's a real token, get the bytes and emit them
ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
ErrorOccurred |= byteArrayFromString(ByteArray, Str, SM, HexBytes);

if (!ByteArray.first.empty())
ErrorOccurred |=
Expand Down
2 changes: 1 addition & 1 deletion llvm/tools/llvm-mc/Disassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class Disassembler {
static int disassemble(const Target &T, const std::string &Triple,
MCSubtargetInfo &STI, MCStreamer &Streamer,
MemoryBuffer &Buffer, SourceMgr &SM, MCContext &Ctx,
const MCTargetOptions &MCOptions);
const MCTargetOptions &MCOptions, bool HexBytes);
};

} // namespace llvm
Expand Down
8 changes: 7 additions & 1 deletion llvm/tools/llvm-mc/llvm-mc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ static cl::opt<bool>
cl::desc("Prefer hex format for immediate values"),
cl::cat(MCCategory));

static cl::opt<bool>
HexBytes("hex",
cl::desc("Take raw hexadecimal bytes as input for disassembly. "
"Whitespace is ignored"),
cl::cat(MCCategory));

static cl::list<std::string>
DefineSymbol("defsym",
cl::desc("Defines a symbol to be an integer constant"),
Expand Down Expand Up @@ -592,7 +598,7 @@ int main(int argc, char **argv) {
}
if (disassemble)
Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, *Buffer,
SrcMgr, Ctx, MCOptions);
SrcMgr, Ctx, MCOptions, HexBytes);

// Keep output if no errors.
if (Res == 0) {
Expand Down
Loading