Skip to content

Commit 8d5854e

Browse files
committed
[BOLT] Add option to verify instruction encoder/decoder
Summary: Add option `-check-encoding` to verify that the bytes given to the LLVM disassembler match the bytes the assembler produces when the decoded instruction is re-encoded. When set, the verification runs on every instruction in processed functions, except branches, calls, and no-ops. I'm not enabling the option by default, as it could be quite noisy on x86, where instruction encoding is ambiguous and can include redundant prefixes. (cherry picked from FBD16595415)
1 parent 79ff4ec commit 8d5854e

File tree

3 files changed

+48
-8
lines changed

3 files changed

+48
-8
lines changed

bolt/src/BinaryContext.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -685,11 +685,9 @@ bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
685685
<< Twine::utohexstr(BF.getAddress() + BF.getSize())
686686
<< " starting at offset "
687687
<< (Offset - BF.getSize()) << " in function "
688-
<< BF << '\n';
689-
for (auto I = BF.getSize(); I < BF.getMaxSize(); ++I) {
690-
errs() << format("%.2x ", (*FunctionData)[I]);
691-
}
692-
errs() << '\n';
688+
<< BF << '\n'
689+
<< FunctionData->slice(BF.getSize(), BF.getMaxSize() - BF.getSize())
690+
<< '\n';
693691
}
694692

695693
return false;

bolt/src/BinaryContext.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "JumpTable.h"
2121
#include "MCPlusBuilder.h"
2222
#include "llvm/ADT/iterator.h"
23+
#include "llvm/ADT/ArrayRef.h"
2324
#include "llvm/ADT/Triple.h"
2425
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
2526
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -46,6 +47,7 @@
4647
#include <shared_mutex>
4748
#include <string>
4849
#include <system_error>
50+
#include <type_traits>
4951
#include <unordered_map>
5052
#include <vector>
5153

@@ -1039,6 +1041,18 @@ class BinaryContext {
10391041
}
10401042
};
10411043

1044+
template <typename T,
1045+
typename = std::enable_if_t<sizeof(T) == 1> >
1046+
inline raw_ostream &operator<<(raw_ostream &OS,
1047+
const ArrayRef<T> &ByteArray) {
1048+
const char *Sep = "";
1049+
for (const auto Byte : ByteArray) {
1050+
OS << Sep << format("%.2x", Byte);
1051+
Sep = " ";
1052+
}
1053+
return OS;
1054+
}
1055+
10421056
} // namespace bolt
10431057
} // namespace llvm
10441058

bolt/src/BinaryFunction.cpp

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,15 @@ AlignMacroOpFusion("align-macro-fusion",
8181
cl::cat(BoltRelocCategory));
8282

8383
cl::opt<bool>
84-
PreserveBlocksAlignment("preserve-blocks-alignment",
85-
cl::desc("try to preserve basic block alignment"),
84+
CheckEncoding("check-encoding",
85+
cl::desc("perform verification of LLVM instruction encoding/decoding. "
86+
"Every instruction in the input is decoded and re-encoded. "
87+
"If the resulting bytes do not match the input, a warning message "
88+
"is printed."),
8689
cl::init(false),
8790
cl::ZeroOrMore,
88-
cl::cat(BoltOptCategory));
91+
cl::Hidden,
92+
cl::cat(BoltCategory));
8993

9094
static cl::opt<bool>
9195
DotToolTipCode("dot-tooltip-code",
@@ -114,6 +118,13 @@ JumpTables("jump-tables",
114118
cl::ZeroOrMore,
115119
cl::cat(BoltOptCategory));
116120

121+
// Boolean command-line option "preserve-blocks-alignment"; initialized to
// false and listed under BoltOptCategory.
cl::opt<bool> PreserveBlocksAlignment(
    "preserve-blocks-alignment",
    cl::desc("try to preserve basic block alignment"),
    cl::init(false),
    cl::ZeroOrMore,
    cl::cat(BoltOptCategory));
127+
117128
cl::opt<bool>
118129
PrintDynoStats("dyno-stats",
119130
cl::desc("print execution info based on profile"),
@@ -1016,6 +1027,23 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
10161027
break;
10171028
}
10181029

1030+
// Check integrity of LLVM assembler/disassembler.
1031+
if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
1032+
!BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
1033+
SmallString<256> Code;
1034+
SmallVector<MCFixup, 4> Fixups;
1035+
raw_svector_ostream VecOS(Code);
1036+
BC.MCE->encodeInstruction(Instruction, VecOS, Fixups, *BC.STI);
1037+
auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
1038+
if (FunctionData.slice(Offset, Size) != EncodedData) {
1039+
errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
1040+
<< "function " << *this << ":\n";
1041+
BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
1042+
errs() << " input: " << FunctionData.slice(Offset, Size)
1043+
<< "\n output: " << EncodedData << "\n\n";
1044+
}
1045+
}
1046+
10191047
// Cannot process functions with AVX-512 instructions.
10201048
if (MIB->hasEVEXEncoding(Instruction)) {
10211049
if (opts::Verbosity >= 1) {

0 commit comments

Comments
 (0)