Skip to content

Commit 8ea59ec

Browse files
authored
[BOLT] Use rewriter interface for updating binary build ID (#94273)
Move functionality for patching build ID into a separate rewriter class and change the way we do the patching. Support build ID in different note sections in order to update the build ID in the Linux kernel binary which puts it into ".notes" section instead of ".note.gnu.build-id".
1 parent 4973ad4 commit 8ea59ec

File tree

9 files changed

+147
-104
lines changed

9 files changed

+147
-104
lines changed

bolt/include/bolt/Core/BinarySection.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ class BinarySection {
284284
return true;
285285
}
286286
}
287+
bool isNote() const { return isELF() && ELFType == ELF::SHT_NOTE; }
287288
bool isReordered() const { return IsReordered; }
288289
bool isAnonymous() const { return IsAnonymous; }
289290
bool isRelro() const { return IsRelro; }

bolt/include/bolt/Rewrite/MetadataManager.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ class MetadataManager {
2828
/// Register a new \p Rewriter.
2929
void registerRewriter(std::unique_ptr<MetadataRewriter> Rewriter);
3030

31+
/// Run initializers after sections are discovered.
32+
void runSectionInitializers();
33+
3134
/// Execute initialization of rewriters while functions are disassembled, but
3235
/// CFG is not yet built.
3336
void runInitializersPreCFG();

bolt/include/bolt/Rewrite/MetadataRewriter.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ class MetadataRewriter {
4545
/// Return name for the rewriter.
4646
StringRef getName() const { return Name; }
4747

48+
/// Run initialization after the binary is read and sections are identified,
49+
/// but before functions are discovered.
50+
virtual Error sectionInitializer() { return Error::success(); }
51+
4852
/// Interface for modifying/annotating functions in the binary based on the
4953
/// contents of the section. Functions are in pre-cfg state.
5054
virtual Error preCFGInitializer() { return Error::success(); }

bolt/include/bolt/Rewrite/MetadataRewriters.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ class BinaryContext;
2121

2222
std::unique_ptr<MetadataRewriter> createLinuxKernelRewriter(BinaryContext &);
2323

24+
std::unique_ptr<MetadataRewriter> createBuildIDRewriter(BinaryContext &);
25+
2426
std::unique_ptr<MetadataRewriter> createPseudoProbeRewriter(BinaryContext &);
2527

2628
std::unique_ptr<MetadataRewriter> createSDTRewriter(BinaryContext &);

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,6 @@ class RewriteInstance {
7979
return InputFile->getFileName();
8080
}
8181

82-
/// Set the build-id string if we did not fail to parse the contents of the
83-
/// ELF note section containing build-id information.
84-
void parseBuildID();
85-
86-
/// The build-id is typically a stream of 20 bytes. Return these bytes in
87-
/// printable hexadecimal form if they are available, or std::nullopt
88-
/// otherwise.
89-
std::optional<std::string> getPrintableBuildID() const;
90-
9182
/// If this instance uses a profile, return appropriate profile reader.
9283
const ProfileReaderBase *getProfileReader() const {
9384
return ProfileReader.get();
@@ -184,6 +175,9 @@ class RewriteInstance {
184175
/// Link additional runtime code to support instrumentation.
185176
void linkRuntime();
186177

178+
/// Process metadata in sections before functions are discovered.
179+
void processSectionMetadata();
180+
187181
/// Process metadata in special sections before CFG is built for functions.
188182
void processMetadataPreCFG();
189183

@@ -368,11 +362,6 @@ class RewriteInstance {
368362
/// Loop over now emitted functions to write translation maps
369363
void encodeBATSection();
370364

371-
/// Update the ELF note section containing the binary build-id to reflect
372-
/// a new build-id, so tools can differentiate between the old and the
373-
/// rewritten binary.
374-
void patchBuildID();
375-
376365
/// Return file offset corresponding to a virtual \p Address.
377366
/// Return 0 if the address has no mapping in the file, including being
378367
/// part of .bss section.
@@ -562,18 +551,12 @@ class RewriteInstance {
562551
/// Exception handling and stack unwinding information in this binary.
563552
ErrorOr<BinarySection &> EHFrameSection{std::errc::bad_address};
564553

565-
/// .note.gnu.build-id section.
566-
ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};
567-
568554
/// Helper for accessing sections by name.
569555
BinarySection *getSection(const Twine &Name) {
570556
ErrorOr<BinarySection &> ErrOrSection = BC->getUniqueSectionByName(Name);
571557
return ErrOrSection ? &ErrOrSection.get() : nullptr;
572558
}
573559

574-
/// A reference to the build-id bytes in the original binary
575-
StringRef BuildID;
576-
577560
/// Keep track of functions we fail to write in the binary. We need to avoid
578561
/// rewriting CFI info for these functions.
579562
std::vector<uint64_t> FailedAddresses;

bolt/lib/Rewrite/BuildIDRewriter.cpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
//===- bolt/Rewrite/BuildIDRewriter.cpp -----------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Read and update build ID stored in ELF note section.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "bolt/Rewrite/MetadataRewriter.h"
14+
#include "bolt/Rewrite/MetadataRewriters.h"
15+
#include "llvm/Support/Errc.h"
16+
17+
using namespace llvm;
18+
using namespace bolt;
19+
20+
namespace {
21+
22+
/// The build-id is typically a stream of 20 bytes. Return these bytes in
23+
/// printable hexadecimal form.
24+
std::string getPrintableBuildID(StringRef BuildID) {
25+
std::string Str;
26+
raw_string_ostream OS(Str);
27+
for (const char &Char : BuildID)
28+
OS << format("%.2x", static_cast<unsigned char>(Char));
29+
30+
return OS.str();
31+
}
32+
33+
class BuildIDRewriter final : public MetadataRewriter {
34+
35+
/// Information about binary build ID.
36+
ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};
37+
StringRef BuildID;
38+
std::optional<uint64_t> BuildIDOffset;
39+
std::optional<uint64_t> BuildIDSize;
40+
41+
public:
42+
BuildIDRewriter(StringRef Name, BinaryContext &BC)
43+
: MetadataRewriter(Name, BC) {}
44+
45+
Error sectionInitializer() override;
46+
47+
Error postEmitFinalizer() override;
48+
};
49+
50+
/// Scan all ELF note sections for a GNU build-ID note and record its section,
/// bytes, offset, and size. The printable form of the ID is also stored in
/// the binary context.
///
/// \returns an error if a note header cannot be read in full, and success
/// otherwise, including when no build ID is present.
Error BuildIDRewriter::sectionInitializer() {
  // Typically, build ID will reside in .note.gnu.build-id section. However,
  // a linker script can change the section name and such is the case with
  // the Linux kernel. Hence, we iterate over all note sections.
  for (BinarySection &NoteSection : BC.sections()) {
    if (!NoteSection.isNote())
      continue;

    StringRef Buf = NoteSection.getContents();
    DataExtractor DE = DataExtractor(Buf, BC.AsmInfo->isLittleEndian(),
                                     BC.AsmInfo->getCodePointerSize());
    DataExtractor::Cursor Cursor(0);
    while (Cursor && !DE.eof(Cursor)) {
      // Note header: name size, descriptor size, and note type.
      const uint32_t NameSz = DE.getU32(Cursor);
      const uint32_t DescSz = DE.getU32(Cursor);
      const uint32_t Type = DE.getU32(Cursor);

      if (!Cursor)
        return createStringError(errc::executable_format_error,
                                 "out of bounds while reading note section: %s",
                                 toString(Cursor.takeError()).c_str());

      // Name and descriptor are each padded to a 4-byte boundary.
      const uint64_t NameOffset = Cursor.tell();
      StringRef Name =
          NameSz ? Buf.slice(NameOffset, NameOffset + NameSz) : "<empty>";
      Cursor.seek(alignTo(NameOffset + NameSz, 4));

      const uint64_t DescOffset = Cursor.tell();
      StringRef Desc =
          DescSz ? Buf.slice(DescOffset, DescOffset + DescSz) : "<empty>";
      Cursor.seek(alignTo(DescOffset + DescSz, 4));

      // StringRef::slice() clamps to the buffer, so a descriptor that runs
      // past the end of the section comes back shorter than DescSz (possibly
      // empty). Only accept a build ID that is fully contained in the section;
      // otherwise the recorded bytes and size would disagree and the later
      // patching step would index outside of the ID.
      const bool IsCompleteDesc = DescSz && Desc.size() == DescSz;

      if (Type == ELF::NT_GNU_BUILD_ID && Name.substr(0, 3) == "GNU" &&
          IsCompleteDesc) {
        BuildIDSection = NoteSection;
        BuildID = Desc;
        BC.setFileBuildID(getPrintableBuildID(Desc));
        BuildIDOffset = DescOffset;
        BuildIDSize = DescSz;

        // The first complete build-ID note wins.
        return Error::success();
      }
    }
  }

  return Error::success();
}
96+
97+
/// Flip the lowest bit of the last build-ID byte so that tools can tell the
/// rewritten binary apart from the original one. Does nothing when no build
/// ID was recorded while reading sections.
Error BuildIDRewriter::postEmitFinalizer() {
  // BuildID.empty() guards the index computation below: with an empty ID,
  // BuildID.size() - 1 would wrap around and read out of bounds.
  if (!BuildIDSection || !BuildIDOffset || BuildID.empty())
    return Error::success();

  const uint8_t LastByte = BuildID[BuildID.size() - 1];
  SmallVector<char, 1> Patch = {static_cast<char>(LastByte ^ 1)};
  // The patch is registered at the section-relative offset of the last
  // build-ID byte.
  BuildIDSection->addPatch(*BuildIDOffset + BuildID.size() - 1, Patch);
  BC.outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";

  return Error::success();
}
108+
} // namespace
109+
110+
std::unique_ptr<MetadataRewriter>
111+
llvm::bolt::createBuildIDRewriter(BinaryContext &BC) {
112+
return std::make_unique<BuildIDRewriter>("build-id-rewriter", BC);
113+
}

bolt/lib/Rewrite/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_llvm_library(LLVMBOLTRewrite
2121
LinuxKernelRewriter.cpp
2222
MachORewriteInstance.cpp
2323
MetadataManager.cpp
24+
BuildIDRewriter.cpp
2425
PseudoProbeRewriter.cpp
2526
RewriteInstance.cpp
2627
SDTRewriter.cpp

bolt/lib/Rewrite/MetadataManager.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,18 @@ void MetadataManager::registerRewriter(
2020
Rewriters.emplace_back(std::move(Rewriter));
2121
}
2222

23+
void MetadataManager::runSectionInitializers() {
24+
for (auto &Rewriter : Rewriters) {
25+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
26+
<< " after reading sections\n");
27+
if (Error E = Rewriter->sectionInitializer()) {
28+
errs() << "BOLT-ERROR: while running " << Rewriter->getName()
29+
<< " after reading sections: " << toString(std::move(E)) << '\n';
30+
exit(1);
31+
}
32+
}
33+
}
34+
2335
void MetadataManager::runInitializersPreCFG() {
2436
for (auto &Rewriter : Rewriters) {
2537
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 8 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -643,82 +643,6 @@ Error RewriteInstance::discoverStorage() {
643643
return Error::success();
644644
}
645645

646-
void RewriteInstance::parseBuildID() {
647-
if (!BuildIDSection)
648-
return;
649-
650-
StringRef Buf = BuildIDSection->getContents();
651-
652-
// Reading notes section (see Portable Formats Specification, Version 1.1,
653-
// pg 2-5, section "Note Section").
654-
DataExtractor DE =
655-
DataExtractor(Buf,
656-
/*IsLittleEndian=*/true, InputFile->getBytesInAddress());
657-
uint64_t Offset = 0;
658-
if (!DE.isValidOffset(Offset))
659-
return;
660-
uint32_t NameSz = DE.getU32(&Offset);
661-
if (!DE.isValidOffset(Offset))
662-
return;
663-
uint32_t DescSz = DE.getU32(&Offset);
664-
if (!DE.isValidOffset(Offset))
665-
return;
666-
uint32_t Type = DE.getU32(&Offset);
667-
668-
LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
669-
<< "; Type = " << Type << "\n");
670-
671-
// Type 3 is a GNU build-id note section
672-
if (Type != 3)
673-
return;
674-
675-
StringRef Name = Buf.slice(Offset, Offset + NameSz);
676-
Offset = alignTo(Offset + NameSz, 4);
677-
if (Name.substr(0, 3) != "GNU")
678-
return;
679-
680-
BuildID = Buf.slice(Offset, Offset + DescSz);
681-
}
682-
683-
std::optional<std::string> RewriteInstance::getPrintableBuildID() const {
684-
if (BuildID.empty())
685-
return std::nullopt;
686-
687-
std::string Str;
688-
raw_string_ostream OS(Str);
689-
const unsigned char *CharIter = BuildID.bytes_begin();
690-
while (CharIter != BuildID.bytes_end()) {
691-
if (*CharIter < 0x10)
692-
OS << "0";
693-
OS << Twine::utohexstr(*CharIter);
694-
++CharIter;
695-
}
696-
return OS.str();
697-
}
698-
699-
void RewriteInstance::patchBuildID() {
700-
raw_fd_ostream &OS = Out->os();
701-
702-
if (BuildID.empty())
703-
return;
704-
705-
size_t IDOffset = BuildIDSection->getContents().rfind(BuildID);
706-
assert(IDOffset != StringRef::npos && "failed to patch build-id");
707-
708-
uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress());
709-
if (!FileOffset) {
710-
BC->errs()
711-
<< "BOLT-WARNING: Non-allocatable build-id will not be updated.\n";
712-
return;
713-
}
714-
715-
char LastIDByte = BuildID[BuildID.size() - 1];
716-
LastIDByte ^= 1;
717-
OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1);
718-
719-
BC->outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
720-
}
721-
722646
Error RewriteInstance::run() {
723647
assert(BC && "failed to create a binary context");
724648

@@ -1977,7 +1901,6 @@ Error RewriteInstance::readSpecialSections() {
19771901
".rela" + std::string(BC->getMainCodeSectionName()));
19781902
HasSymbolTable = (bool)BC->getUniqueSectionByName(".symtab");
19791903
EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
1980-
BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");
19811904

19821905
if (ErrorOr<BinarySection &> BATSec =
19831906
BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
@@ -2035,10 +1958,7 @@ Error RewriteInstance::readSpecialSections() {
20351958
report_error("expected valid eh_frame section", EHFrameOrError.takeError());
20361959
CFIRdWrt.reset(new CFIReaderWriter(*BC, *EHFrameOrError.get()));
20371960

2038-
// Parse build-id
2039-
parseBuildID();
2040-
if (std::optional<std::string> FileBuildID = getPrintableBuildID())
2041-
BC->setFileBuildID(*FileBuildID);
1961+
processSectionMetadata();
20421962

20431963
// Read .dynamic/PT_DYNAMIC.
20441964
return readELFDynamic();
@@ -3218,14 +3138,20 @@ void RewriteInstance::initializeMetadataManager() {
32183138
if (BC->IsLinuxKernel)
32193139
MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC));
32203140

3141+
MetadataManager.registerRewriter(createBuildIDRewriter(*BC));
3142+
32213143
MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));
32223144

32233145
MetadataManager.registerRewriter(createSDTRewriter(*BC));
32243146
}
32253147

3226-
void RewriteInstance::processMetadataPreCFG() {
3148+
void RewriteInstance::processSectionMetadata() {
32273149
initializeMetadataManager();
32283150

3151+
MetadataManager.runSectionInitializers();
3152+
}
3153+
3154+
void RewriteInstance::processMetadataPreCFG() {
32293155
MetadataManager.runInitializersPreCFG();
32303156

32313157
processProfileDataPreCFG();
@@ -5772,8 +5698,6 @@ void RewriteInstance::rewriteFile() {
57725698
// Update symbol tables.
57735699
patchELFSymTabs();
57745700

5775-
patchBuildID();
5776-
57775701
if (opts::EnableBAT)
57785702
encodeBATSection();
57795703

0 commit comments

Comments
 (0)