Skip to content

[BOLT] Use rewriter interface for updating binary build ID #94273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bolt/include/bolt/Core/BinarySection.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ class BinarySection {
return true;
}
}
/// Return true if this is an ELF section whose type is SHT_NOTE.
bool isNote() const {
  if (!isELF())
    return false;
  return ELFType == ELF::SHT_NOTE;
}
/// Return the value of the IsReordered flag.
bool isReordered() const { return IsReordered; }
/// Return the value of the IsAnonymous flag.
bool isAnonymous() const { return IsAnonymous; }
/// Return the value of the IsRelro flag.
bool isRelro() const { return IsRelro; }
Expand Down
3 changes: 3 additions & 0 deletions bolt/include/bolt/Rewrite/MetadataManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ class MetadataManager {
/// Register a new \p Rewriter.
void registerRewriter(std::unique_ptr<MetadataRewriter> Rewriter);

/// Run initializers after sections are discovered.
///
/// Calls sectionInitializer() on every registered rewriter in turn. If a
/// rewriter returns an error, the error is printed and the process exits
/// with status 1.
void runSectionInitializers();

/// Execute initialization of rewriters while functions are disassembled, but
/// CFG is not yet built.
void runInitializersPreCFG();
Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Rewrite/MetadataRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ class MetadataRewriter {
/// Return name for the rewriter.
StringRef getName() const { return Name; }

/// Run initialization after the binary is read and sections are identified,
/// but before functions are discovered.
///
/// The default implementation does nothing and reports success; rewriters
/// that need to inspect sections override it. It is invoked from
/// MetadataManager::runSectionInitializers(), where a returned failure is
/// reported and terminates the process.
virtual Error sectionInitializer() { return Error::success(); }

/// Interface for modifying/annotating functions in the binary based on the
/// contents of the section. Functions are in pre-cfg state.
virtual Error preCFGInitializer() { return Error::success(); }
Expand Down
2 changes: 2 additions & 0 deletions bolt/include/bolt/Rewrite/MetadataRewriters.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class BinaryContext;

std::unique_ptr<MetadataRewriter> createLinuxKernelRewriter(BinaryContext &);

std::unique_ptr<MetadataRewriter> createBuildIDRewriter(BinaryContext &);

std::unique_ptr<MetadataRewriter> createPseudoProbeRewriter(BinaryContext &);

std::unique_ptr<MetadataRewriter> createSDTRewriter(BinaryContext &);
Expand Down
23 changes: 3 additions & 20 deletions bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,6 @@ class RewriteInstance {
return InputFile->getFileName();
}

/// Set the build-id string if we did not fail to parse the contents of the
/// ELF note section containing build-id information.
void parseBuildID();

/// The build-id is typically a stream of 20 bytes. Return these bytes in
/// printable hexadecimal form if they are available, or std::nullopt
/// otherwise.
std::optional<std::string> getPrintableBuildID() const;

/// If this instance uses a profile, return appropriate profile reader.
const ProfileReaderBase *getProfileReader() const {
return ProfileReader.get();
Expand Down Expand Up @@ -184,6 +175,9 @@ class RewriteInstance {
/// Link additional runtime code to support instrumentation.
void linkRuntime();

/// Process metadata in sections before functions are discovered.
void processSectionMetadata();

/// Process metadata in special sections before CFG is built for functions.
void processMetadataPreCFG();

Expand Down Expand Up @@ -368,11 +362,6 @@ class RewriteInstance {
/// Loop over now emitted functions to write translation maps
void encodeBATSection();

/// Update the ELF note section containing the binary build-id to reflect
/// a new build-id, so tools can differentiate between the old and the
/// rewritten binary.
void patchBuildID();

/// Return file offset corresponding to a virtual \p Address.
/// Return 0 if the address has no mapping in the file, including being
/// part of .bss section.
Expand Down Expand Up @@ -562,18 +551,12 @@ class RewriteInstance {
/// Exception handling and stack unwinding information in this binary.
ErrorOr<BinarySection &> EHFrameSection{std::errc::bad_address};

/// .note.gnu.build-id section.
ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};

/// Look up a section by its unique \p Name.
///
/// \returns a pointer to the section, or nullptr when the lookup by unique
/// name does not succeed.
BinarySection *getSection(const Twine &Name) {
  if (ErrorOr<BinarySection &> Section = BC->getUniqueSectionByName(Name))
    return &Section.get();
  return nullptr;
}

/// A reference to the build-id bytes in the original binary
StringRef BuildID;

/// Keep track of functions we fail to write in the binary. We need to avoid
/// rewriting CFI info for these functions.
std::vector<uint64_t> FailedAddresses;
Expand Down
113 changes: 113 additions & 0 deletions bolt/lib/Rewrite/BuildIDRewriter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
//===- bolt/Rewrite/BuildIDRewriter.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Read and update build ID stored in ELF note section.
//
//===----------------------------------------------------------------------===//

#include "bolt/Rewrite/MetadataRewriter.h"
#include "bolt/Rewrite/MetadataRewriters.h"
#include "llvm/Support/Errc.h"

using namespace llvm;
using namespace bolt;

namespace {

/// Render the raw build-id bytes (typically 20 of them) as a lowercase
/// hexadecimal string, two digits per byte.
std::string getPrintableBuildID(StringRef BuildID) {
  std::string Hex;
  raw_string_ostream Stream(Hex);
  for (const unsigned char Byte : BuildID.bytes())
    Stream << format("%.2x", Byte);

  return Stream.str();
}

/// Metadata rewriter that locates the GNU build ID note while sections are
/// being processed and flips the last bit of the build ID after emission.
class BuildIDRewriter final : public MetadataRewriter {

/// Information about binary build ID.

/// Note section that holds the build ID. Stays in the error state until
/// sectionInitializer() finds a matching note.
ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};

/// Raw build ID bytes (the note descriptor), referencing the contents of
/// BuildIDSection.
StringRef BuildID;

/// Offset of the build ID bytes from the start of the section contents.
std::optional<uint64_t> BuildIDOffset;

/// Descriptor size as recorded in the note header. Written by
/// sectionInitializer(); not read by any code visible in this file.
std::optional<uint64_t> BuildIDSize;

public:
/// Construct the rewriter with the given \p Name for binary context \p BC.
BuildIDRewriter(StringRef Name, BinaryContext &BC)
: MetadataRewriter(Name, BC) {}

/// Scan note sections for the build ID and remember where it lives.
Error sectionInitializer() override;

/// Patch the recorded build ID, if one was found.
Error postEmitFinalizer() override;
};

/// Scan every ELF note section for a GNU build ID note and record where it
/// lives.
///
/// The first note of type NT_GNU_BUILD_ID whose name starts with "GNU" and
/// whose non-empty descriptor lies entirely inside its section is accepted:
/// the section, the descriptor bytes and their offset are remembered, and
/// the printable ID is passed to BinaryContext::setFileBuildID(). A binary
/// without such a note is not an error.
///
/// \returns an executable_format_error if a note header cannot be read.
Error BuildIDRewriter::sectionInitializer() {
  // Typically, build ID will reside in .note.gnu.build-id section. However,
  // a linker script can change the section name and such is the case with
  // the Linux kernel. Hence, we iterate over all note sections.
  for (BinarySection &NoteSection : BC.sections()) {
    if (!NoteSection.isNote())
      continue;

    StringRef Buf = NoteSection.getContents();
    DataExtractor DE = DataExtractor(Buf, BC.AsmInfo->isLittleEndian(),
                                     BC.AsmInfo->getCodePointerSize());
    DataExtractor::Cursor Cursor(0);
    while (Cursor && !DE.eof(Cursor)) {
      // Note header: three 32-bit words followed by the name and the
      // descriptor, each padded to a 4-byte boundary.
      const uint32_t NameSz = DE.getU32(Cursor);
      const uint32_t DescSz = DE.getU32(Cursor);
      const uint32_t Type = DE.getU32(Cursor);

      StringRef Name =
          NameSz ? Buf.slice(Cursor.tell(), Cursor.tell() + NameSz) : "<empty>";
      Cursor.seek(alignTo(Cursor.tell() + NameSz, 4));

      const uint64_t DescOffset = Cursor.tell();
      StringRef Desc =
          DescSz ? Buf.slice(DescOffset, DescOffset + DescSz) : "<empty>";
      Cursor.seek(alignTo(DescOffset + DescSz, 4));

      if (!Cursor)
        return createStringError(errc::executable_format_error,
                                 "out of bounds while reading note section: %s",
                                 toString(Cursor.takeError()).c_str());

      // slice() clamps to the buffer and seek() does not bounds-check, so a
      // descriptor that runs past the end of the section comes back short
      // (possibly empty) without putting the cursor into an error state.
      // Never record such a truncated build ID: the finalizer indexes its
      // last byte.
      const bool IsDescComplete = Desc.size() == DescSz;

      if (Type == ELF::NT_GNU_BUILD_ID && Name.substr(0, 3) == "GNU" &&
          DescSz && IsDescComplete) {
        BuildIDSection = NoteSection;
        BuildID = Desc;
        BC.setFileBuildID(getPrintableBuildID(Desc));
        BuildIDOffset = DescOffset;
        BuildIDSize = DescSz;

        return Error::success();
      }
    }
  }

  return Error::success();
}

/// Make the build ID of the output differ from the input by flipping the
/// lowest bit of its last byte.
///
/// Does nothing when no build ID was recorded. The new byte is handed to
/// BinarySection::addPatch() on the note section rather than written here.
Error BuildIDRewriter::postEmitFinalizer() {
  // An empty BuildID has no byte to flip, and size() - 1 would wrap around.
  // This can happen even with an offset recorded if the note descriptor was
  // cut off by the end of its section.
  if (!BuildIDSection || !BuildIDOffset || BuildID.empty())
    return Error::success();

  const uint64_t LastByteIndex = BuildID.size() - 1;
  const uint8_t LastByte = BuildID[LastByteIndex];
  SmallVector<char, 1> Patch = {static_cast<char>(LastByte ^ 1)};
  BuildIDSection->addPatch(*BuildIDOffset + LastByteIndex, Patch);
  BC.outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";

  return Error::success();
}
} // namespace

/// Create the rewriter that tracks and patches the build ID of the binary
/// described by \p BC. The instance is constructed with the name
/// "build-id-rewriter".
std::unique_ptr<MetadataRewriter>
llvm::bolt::createBuildIDRewriter(BinaryContext &BC) {
return std::make_unique<BuildIDRewriter>("build-id-rewriter", BC);
}
1 change: 1 addition & 0 deletions bolt/lib/Rewrite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_llvm_library(LLVMBOLTRewrite
LinuxKernelRewriter.cpp
MachORewriteInstance.cpp
MetadataManager.cpp
BuildIDRewriter.cpp
PseudoProbeRewriter.cpp
RewriteInstance.cpp
SDTRewriter.cpp
Expand Down
12 changes: 12 additions & 0 deletions bolt/lib/Rewrite/MetadataManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ void MetadataManager::registerRewriter(
Rewriters.emplace_back(std::move(Rewriter));
}

void MetadataManager::runSectionInitializers() {
for (auto &Rewriter : Rewriters) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
<< " after reading sections\n");
if (Error E = Rewriter->sectionInitializer()) {
errs() << "BOLT-ERROR: while running " << Rewriter->getName()
<< " after reading sections: " << toString(std::move(E)) << '\n';
exit(1);
}
}
}

void MetadataManager::runInitializersPreCFG() {
for (auto &Rewriter : Rewriters) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
Expand Down
92 changes: 8 additions & 84 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,82 +643,6 @@ Error RewriteInstance::discoverStorage() {
return Error::success();
}

/// Read the first note of the build-id section and, if it is a GNU build-id
/// note, point BuildID at its descriptor bytes.
///
/// BuildID is left untouched when the section is missing, the header cannot
/// be read, or the note is of another type or owner.
void RewriteInstance::parseBuildID() {
  if (!BuildIDSection)
    return;

  StringRef Buf = BuildIDSection->getContents();

  // Reading notes section (see Portable Formats Specification, Version 1.1,
  // pg 2-5, section "Note Section").
  //
  // Note header words are stored in the byte order of the target, so take
  // the endianness from the target description instead of assuming
  // little-endian.
  DataExtractor DE = DataExtractor(Buf, BC->AsmInfo->isLittleEndian(),
                                   InputFile->getBytesInAddress());
  uint64_t Offset = 0;
  if (!DE.isValidOffset(Offset))
    return;
  uint32_t NameSz = DE.getU32(&Offset);
  if (!DE.isValidOffset(Offset))
    return;
  uint32_t DescSz = DE.getU32(&Offset);
  if (!DE.isValidOffset(Offset))
    return;
  uint32_t Type = DE.getU32(&Offset);

  LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
                    << "; Type = " << Type << "\n");

  // Type 3 is a GNU build-id note section
  if (Type != 3)
    return;

  // The name follows the header; the descriptor starts at the next 4-byte
  // boundary after it.
  StringRef Name = Buf.slice(Offset, Offset + NameSz);
  Offset = alignTo(Offset + NameSz, 4);
  if (Name.substr(0, 3) != "GNU")
    return;

  BuildID = Buf.slice(Offset, Offset + DescSz);
}

std::optional<std::string> RewriteInstance::getPrintableBuildID() const {
if (BuildID.empty())
return std::nullopt;

std::string Str;
raw_string_ostream OS(Str);
const unsigned char *CharIter = BuildID.bytes_begin();
while (CharIter != BuildID.bytes_end()) {
if (*CharIter < 0x10)
OS << "0";
OS << Twine::utohexstr(*CharIter);
++CharIter;
}
return OS.str();
}

/// Flip the lowest bit of the last build-id byte directly in the output
/// file. Does nothing when no build-id was parsed, and only warns when the
/// build-id section has no file offset.
void RewriteInstance::patchBuildID() {
  raw_fd_ostream &OS = Out->os();

  if (BuildID.empty())
    return;

  // Locate the build-id bytes inside the note section contents.
  const size_t IDOffset = BuildIDSection->getContents().rfind(BuildID);
  assert(IDOffset != StringRef::npos && "failed to patch build-id");

  const uint64_t SectionFileOffset =
      getFileOffsetForAddress(BuildIDSection->getAddress());
  if (SectionFileOffset == 0) {
    BC->errs()
        << "BOLT-WARNING: Non-allocatable build-id will not be updated.\n";
    return;
  }

  const size_t LastIndex = BuildID.size() - 1;
  const char FlippedByte = static_cast<char>(BuildID[LastIndex] ^ 1);
  OS.pwrite(&FlippedByte, 1, SectionFileOffset + IDOffset + LastIndex);

  BC->outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
}

Error RewriteInstance::run() {
assert(BC && "failed to create a binary context");

Expand Down Expand Up @@ -1977,7 +1901,6 @@ Error RewriteInstance::readSpecialSections() {
".rela" + std::string(BC->getMainCodeSectionName()));
HasSymbolTable = (bool)BC->getUniqueSectionByName(".symtab");
EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");

if (ErrorOr<BinarySection &> BATSec =
BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
Expand Down Expand Up @@ -2035,10 +1958,7 @@ Error RewriteInstance::readSpecialSections() {
report_error("expected valid eh_frame section", EHFrameOrError.takeError());
CFIRdWrt.reset(new CFIReaderWriter(*BC, *EHFrameOrError.get()));

// Parse build-id
parseBuildID();
if (std::optional<std::string> FileBuildID = getPrintableBuildID())
BC->setFileBuildID(*FileBuildID);
processSectionMetadata();

// Read .dynamic/PT_DYNAMIC.
return readELFDynamic();
Expand Down Expand Up @@ -3218,14 +3138,20 @@ void RewriteInstance::initializeMetadataManager() {
if (BC->IsLinuxKernel)
MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC));

MetadataManager.registerRewriter(createBuildIDRewriter(*BC));

MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));

MetadataManager.registerRewriter(createSDTRewriter(*BC));
}

void RewriteInstance::processMetadataPreCFG() {
/// Process metadata in sections before functions are discovered: set up the
/// metadata manager via initializeMetadataManager(), then run the
/// section-level initializers of the rewriters.
void RewriteInstance::processSectionMetadata() {
initializeMetadataManager();

MetadataManager.runSectionInitializers();
}

void RewriteInstance::processMetadataPreCFG() {
MetadataManager.runInitializersPreCFG();

processProfileDataPreCFG();
Expand Down Expand Up @@ -5772,8 +5698,6 @@ void RewriteInstance::rewriteFile() {
// Update symbol tables.
patchELFSymTabs();

patchBuildID();

if (opts::EnableBAT)
encodeBATSection();

Expand Down
Loading