Skip to content

Commit 41e3146

Browse files
committed
[WebAssembly] Implement build-id feature
Implement the --build-id flag similarly to ELF, and generate a build_id section according to the WebAssembly tool convention specified in WebAssembly/tool-conventions#183. The default style ("fast") hashes the contents of the output and (unlike ELF) generates a v5 UUID based on the hash (using a fixed, randomly generated namespace). It also supports generating a random v4 UUID, a sha1 hash (also selectable as "tree"), and a user-specified string (as ELF does). Differential Revision: https://reviews.llvm.org/D107662
1 parent 1e1f119 commit 41e3146

File tree

7 files changed

+256
-0
lines changed

7 files changed

+256
-0
lines changed

lld/test/wasm/build-id.test

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# RUN: llvm-mc -filetype=obj -triple=wasm32 %p/Inputs/start.s -o %t
2+
3+
# RUN: wasm-ld --build-id %t -o %t2
4+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
5+
# RUN: wasm-ld --build-id=fast %t -o %t2
6+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
7+
# RUN: wasm-ld --build-id %t -o %t2 --threads=1
8+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
9+
10+
# RUN: wasm-ld --build-id=sha1 %t -o %t2
11+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
12+
# RUN: wasm-ld --build-id=sha1 %t -o %t2 --threads=1
13+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
14+
15+
# RUN: wasm-ld --build-id=tree %t -o %t2
16+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
17+
# RUN: wasm-ld --build-id=tree %t -o %t2 --threads=1
18+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s
19+
20+
# RUN: wasm-ld --build-id=uuid %t -o %t2
21+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=UUID %s
22+
23+
# RUN: wasm-ld --build-id=0x12345678 %t -o %t2
24+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=HEX %s
25+
26+
# RUN: wasm-ld %t -o %t2
27+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
28+
29+
# RUN: wasm-ld --build-id=sha1 --build-id=none %t -o %t2
30+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
31+
# RUN: wasm-ld --build-id --build-id=none %t -o %t2
32+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=NONE %s
33+
# RUN: wasm-ld --build-id=none --build-id %t -o %t2
34+
# RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s
35+
36+
.section .data.foo,"",@
37+
.globl foo
38+
.hidden foo
39+
.p2align 2
40+
foo:
41+
.int32 1
42+
.size foo, 4
43+
44+
45+
# DEFAULT: Contents of section build_id:
46+
# DEFAULT-NEXT: 0000 10cdbf99 f76b1f5e ebb2f36a 1bde1d6c .....k.^...j...l
47+
# DEFAULT-NEXT: 0010 01
48+
49+
# SHA1: Contents of section build_id:
50+
# SHA1-NEXT: 0000 14ad22e8 54d72438 94af85de 3c5592bd ..".T.$8....<U..
51+
# SHA1-NEXT: 0010 1b5ec96f 6b .^.ok
52+
53+
# UUID: Contents of section build_id:
54+
# UUID-NEXT: 0000 10
55+
56+
# HEX: Contents of section build_id:
57+
# HEX-NEXT: 0000 04123456 78 ..4Vx
58+
59+
60+
# NONE-NOT: Contents of section build_id:

lld/wasm/Config.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ namespace wasm {
2626
// For --unresolved-symbols.
2727
enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic };
2828

29+
// For --build-id.
30+
enum class BuildIdKind {
  None,      // No build_id section is emitted.
  Fast,      // v5 UUID derived from an xxHash64 tree hash of the output.
  Sha1,      // 20-byte SHA-1 tree hash of the output ("sha1" or "tree").
  Hexstring, // Bytes supplied by the user as --build-id=0x<hexstring>.
  Uuid       // Random v4 UUID.
};
31+
2932
// This struct contains the global configuration for the linker.
3033
// Most fields are direct mapping from the command line options
3134
// and such fields have the same name as the corresponding options.
@@ -72,6 +75,7 @@ struct Configuration {
7275
llvm::StringRef thinLTOJobs;
7376
bool ltoDebugPassManager;
7477
UnresolvedPolicy unresolvedSymbols;
78+
BuildIdKind buildId = BuildIdKind::None;
7579

7680
llvm::StringRef entry;
7781
llvm::StringRef mapFile;
@@ -85,6 +89,7 @@ struct Configuration {
8589
llvm::CachePruningPolicy thinLTOCachePolicy;
8690
std::optional<std::vector<std::string>> features;
8791
std::optional<std::vector<std::string>> extraFeatures;
92+
llvm::SmallVector<uint8_t, 0> buildIdVector;
8893

8994
// The following config options do not directly correspond to any
9095
// particular command line options.

lld/wasm/Driver.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,33 @@ static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &args) {
385385
return errorOrWarn;
386386
}
387387

388+
// Parse --build-id or --build-id=<style>. We handle "tree" as a
389+
// synonym for "sha1" because all our hash functions including
390+
// -build-id=sha1 are actually tree hashes for performance reasons.
391+
static std::pair<BuildIdKind, SmallVector<uint8_t, 0>>
392+
getBuildId(opt::InputArgList &args) {
393+
auto *arg = args.getLastArg(OPT_build_id, OPT_build_id_eq);
394+
if (!arg)
395+
return {BuildIdKind::None, {}};
396+
397+
if (arg->getOption().getID() == OPT_build_id)
398+
return {BuildIdKind::Fast, {}};
399+
400+
StringRef s = arg->getValue();
401+
if (s == "fast")
402+
return {BuildIdKind::Fast, {}};
403+
if (s == "sha1" || s == "tree")
404+
return {BuildIdKind::Sha1, {}};
405+
if (s == "uuid")
406+
return {BuildIdKind::Uuid, {}};
407+
if (s.startswith("0x"))
408+
return {BuildIdKind::Hexstring, parseHex(s.substr(2))};
409+
410+
if (s != "none")
411+
error("unknown --build-id style: " + s);
412+
return {BuildIdKind::None, {}};
413+
}
414+
388415
// Initializes Config members by the command line options.
389416
static void readConfigs(opt::InputArgList &args) {
390417
config->bsymbolic = args.hasArg(OPT_Bsymbolic);
@@ -519,6 +546,8 @@ static void readConfigs(opt::InputArgList &args) {
519546

520547
if (args.hasArg(OPT_print_map))
521548
config->mapFile = "-";
549+
550+
std::tie(config->buildId, config->buildIdVector) = getBuildId(args);
522551
}
523552

524553
// Some Config members do not directly correspond to any particular

lld/wasm/Options.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries (default)">
4242

4343
def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;
4444

45+
// --build-id with no value selects the default ("fast") style.
def build_id: F<"build-id">, HelpText<"Alias for --build-id=fast">;

// --build-id=<style>. This linker emits the ID as a custom build_id section
// (there is no ELF-style note in a WebAssembly module), so the help text says
// "section" rather than "note".
def build_id_eq: J<"build-id=">, HelpText<"Generate build ID section">,
  MetaVarName<"[fast,sha1,uuid,0x<hexstring>]">;
49+
4550
defm color_diagnostics: B<"color-diagnostics",
4651
"Alias for --color-diagnostics=always",
4752
"Alias for --color-diagnostics=never">;

lld/wasm/SyntheticSections.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -888,5 +888,39 @@ void RelocSection::writeBody() {
888888
sec->writeRelocations(bodyOutputStream);
889889
}
890890

891+
static size_t getHashSize() {
892+
switch (config->buildId) {
893+
case BuildIdKind::Fast:
894+
case BuildIdKind::Uuid:
895+
return 16;
896+
case BuildIdKind::Sha1:
897+
return 20;
898+
case BuildIdKind::Hexstring:
899+
return config->buildIdVector.size();
900+
case BuildIdKind::None:
901+
return 0;
902+
}
903+
}
904+
905+
// Creates the section as a custom section named by buildIdSectionName and
// fixes the number of ID bytes up front from the configured --build-id style.
BuildIdSection::BuildIdSection()
    : SyntheticSection(llvm::wasm::WASM_SEC_CUSTOM, buildIdSectionName),
      hashSize(getHashSize()) {}
908+
909+
void BuildIdSection::writeBody() {
910+
LLVM_DEBUG(llvm::dbgs() << "BuildId writebody\n");
911+
// Write hash size
912+
auto &os = bodyOutputStream;
913+
writeUleb128(os, hashSize, "build id size");
914+
writeBytes(os, std::vector<char>(hashSize, ' ').data(), hashSize,
915+
"placeholder");
916+
}
917+
918+
// Overwrites the placeholder bytes in the output buffer with the final build
// ID. `buf` must contain exactly hashSize bytes, and writeTo() must already
// have run so that the placeholder location is known.
void BuildIdSection::writeBuildId(llvm::ArrayRef<uint8_t> buf) {
  assert(buf.size() == hashSize);
  // hashPlaceholderPtr is only assigned in writeTo(); copying through it
  // before then would write through a null pointer.
  assert(hashPlaceholderPtr && "writeBuildId called before writeTo");
  LLVM_DEBUG(dbgs() << "buildid write " << buf.size() << " "
                    << hashPlaceholderPtr << '\n');
  memcpy(hashPlaceholderPtr, buf.data(), hashSize);
}
924+
891925
} // namespace wasm
892926
} // namespace lld

lld/wasm/SyntheticSections.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,34 @@ class RelocSection : public SyntheticSection {
429429
OutputSection *sec;
430430
};
431431

432+
class BuildIdSection : public SyntheticSection {
433+
public:
434+
BuildIdSection();
435+
void writeBody() override;
436+
bool isNeeded() const override {
437+
return config->buildId != BuildIdKind::None;
438+
}
439+
void writeBuildId(llvm::ArrayRef<uint8_t> buf);
440+
void writeTo(uint8_t *buf) override {
441+
LLVM_DEBUG(llvm::dbgs() << "BuildId writeto buf " << buf << " offset "
442+
<< offset << " headersize " << header.size() << '\n');
443+
// The actual build ID is derived from a hash of all of the output
444+
// sections, so it can't be calculated until they are written. Here
445+
// we write the section leaving zeros in place of the hash.
446+
SyntheticSection::writeTo(buf);
447+
// Calculate and store the location where the hash will be written.
448+
hashPlaceholderPtr = buf + offset + header.size() +
449+
+sizeof(buildIdSectionName) /*name string*/ +
450+
1 /* hash size */;
451+
}
452+
453+
const uint32_t hashSize;
454+
455+
private:
456+
static constexpr char buildIdSectionName[] = "build_id";
457+
uint8_t *hashPlaceholderPtr = nullptr;
458+
};
459+
432460
// Linker generated output sections
433461
struct OutStruct {
434462
DylinkSection *dylinkSec;
@@ -447,6 +475,7 @@ struct OutStruct {
447475
NameSection *nameSec;
448476
ProducersSection *producersSec;
449477
TargetFeaturesSection *targetFeaturesSec;
478+
BuildIdSection *buildIdSec;
450479
};
451480

452481
extern OutStruct out;

lld/wasm/Writer.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
#include "SymbolTable.h"
1818
#include "SyntheticSections.h"
1919
#include "WriterUtils.h"
20+
#include "lld/Common/Arrays.h"
2021
#include "lld/Common/CommonLinkerContext.h"
2122
#include "lld/Common/Strings.h"
23+
#include "llvm/ADT/ArrayRef.h"
2224
#include "llvm/ADT/DenseSet.h"
2325
#include "llvm/ADT/SmallSet.h"
2426
#include "llvm/ADT/SmallVector.h"
@@ -30,6 +32,9 @@
3032
#include "llvm/Support/FormatVariadic.h"
3133
#include "llvm/Support/LEB128.h"
3234
#include "llvm/Support/Parallel.h"
35+
#include "llvm/Support/RandomNumberGenerator.h"
36+
#include "llvm/Support/SHA1.h"
37+
#include "llvm/Support/xxhash.h"
3338

3439
#include <cstdarg>
3540
#include <map>
@@ -103,6 +108,7 @@ class Writer {
103108

104109
void writeHeader();
105110
void writeSections();
111+
void writeBuildId();
106112

107113
uint64_t fileSize = 0;
108114

@@ -219,6 +225,91 @@ void Writer::writeSections() {
219225
});
220226
}
221227

228+
// Computes a hash value of Data using a given hash function.
229+
// In order to utilize multiple cores, we first split data into 1MB
230+
// chunks, compute a hash for each chunk, and then compute a hash value
231+
// of the hash values.
232+
233+
static void
234+
computeHash(llvm::MutableArrayRef<uint8_t> hashBuf,
235+
llvm::ArrayRef<uint8_t> data,
236+
std::function<void(uint8_t *dest, ArrayRef<uint8_t> arr)> hashFn) {
237+
std::vector<ArrayRef<uint8_t>> chunks = split(data, 1024 * 1024);
238+
std::vector<uint8_t> hashes(chunks.size() * hashBuf.size());
239+
240+
// Compute hash values.
241+
parallelFor(0, chunks.size(), [&](size_t i) {
242+
hashFn(hashes.data() + i * hashBuf.size(), chunks[i]);
243+
});
244+
245+
// Write to the final output buffer.
246+
hashFn(hashBuf.data(), hashes);
247+
}
248+
249+
static void makeUUID(unsigned version, llvm::ArrayRef<uint8_t> fileHash,
250+
llvm::MutableArrayRef<uint8_t> output) {
251+
assert(version == 4 || version == 5 && "Unknown UUID version");
252+
assert(output.size() == 16 && "Wrong size for UUID output");
253+
if (version == 5) {
254+
// Build a valid v5 UUID from a hardcoded (randomly-generated) namespace
255+
// UUID, and the computed hash of the output.
256+
std::vector<uint8_t> namespaceUUID{0xA1, 0xFA, 0x48, 0x2D, 0x0E, 0x22,
257+
0x03, 0x8D, 0x33, 0x8B, 0x52, 0x1C,
258+
0xD6, 0xD2, 0x12, 0xB2};
259+
SHA1 sha;
260+
sha.update(namespaceUUID);
261+
sha.update(fileHash);
262+
auto s = sha.final();
263+
std::copy(s.begin(), &s[output.size()], output.begin());
264+
} else if (version == 4) {
265+
if (auto ec = llvm::getRandomBytes(output.data(), output.size()))
266+
error("entropy source failure: " + ec.message());
267+
}
268+
// Set the UUID version and variant fields.
269+
// The version is the upper nibble of byte 6 (0b0101xxxx or 0b0100xxxx)
270+
output[6] = (static_cast<uint8_t>(version) << 4) | (output[6] & 0xF);
271+
272+
// The variant is DCE 1.1/ISO 11578 (0b10xxxxxx)
273+
output[8] &= 0xBF;
274+
output[8] |= 0x80;
275+
}
276+
277+
void Writer::writeBuildId() {
278+
if (!out.buildIdSec->isNeeded())
279+
return;
280+
if (config->buildId == BuildIdKind::Hexstring) {
281+
out.buildIdSec->writeBuildId(config->buildIdVector);
282+
return;
283+
}
284+
285+
// Compute a hash of all sections of the output file.
286+
size_t hashSize = out.buildIdSec->hashSize;
287+
std::vector<uint8_t> buildId(hashSize);
288+
llvm::ArrayRef<uint8_t> buf{buffer->getBufferStart(), size_t(fileSize)};
289+
290+
switch (config->buildId) {
291+
case BuildIdKind::Fast:{
292+
std::vector<uint8_t> fileHash(8);
293+
computeHash(fileHash, buf, [](uint8_t *dest, ArrayRef<uint8_t> arr) {
294+
support::endian::write64le(dest, xxHash64(arr));
295+
});
296+
makeUUID(5, fileHash, buildId);
297+
break;
298+
}
299+
case BuildIdKind::Sha1:
300+
computeHash(buildId, buf, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
301+
memcpy(dest, SHA1::hash(arr).data(), hashSize);
302+
});
303+
break;
304+
case BuildIdKind::Uuid:
305+
makeUUID(4, {}, buildId);
306+
break;
307+
default:
308+
llvm_unreachable("unknown BuildIdKind");
309+
}
310+
out.buildIdSec->writeBuildId(buildId);
311+
}
312+
222313
static void setGlobalPtr(DefinedGlobal *g, uint64_t memoryPtr) {
223314
LLVM_DEBUG(dbgs() << "setGlobalPtr " << g->getName() << " -> " << memoryPtr << "\n");
224315
g->global->setPointerValue(memoryPtr);
@@ -456,6 +547,7 @@ void Writer::addSections() {
456547
addSection(out.nameSec);
457548
addSection(out.producersSec);
458549
addSection(out.targetFeaturesSec);
550+
addSection(out.buildIdSec);
459551
}
460552

461553
void Writer::finalizeSections() {
@@ -1577,6 +1669,7 @@ void Writer::createSyntheticSections() {
15771669
out.elemSec = make<ElemSection>();
15781670
out.producersSec = make<ProducersSection>();
15791671
out.targetFeaturesSec = make<TargetFeaturesSection>();
1672+
out.buildIdSec = make<BuildIdSection>();
15801673
}
15811674

15821675
void Writer::createSyntheticSectionsPostLayout() {
@@ -1738,6 +1831,7 @@ void Writer::run() {
17381831

17391832
log("-- writeSections");
17401833
writeSections();
1834+
writeBuildId();
17411835
if (errorCount())
17421836
return;
17431837

0 commit comments

Comments
 (0)