Skip to content

Commit bf9814b

Browse files
committed
[SanitizerBinaryMetadata] Emit constants as ULEB128
Emit all constant integers produced by SanitizerBinaryMetadata as ULEB128 to further reduce the binary space used. Increasing the version is not necessary, given that this change depends on (and will land along with) the bump to v2. To support this, the !pcsections metadata format is extended to allow for per-section options, encoded in the first MD operand, which must always be a string and contain the section: "<section>!<options>". Reviewed By: dvyukov Differential Revision: https://reviews.llvm.org/D143484
1 parent 3d53b52 commit bf9814b

File tree

11 files changed

+135
-58
lines changed

11 files changed

+135
-58
lines changed

clang/test/CodeGen/sanitize-metadata.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,6 @@ int atomics() {
3131
// CHECK-LABEL: __sanitizer_metadata_covered.module_dtor
3232
// CHECK: call void @__sanitizer_metadata_covered_del(i32 2, ptr @__start_sanmd_covered, ptr @__stop_sanmd_covered)
3333

34-
// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
35-
// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i8 1}
36-
// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics"}
34+
// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered!C", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
35+
// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i64 1}
36+
// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics!C"}

compiler-rt/test/metadata/common.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,20 @@ template <typename T> T consume(const char *&pos, const char *end) {
2525
return v;
2626
}
2727

28+
/// Decodes one unsigned LEB128 value starting at `pos` and advances `pos`
/// past the bytes it consumed.
///
/// Every encoded byte contributes its low 7 bits, least-significant group
/// first; a set high bit (0x80) signals that another byte follows.
///
/// \param pos In/out cursor into the buffer; on return it points one past the
///            last byte of the decoded value.
/// \param end One past the last readable byte of the buffer.
/// \return The decoded value.
///
/// In builds with assertions enabled, aborts if the encoding would run past
/// `end` or needs more than 10 bytes (i.e. cannot fit in 64 bits).
uint64_t consume_uleb128(const char *&pos, const char *end) {
  uint64_t val = 0;
  int shift = 0;
  uint8_t cur;
  do {
    // Validate *before* reading and shifting: checking only after the loop
    // would already have read out of bounds / shifted by >= 64 (undefined
    // behaviour), and would wrongly reject a valid 10-byte encoding whose
    // final group is shifted by 63.
    assert(pos < end);
    assert(shift < 64);
    cur = static_cast<uint8_t>(*pos++);
    val |= uint64_t{cur & 0x7fu} << shift;
    shift += 7;
  } while (cur & 0x80);
  return val;
}
41+
2842
constexpr uint32_t kSanitizerBinaryMetadataUARHasSize = 1 << 2;
2943

3044
uint32_t meta_version;
@@ -45,13 +59,13 @@ void __sanitizer_metadata_covered_add(uint32_t version, const char *start,
4559
const auto base = reinterpret_cast<uintptr_t>(pos);
4660
const intptr_t offset = offset_ptr_sized ? consume<intptr_t>(pos, end)
4761
: consume<int32_t>(pos, end);
48-
[[maybe_unused]] const uint32_t size = consume<uint32_t>(pos, end);
49-
const uint32_t features = consume<uint8_t>(pos, end);
50-
uint32_t stack_args = 0;
62+
[[maybe_unused]] const uint64_t size = consume_uleb128(pos, end);
63+
const uint64_t features = consume_uleb128(pos, end);
64+
uint64_t stack_args = 0;
5165
if (features & kSanitizerBinaryMetadataUARHasSize)
52-
stack_args = consume<uint32_t>(pos, end);
66+
stack_args = consume_uleb128(pos, end);
5367
if (const char *name = symbolize(base + offset))
54-
printf("%s: features=%x stack_args=%u\n", name, features, stack_args);
68+
printf("%s: features=%lx stack_args=%lu\n", name, features, stack_args);
5569
}
5670
meta_version = version;
5771
meta_start = start;

llvm/docs/PCSectionsMetadata.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,18 @@ The size of each entry depends on the code model. With large and medium sized
5959
code models, the entry size matches pointer size. For any smaller code model
6060
the entry size is just 32 bits.
6161

62+
Encoding Options
63+
----------------
64+
65+
Optional encoding options can be passed in the first ``MDString`` operand:
66+
``<section>!<options>``. The following options are available:
67+
68+
* ``C`` -- Compress constant integers of size 2-8 bytes as ULEB128; this
69+
includes the function size (but excludes the PC entry).
70+
71+
For example, ``foo!C`` will emit into section ``foo`` with all constants
72+
encoded as ULEB128.
73+
6274
Guarantees on Code Generation
6375
=============================
6476

llvm/include/llvm/CodeGen/AsmPrinter.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,13 @@ class AsmPrinter : public MachineFunctionPass {
643643
/// Emit a long long directive and value.
644644
void emitInt64(uint64_t Value) const;
645645

646+
/// Emit the specified signed leb128 value.
647+
void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
648+
649+
/// Emit the specified unsigned leb128 value.
650+
void emitULEB128(uint64_t Value, const char *Desc = nullptr,
651+
unsigned PadTo = 0) const;
652+
646653
/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
647654
/// is specified by Size and Hi/Lo specify the labels. This implicitly uses
648655
/// .set if it is available.
@@ -670,13 +677,6 @@ class AsmPrinter : public MachineFunctionPass {
670677
// Dwarf Emission Helper Routines
671678
//===------------------------------------------------------------------===//
672679

673-
/// Emit the specified signed leb128 value.
674-
void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
675-
676-
/// Emit the specified unsigned leb128 value.
677-
void emitULEB128(uint64_t Value, const char *Desc = nullptr,
678-
unsigned PadTo = 0) const;
679-
680680
/// Emit a .byte 42 directive that corresponds to an encoding. If verbose
681681
/// assembly output is enabled, we output comments describing the encoding.
682682
/// Desc is a string saying what the encoding is specifying (e.g. "LSDA").

llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ inline constexpr int kSanitizerBinaryMetadataAtomicsBit = 0;
3030
inline constexpr int kSanitizerBinaryMetadataUARBit = 1;
3131
inline constexpr int kSanitizerBinaryMetadataUARHasSizeBit = 2;
3232

33-
inline constexpr uint32_t kSanitizerBinaryMetadataAtomics =
33+
inline constexpr uint64_t kSanitizerBinaryMetadataAtomics =
3434
1 << kSanitizerBinaryMetadataAtomicsBit;
35-
inline constexpr uint32_t kSanitizerBinaryMetadataUAR =
35+
inline constexpr uint64_t kSanitizerBinaryMetadataUAR =
3636
1 << kSanitizerBinaryMetadataUARBit;
37-
inline constexpr uint32_t kSanitizerBinaryMetadataUARHasSize =
37+
inline constexpr uint64_t kSanitizerBinaryMetadataUARHasSize =
3838
1 << kSanitizerBinaryMetadataUARHasSizeBit;
3939

4040
inline constexpr char kSanitizerBinaryMetadataCoveredSection[] =

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,9 +1496,22 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
14961496
// constants may appear, which will simply be emitted into the current
14971497
// section (the user of MD_pcsections decides the format of encoded data).
14981498
assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string");
1499+
bool ConstULEB128 = false;
14991500
for (const MDOperand &MDO : MD.operands()) {
15001501
if (auto *S = dyn_cast<MDString>(MDO)) {
1501-
SwitchSection(S->getString());
1502+
// Found string, start of new section!
1503+
// Find options for this section "<section>!<opts>" - supported options:
1504+
// C = Compress constant integers of size 2-8 bytes as ULEB128.
1505+
const StringRef SecWithOpt = S->getString();
1506+
const size_t OptStart = SecWithOpt.find('!'); // likely npos
1507+
const StringRef Sec = SecWithOpt.substr(0, OptStart);
1508+
const StringRef Opts = SecWithOpt.substr(OptStart); // likely empty
1509+
ConstULEB128 = Opts.find('C') != StringRef::npos;
1510+
#ifndef NDEBUG
1511+
for (char O : Opts)
1512+
assert((O == '!' || O == 'C') && "Invalid !pcsections options");
1513+
#endif
1514+
SwitchSection(Sec);
15021515
const MCSymbol *Prev = Syms.front();
15031516
for (const MCSymbol *Sym : Syms) {
15041517
if (Sym == Prev || !Deltas) {
@@ -1510,17 +1523,30 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
15101523
// `base + addr`.
15111524
emitLabelDifference(Sym, Base, RelativeRelocSize);
15121525
} else {
1513-
emitLabelDifference(Sym, Prev, 4);
1526+
// Emit delta between symbol and previous symbol.
1527+
if (ConstULEB128)
1528+
emitLabelDifferenceAsULEB128(Sym, Prev);
1529+
else
1530+
emitLabelDifference(Sym, Prev, 4);
15141531
}
15151532
Prev = Sym;
15161533
}
15171534
} else {
1535+
// Emit auxiliary data after PC.
15181536
assert(isa<MDNode>(MDO) && "expecting either string or tuple");
15191537
const auto *AuxMDs = cast<MDNode>(MDO);
15201538
for (const MDOperand &AuxMDO : AuxMDs->operands()) {
15211539
assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
1522-
const auto *C = cast<ConstantAsMetadata>(AuxMDO);
1523-
emitGlobalConstant(F.getParent()->getDataLayout(), C->getValue());
1540+
const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
1541+
const DataLayout &DL = F.getParent()->getDataLayout();
1542+
const uint64_t Size = DL.getTypeStoreSize(C->getType());
1543+
1544+
if (auto *CI = dyn_cast<ConstantInt>(C);
1545+
CI && ConstULEB128 && Size > 1 && Size <= 8) {
1546+
emitULEB128(CI->getZExtValue());
1547+
} else {
1548+
emitGlobalConstant(DL, C);
1549+
}
15241550
}
15251551
}
15261552
}
@@ -2788,6 +2814,22 @@ void AsmPrinter::emitInt16(int Value) const { OutStreamer->emitInt16(Value); }
27882814
/// Emit a long directive and value.
27892815
void AsmPrinter::emitInt32(int Value) const { OutStreamer->emitInt32(Value); }
27902816

2817+
/// EmitSLEB128 - emit the specified signed leb128 value.
2818+
void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
2819+
if (isVerbose() && Desc)
2820+
OutStreamer->AddComment(Desc);
2821+
2822+
OutStreamer->emitSLEB128IntValue(Value);
2823+
}
2824+
2825+
void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
2826+
unsigned PadTo) const {
2827+
if (isVerbose() && Desc)
2828+
OutStreamer->AddComment(Desc);
2829+
2830+
OutStreamer->emitULEB128IntValue(Value, PadTo);
2831+
}
2832+
27912833
/// Emit a long long directive and value.
27922834
void AsmPrinter::emitInt64(uint64_t Value) const {
27932835
OutStreamer->emitInt64(Value);
@@ -2801,6 +2843,12 @@ void AsmPrinter::emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
28012843
OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
28022844
}
28032845

2846+
/// Emit something like ".uleb128 Hi-Lo".
2847+
void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
2848+
const MCSymbol *Lo) const {
2849+
OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
2850+
}
2851+
28042852
/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
28052853
/// where the size in bytes of the directive is specified by Size and Label
28062854
/// specifies the label. This implicitly uses .set if it is available.

llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -32,28 +32,6 @@ using namespace llvm;
3232
// Dwarf Emission Helper Routines
3333
//===----------------------------------------------------------------------===//
3434

35-
/// EmitSLEB128 - emit the specified signed leb128 value.
36-
void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
37-
if (isVerbose() && Desc)
38-
OutStreamer->AddComment(Desc);
39-
40-
OutStreamer->emitSLEB128IntValue(Value);
41-
}
42-
43-
void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
44-
unsigned PadTo) const {
45-
if (isVerbose() && Desc)
46-
OutStreamer->AddComment(Desc);
47-
48-
OutStreamer->emitULEB128IntValue(Value, PadTo);
49-
}
50-
51-
/// Emit something like ".uleb128 Hi-Lo".
52-
void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
53-
const MCSymbol *Lo) const {
54-
OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
55-
}
56-
5735
static const char *DecodeDWARFEncoding(unsigned Encoding) {
5836
switch (Encoding) {
5937
case dwarf::DW_EH_PE_absptr:

llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) {
5252
if (!MD)
5353
return false;
5454
const auto &Section = *cast<MDString>(MD->getOperand(0));
55-
if (!Section.getString().equals(kSanitizerBinaryMetadataCoveredSection))
55+
if (!Section.getString().startswith(kSanitizerBinaryMetadataCoveredSection))
5656
return false;
5757
auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
5858
// Assume it currently only has features.

llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,16 @@
3535
#include "llvm/InitializePasses.h"
3636
#include "llvm/Pass.h"
3737
#include "llvm/ProfileData/InstrProf.h"
38+
#include "llvm/Support/Allocator.h"
3839
#include "llvm/Support/CommandLine.h"
3940
#include "llvm/Support/Debug.h"
41+
#include "llvm/Support/StringSaver.h"
4042
#include "llvm/TargetParser/Triple.h"
4143
#include "llvm/Transforms/Instrumentation.h"
4244
#include "llvm/Transforms/Utils/ModuleUtils.h"
4345

4446
#include <array>
4547
#include <cstdint>
46-
#include <limits>
4748

4849
using namespace llvm;
4950

@@ -148,7 +149,7 @@ class SanitizerBinaryMetadata {
148149
// to determine if a memory operation is atomic or not in modules compiled
149150
// with SanitizerBinaryMetadata.
150151
bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
151-
uint32_t &FeatureMask);
152+
uint64_t &FeatureMask);
152153

153154
// Get start/end section marker pointer.
154155
GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
@@ -169,6 +170,8 @@ class SanitizerBinaryMetadata {
169170
const SanitizerBinaryMetadataOptions Options;
170171
const Triple TargetTriple;
171172
IRBuilder<> IRB;
173+
BumpPtrAllocator Alloc;
174+
UniqueStringSaver StringPool{Alloc};
172175
};
173176

174177
bool SanitizerBinaryMetadata::run() {
@@ -245,7 +248,7 @@ void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
245248

246249
// The metadata features enabled for this function, stored along covered
247250
// metadata (if enabled).
248-
uint32_t FeatureMask = 0;
251+
uint64_t FeatureMask = 0;
249252
// Don't emit unnecessary covered metadata for all functions to save space.
250253
bool RequiresCovered = false;
251254

@@ -270,10 +273,8 @@ void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
270273
const auto *MI = &MetadataInfo::Covered;
271274
MIS.insert(MI);
272275
const StringRef Section = getSectionName(MI->SectionSuffix);
273-
// The feature mask will be placed after the size of the function.
274-
assert(FeatureMask <= std::numeric_limits<uint8_t>::max() &&
275-
"Increase feature mask bytes and bump version");
276-
Constant *CFM = IRB.getInt8(FeatureMask);
276+
// The feature mask will be placed after the function size.
277+
Constant *CFM = IRB.getInt64(FeatureMask);
277278
F.setMetadata(LLVMContext::MD_pcsections,
278279
MDB.createPCSections({{Section, {CFM}}}));
279280
}
@@ -380,7 +381,7 @@ bool maybeSharedMutable(const Value *Addr) {
380381
}
381382

382383
bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
383-
MDBuilder &MDB, uint32_t &FeatureMask) {
384+
MDBuilder &MDB, uint64_t &FeatureMask) {
384385
SmallVector<const MetadataInfo *, 1> InstMetadata;
385386
bool RequiresCovered = false;
386387

@@ -435,8 +436,9 @@ SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
435436
}
436437

437438
StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
438-
// FIXME: Other TargetTriple (req. string pool)
439-
return SectionSuffix;
439+
// FIXME: Other TargetTriples.
440+
// Request ULEB128 encoding for all integer constants.
441+
return StringPool.save(SectionSuffix + "!C");
440442
}
441443

442444
Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {

llvm/test/CodeGen/X86/pcsections.ll

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,32 @@ entry:
137137
ret void
138138
}
139139

140+
define void @multiple_uleb128() !pcsections !6 {
141+
; CHECK-LABEL: multiple_uleb128:
142+
; CHECK: .section section_aux,"awo",@progbits,.text
143+
; CHECK-NEXT: .Lpcsection_base8:
144+
; DEFCM-NEXT: .long .Lfunc_begin3-.Lpcsection_base8
145+
; LARGE-NEXT: .quad .Lfunc_begin3-.Lpcsection_base8
146+
; CHECK-NEXT: .uleb128 .Lfunc_end6-.Lfunc_begin3
147+
; CHECK-NEXT: .byte 42
148+
; CHECK-NEXT: .ascii "\345\216&"
149+
; CHECK-NEXT: .byte 255
150+
; CHECK-NEXT: .section section_aux_21264,"awo",@progbits,.text
151+
; CHECK-NEXT: .Lpcsection_base9:
152+
; DEFCM-NEXT: .long .Lfunc_begin3-.Lpcsection_base9
153+
; LARGE-NEXT: .quad .Lfunc_begin3-.Lpcsection_base9
154+
; CHECK-NEXT: .long .Lfunc_end6-.Lfunc_begin3
155+
; CHECK-NEXT: .long 21264
156+
; CHECK-NEXT: .text
157+
entry:
158+
ret void
159+
}
160+
140161
!0 = !{!"section_no_aux"}
141162
!1 = !{!"section_aux", !3}
142163
!2 = !{!"section_aux_42", !4, !"section_aux_21264", !5}
143164
!3 = !{i32 10, i32 20, i32 30}
144165
!4 = !{i32 42}
145166
!5 = !{i32 21264}
167+
!6 = !{!"section_aux!C", !7, !"section_aux_21264", !5}
168+
!7 = !{i64 42, i32 624485, i8 255}

llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2071,6 +2071,6 @@ entry:
20712071
; CHECK-DAG: ret:
20722072
; CHECK-NEXT: ret void
20732073

2074-
; CHECK: !0 = !{!"sanmd_covered", !1}
2075-
; CHECK: !1 = !{i8 1}
2076-
; CHECK: !2 = !{!"sanmd_atomics"}
2074+
; CHECK: !0 = !{!"sanmd_covered!C", !1}
2075+
; CHECK: !1 = !{i64 1}
2076+
; CHECK: !2 = !{!"sanmd_atomics!C"}

0 commit comments

Comments
 (0)