Skip to content

Commit ede8215

Browse files
committed
[BPF] Generate BTF info using 'btf:type_tag' annotation
This is a follow-up for BPF mailing list discussion at [1]. Previous commit in a series updated DWARF generation for the following example: int __attribute__((btf_type_tag("tag1"))) *g; To generate DWARF that looks as follows: 0x0000001e: DW_TAG_variable DW_AT_name ("g") DW_AT_type (0x00000029 "int *") 0x00000029: DW_TAG_pointer_type DW_AT_type (0x00000032 "int") 0x00000032: DW_TAG_base_type DW_AT_name ("int") 0x00000036: DW_TAG_LLVM_annotation DW_AT_name ("btf:type_tag") DW_AT_const_value ("tag1") The fresh part is attachment of `btf:type_tag` annotations to types other than pointers. This commit changes BTF generation to rely on `btf:type_tag` annotations to generate TYPE_TAG entries. This necessitates the following changes: - The logic for `BTFTypeTypeTag` chains creation is moved to `BTFDebug::addType()`; - Special logic is added to avoid duplicate BTF entries for tagged and un-tagged type variants, e.g. in the following case: #define __tag1 __attribute__((btf_type_tag("tag1"))) #define __tag2 __attribute__((btf_type_tag("tag2"))) struct foo {}; struct bar { struct foo __tag1 aa; struct foo __tag2 bb; struct foo cc; }; Debug information generated for this example contains three instances of `DICompositeType(name: "foo")` with different `annotations` fields, however single BTF definition for structure "foo" should be generated. Field `BTFDebug::DIDedupMap` and method `BTFDebug::lookupType()` are responsible for this logic; - Care is taken to avoid references to type tags in relocation entries. [1] https://lore.kernel.org/bpf/[email protected]/ This was previously tracked as differential revision: https://reviews.llvm.org/D145891
1 parent f75fc64 commit ede8215

20 files changed

+2107
-78
lines changed

llvm/lib/Target/BPF/BTFDebug.cpp

Lines changed: 554 additions & 75 deletions
Large diffs are not rendered by default.

llvm/lib/Target/BPF/BTFDebug.h

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class BTFTypeBase {
4848
virtual ~BTFTypeBase() = default;
4949
void setId(uint32_t Id) { this->Id = Id; }
5050
uint32_t getId() { return Id; }
51+
uint32_t getKind() { return Kind; }
5152
uint32_t roundupToBytes(uint32_t NumBits) { return (NumBits + 7) >> 3; }
5253
/// Get the size of this BTF type entry.
5354
virtual uint32_t getSize() { return BTF::CommonTypeSize; }
@@ -68,10 +69,12 @@ class BTFTypeDerived : public BTFTypeBase {
6869

6970
public:
7071
BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag, bool NeedsFixup);
71-
BTFTypeDerived(unsigned NextTypeId, unsigned Tag, StringRef Name);
72+
BTFTypeDerived(unsigned NextTypeId, enum BTF::TypeKinds Kind,
73+
StringRef Name = StringRef());
7274
void completeType(BTFDebug &BDebug) override;
7375
void emitType(MCStreamer &OS) override;
7476
void setPointeeType(uint32_t PointeeType);
77+
uint32_t getPointeeType();
7578
};
7679

7780
/// Handle struct or union forward declaration.
@@ -240,6 +243,8 @@ class BTFTypeTypeTag : public BTFTypeBase {
240243
BTFTypeTypeTag(uint32_t NextTypeId, StringRef Tag);
241244
BTFTypeTypeTag(const DIDerivedType *DTy, StringRef Tag);
242245
void completeType(BTFDebug &BDebug) override;
246+
uint32_t getNextTypeId();
247+
StringRef getTag();
243248
};
244249

245250
/// String table.
@@ -285,6 +290,20 @@ struct BTFFieldReloc {
285290
uint32_t RelocKind; ///< What to patch the instruction
286291
};
287292

293+
/// Used for de-duplication for types annotated with btf_type_tag annotation,
294+
/// See comment at BTFDebug.cpp:addType() for details.
295+
struct BTFTypeDedupKey {
296+
const DIType *CanonTy;
297+
298+
BTFTypeDedupKey(const DIType *CanonTy) : CanonTy(CanonTy) {}
299+
300+
bool operator==(const BTFTypeDedupKey &Other) const;
301+
302+
struct Hash {
303+
size_t operator()(BTFTypeDedupKey const &Key) const;
304+
};
305+
};
306+
288307
/// Collect and emit BTF information.
289308
class BTFDebug : public DebugHandlerBase {
290309
MCStreamer &OS;
@@ -296,6 +315,8 @@ class BTFDebug : public DebugHandlerBase {
296315
BTFStringTable StringTable;
297316
std::vector<std::unique_ptr<BTFTypeBase>> TypeEntries;
298317
std::unordered_map<const DIType *, uint32_t> DIToIdMap;
318+
std::unordered_map<BTFTypeDedupKey, uint32_t, BTFTypeDedupKey::Hash>
319+
DIDedupMap;
299320
std::map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
300321
std::map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
301322
std::map<uint32_t, std::vector<BTFFieldReloc>> FieldRelocTable;
@@ -311,11 +332,17 @@ class BTFDebug : public DebugHandlerBase {
311332
/// Add types to TypeEntries.
312333
/// @{
313334
/// Add types to TypeEntries and DIToIdMap.
314-
uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry, const DIType *Ty);
335+
uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry, const DIType *Ty,
336+
uint32_t *RealId = nullptr);
315337
/// Add types to TypeEntries only and return type id.
316338
uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry);
339+
uint32_t replaceType(uint32_t Id, std::unique_ptr<BTFTypeBase> TypeEntry);
317340
/// @}
318341

342+
BTFTypeBase *getType(uint32_t Id);
343+
344+
std::optional<uint32_t> lookupType(const DIType *Ty);
345+
319346
/// IR type visiting functions.
320347
/// @{
321348
void visitTypeEntry(const DIType *Ty);
@@ -368,7 +395,10 @@ class BTFDebug : public DebugHandlerBase {
368395
/// the base type of DTy. Return the type id of the first BTF type_tag
369396
/// in the chain. If no type_tag's are generated, a negative value
370397
/// is returned.
371-
int genBTFTypeTags(const DIDerivedType *DTy, int BaseTypeId);
398+
uint32_t genBTFTypeTags(const DIType *Ty, int BaseId,
399+
const DIDerivedType *DTy, StringRef AnnotName);
400+
uint32_t genBTFTypeTagsV1(const DIDerivedType *DTy);
401+
uint32_t genBTFTypeTagsV2(const DIType *Ty, uint32_t BaseId);
372402

373403
/// Generate one field relocation record.
374404
void generatePatchImmReloc(const MCSymbol *ORSym, uint32_t RootId,
@@ -390,6 +420,16 @@ class BTFDebug : public DebugHandlerBase {
390420
/// Emit the .BTF.ext section.
391421
void emitBTFExtSection();
392422

423+
uint32_t skipBTFTypeTags(uint32_t Id);
424+
425+
/// BTF post processing phase rewriting type chains like below:
426+
/// CONST -> TYPE_TAG '...' -> ...
427+
/// To:
428+
/// TYPE_TAG '...' -> CONST -> ...
429+
void moveTypeTagsBeforeCVR();
430+
class QualifiedTypesCache;
431+
void rebuildTypeTagsChain(uint32_t Id, QualifiedTypesCache &Cache);
432+
393433
protected:
394434
/// Gather pre-function debug information.
395435
void beginFunctionImpl(const MachineFunction *MF) override;
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
#!/usr/bin/env python3
2+
3+
# Ad-hoc script to print BTF file in a readable format.
4+
# Follows the same printing conventions as bpftool with format 'raw'.
5+
# Usage:
6+
#
7+
# ./print_btf.py <btf_file>
8+
#
9+
# Parameters:
10+
#
11+
# <btf_file> :: a file name or '-' to read from stdin.
12+
#
13+
# Intended usage:
14+
#
15+
# llvm-objcopy --dump-section .BTF=- <input> | ./print_btf.py -
16+
#
17+
# Kernel documentation contains detailed format description:
18+
# https://www.kernel.org/doc/html/latest/bpf/btf.html
19+
20+
import struct
21+
import ctypes
22+
import sys
23+
24+
class SafeDict(dict):
    """A dict whose subscript lookup never raises KeyError.

    Looking up a missing key yields the placeholder string
    '<BAD_KEY: key>' instead, so malformed input can still be printed.
    """

    def __getitem__(self, key):
        if key in self:
            return dict.__getitem__(self, key)
        return f'<BAD_KEY: {key}>'
30+
31+
# Names of the BTF type kinds, indexed by the numeric kind stored in the
# 'info' word of every type entry.
KINDS = SafeDict({
    0: 'UNKN',
    1: 'INT',
    2: 'PTR',
    3: 'ARRAY',
    4: 'STRUCT',
    5: 'UNION',
    6: 'ENUM',
    7: 'FWD',
    8: 'TYPEDEF',
    9: 'VOLATILE',
    10: 'CONST',
    11: 'RESTRICT',
    12: 'FUNC',
    13: 'FUNC_PROTO',
    14: 'VAR',
    15: 'DATASEC',
    16: 'FLOAT',
    17: 'DECL_TAG',
    18: 'TYPE_TAG',
    19: 'ENUM64',
})

# Encoding bits of a BTF_KIND_INT entry.
INT_ENCODING = SafeDict({
    0 << 0: '(none)',
    1 << 0: 'SIGNED',
    1 << 1: 'CHAR',
    1 << 2: 'BOOL'
})

# Signedness of ENUM / ENUM64 entries, selected by kflag.
ENUM_ENCODING = SafeDict({
    0: 'UNSIGNED',
    1: 'SIGNED'
})

# Linkage of a BTF_KIND_FUNC entry (stored in the vlen field).
FUNC_LINKAGE = SafeDict({
    0: 'static',
    1: 'global',
    2: 'extern'
})

# Linkage of a BTF_KIND_VAR entry. Value 2 (BTF_VAR_GLOBAL_EXTERN) is
# included so that extern variables are not reported as a bad key.
VAR_LINKAGE = SafeDict({
    0: 'static',
    1: 'global',
    2: 'extern',
})

# What a BTF_KIND_FWD entry forward-declares, selected by kflag.
FWD_KIND = SafeDict({
    0: 'struct',
    1: 'union',
})

# Define a module-level BTF_KIND_<NAME> constant for every known kind.
for val, name in KINDS.items():
    globals()['BTF_KIND_' + name] = val
84+
85+
def warn(message):
    """Write `message`, followed by a newline, to standard error."""
    sys.stderr.write(str(message) + '\n')
87+
88+
def print_btf(filename):
    """Print the type section of a raw BTF blob in bpftool 'raw' style.

    Parameters:
      filename -- path of the file holding the .BTF section contents,
                  or '-' to read the blob from standard input.

    Type entries are printed to stdout, one per line (members, enumerators
    and parameters on tab-indented lines below their parent). Diagnostics
    about unexpected field values go to stderr through warn(). If the magic
    number is not recognized, or the data ends in the middle of a record,
    a warning is printed and the function returns.
    """
    if filename == '-':
        buf = sys.stdin.buffer.read()
    else:
        with open(filename, 'rb') as file:
            buf = file.read()

    fmt_cache = {}
    endian_pfx = ''
    off = 0
    str_off = 0
    idx = 1

    def unpack(fmt):
        # Decode `fmt` at the current offset using the detected byte order
        # and advance the offset past the decoded fields.
        nonlocal off
        fmt = endian_pfx + fmt
        if fmt not in fmt_cache:
            fmt_cache[fmt] = struct.Struct(fmt)
        st = fmt_cache[fmt]
        r = st.unpack_from(buf, off)
        off += st.size
        return r

    def string(rel_off):
        # Fetch the NUL-terminated string at `rel_off` in the string section.
        try:
            start = str_off + rel_off
            end = buf.index(b"\0", start)
            if start == end:
                return '(anon)'
            return buf[start:end].decode('utf8')
        except ValueError as e:
            # Covers both a missing terminator and invalid UTF-8.
            warn(f"Can't get string at offset {str_off} + {rel_off}: {e}")
            return f'<BAD_STRING {rel_off}>'

    def warn_nonzero(val, name):
        # Report a field of the current entry that should be zero.
        if val != 0:
            warn(f'<{idx}> {name} should be 0 but is {val}')

    try:
        # Use magic number at the header start to determine endianness.
        # The magic is always decoded as little-endian so that the result
        # does not depend on the byte order of the host running the script.
        magic, = unpack('<H')
        if magic == 0xeb9f:
            endian_pfx = '<'
        elif magic == 0x9feb:
            endian_pfx = '>'
        else:
            warn(f'Unexpected BTF magic: {magic:04x}')
            return

        # Rest of the header
        version, flags, hdr_len = unpack('BBI')
        type_off, type_len, str_off, str_len = unpack('IIII')

        # Offsets in the header are relative to the end of a header
        type_off += hdr_len
        str_off += hdr_len
        # Start decoding at the beginning of the type section.
        off = type_off
        type_end = type_off + type_len

        while off < type_end:
            # Common part of every type entry; the third word is either a
            # size or a referenced type id, depending on the kind.
            name_off, info, size = unpack('III')
            kind = (info >> 24) & 0x1f
            vlen = info & 0xffff
            kflag = info >> 31
            kind_name = KINDS[kind]
            name = string(name_off)

            if kind == BTF_KIND_INT:
                int_info, = unpack('I')
                encoding = (int_info & 0x0f000000) >> 24
                offset = (int_info & 0x00ff0000) >> 16
                bits = int_info & 0x000000ff
                enc_name = INT_ENCODING[encoding]
                print(f"[{idx}] {kind_name} '{name}' size={size} "
                      f"bits_offset={offset} "
                      f"nr_bits={bits} encoding={enc_name}")
                warn_nonzero(kflag, 'kflag')
                warn_nonzero(vlen, 'vlen')

            elif kind in [BTF_KIND_PTR, BTF_KIND_CONST, BTF_KIND_VOLATILE,
                          BTF_KIND_RESTRICT]:
                print(f"[{idx}] {kind_name} '{name}' type_id={size}")
                warn_nonzero(name_off, 'name_off')
                warn_nonzero(kflag, 'kflag')
                warn_nonzero(vlen, 'vlen')

            elif kind == BTF_KIND_ARRAY:
                warn_nonzero(name_off, 'name_off')
                warn_nonzero(kflag, 'kflag')
                warn_nonzero(vlen, 'vlen')
                warn_nonzero(size, 'size')
                elem_type, index_type, nelems = unpack('III')
                print(f"[{idx}] {kind_name} '{name}' type_id={elem_type} "
                      f"index_type_id={index_type} nr_elems={nelems}")

            elif kind in [BTF_KIND_STRUCT, BTF_KIND_UNION]:
                print(f"[{idx}] {kind_name} '{name}' size={size} vlen={vlen}")
                if kflag not in [0, 1]:
                    warn(f'<{idx}> kflag should 0 or 1: {kflag}')
                for _ in range(0, vlen):
                    member_name_off, member_type, offset = unpack('III')
                    if kflag == 0:
                        print(f"\t'{string(member_name_off)}' "
                              f"type_id={member_type} "
                              f"bits_offset={offset}")
                    else:
                        # With kflag set the offset word packs the bitfield
                        # size in the top 8 bits.
                        bits_offset = offset & 0xffffff
                        bitfield_size = offset >> 24
                        print(f"\t'{string(member_name_off)}' "
                              f"type_id={member_type} "
                              f"bits_offset={bits_offset} "
                              f"bitfield_size={bitfield_size}")

            elif kind == BTF_KIND_ENUM:
                encoding = ENUM_ENCODING[kflag]
                print(f"[{idx}] {kind_name} '{name}' encoding={encoding} "
                      f"size={size} vlen={vlen}")
                for _ in range(0, vlen):
                    enum_name_off, = unpack('I')
                    val, = unpack('i' if kflag == 1 else 'I')
                    print(f"\t'{string(enum_name_off)}' val={val}")

            elif kind == BTF_KIND_ENUM64:
                encoding = ENUM_ENCODING[kflag]
                print(f"[{idx}] {kind_name} '{name}' encoding={encoding} "
                      f"size={size} vlen={vlen}")
                for _ in range(0, vlen):
                    enum_name_off, lo, hi = unpack('III')
                    val = (hi << 32) | lo
                    # Reinterpret as a signed 64-bit value; done
                    # arithmetically so the result does not depend on the
                    # platform's C 'long' width.
                    if kflag == 1 and val >= 1 << 63:
                        val -= 1 << 64
                    print(f"\t'{string(enum_name_off)}' val={val}LL")

            elif kind == BTF_KIND_FWD:
                print(f"[{idx}] {kind_name} '{name}' "
                      f"fwd_kind={FWD_KIND[kflag]}")
                warn_nonzero(vlen, 'vlen')
                warn_nonzero(size, 'size')

            elif kind in [BTF_KIND_TYPEDEF, BTF_KIND_TYPE_TAG]:
                print(f"[{idx}] {kind_name} '{name}' type_id={size}")
                warn_nonzero(kflag, 'kflag')
                warn_nonzero(vlen, 'vlen')

            elif kind == BTF_KIND_FUNC:
                # For FUNC the vlen field carries the linkage.
                linkage = FUNC_LINKAGE[vlen]
                print(f"[{idx}] {kind_name} '{name}' type_id={size} "
                      f"linkage={linkage}")
                warn_nonzero(kflag, 'kflag')

            elif kind == BTF_KIND_FUNC_PROTO:
                print(f"[{idx}] {kind_name} '{name}' ret_type_id={size} "
                      f"vlen={vlen}")
                warn_nonzero(name_off, 'name_off')
                warn_nonzero(kflag, 'kflag')
                for _ in range(0, vlen):
                    param_name_off, param_type = unpack('II')
                    print(f"\t'{string(param_name_off)}' "
                          f"type_id={param_type}")

            elif kind == BTF_KIND_VAR:
                linkage, = unpack('I')
                linkage = VAR_LINKAGE[linkage]
                print(f"[{idx}] {kind_name} '{name}' type_id={size}, "
                      f"linkage={linkage}")
                warn_nonzero(kflag, 'kflag')
                warn_nonzero(vlen, 'vlen')

            elif kind == BTF_KIND_DATASEC:
                print(f"[{idx}] {kind_name} '{name}' size={size} vlen={vlen}")
                warn_nonzero(kflag, 'kflag')
                warn_nonzero(size, 'size')
                for _ in range(0, vlen):
                    var_type, var_offset, var_size = unpack('III')
                    print(f"\ttype_id={var_type} offset={var_offset} "
                          f"size={var_size}")

            elif kind == BTF_KIND_FLOAT:
                print(f"[{idx}] {kind_name} '{name}' size={size}")
                warn_nonzero(kflag, 'kflag')
                warn_nonzero(vlen, 'vlen')

            elif kind == BTF_KIND_DECL_TAG:
                component_idx, = unpack('i')
                print(f"[{idx}] {kind_name} '{name}' type_id={size} "
                      f"component_idx={component_idx}")
                warn_nonzero(kflag, 'kflag')
                warn_nonzero(vlen, 'vlen')

            else:
                warn(f'<{idx}> Unexpected entry: kind={kind_name} '
                     f'name_off={name_off} '
                     f'vlen={vlen} kflag={kflag} size={size}')

            idx += 1

    except struct.error as e:
        # The buffer ended in the middle of a header or type record.
        warn(f'Truncated or malformed BTF data at offset {off}: {e}')
277+
278+
if __name__ == '__main__':
    # Exactly one argument is expected: a BTF file name, or '-' for stdin.
    if len(sys.argv) != 2:
        # The f-prefix is required for the script name to be interpolated.
        warn(f'Usage: {sys.argv[0]} <btf_file>')
        sys.exit(1)
    print_btf(sys.argv[1])

0 commit comments

Comments
 (0)