Skip to content

Commit 3e6fe5c

Browse files
anakryiko authored and borkmann committed
libbpf: Fix internal USDT address translation logic for shared libraries
Perform the same virtual address to file offset translation that libbpf is doing for executable ELF binaries also for shared libraries. Currently libbpf is making a simplifying and sometimes wrong assumption that for shared libraries relative virtual addresses inside ELF are always equal to file offsets. Unfortunately, this is not always the case with LLVM's lld linker, which now by default generates a considerably more complicated ELF segment layout. E.g., for liburandom_read.so from selftests/bpf, here's an excerpt from readelf output listing ELF segments (a.k.a. program headers): Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0001f8 0x0001f8 R 0x8 LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0005e4 0x0005e4 R 0x1000 LOAD 0x0005f0 0x00000000000015f0 0x00000000000015f0 0x000160 0x000160 R E 0x1000 LOAD 0x000750 0x0000000000002750 0x0000000000002750 0x000210 0x000210 RW 0x1000 LOAD 0x000960 0x0000000000003960 0x0000000000003960 0x000028 0x000029 RW 0x1000 Compare that to what is generated by GNU ld (or LLVM's lld with the extra -znoseparate-code argument which disables this cleverness in the name of file size reduction): Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x000550 0x000550 R 0x1000 LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000131 0x000131 R E 0x1000 LOAD 0x002000 0x0000000000002000 0x0000000000002000 0x0000ac 0x0000ac R 0x1000 LOAD 0x002dc0 0x0000000000003dc0 0x0000000000003dc0 0x000262 0x000268 RW 0x1000 You can see from the first example above that for executable (Flg == "R E") PT_LOAD segment (LOAD #2), Offset doesn't match VirtAddr columns. And it does in the second case (GNU ld output). This is important because all the addresses, including USDT specs, operate in a virtual address space, while kernel is expecting file offsets when performing uprobe attach.
So such mismatches have to be properly taken care of and compensated by libbpf, which is what this patch is fixing. The patch also clarifies a few function and variable names, as well as updates comments to reflect this important distinction (virtaddr vs file offset) and to emphasize that shared libraries are not all that different from executables in this regard. This patch also changes selftests/bpf Makefile to force urandom_read and liburandom_read.so to be built with Clang and LLVM's lld (and explicitly request this ELF file size optimization through -znoseparate-code linker parameter) to validate libbpf logic and ensure regressions don't happen in the future. I've bundled these selftests changes together with libbpf changes to keep the above description tied with both libbpf and selftests changes. Fixes: 74cc631 ("libbpf: Add USDT notes parsing and resolution logic") Signed-off-by: Andrii Nakryiko <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent de5bb43 commit 3e6fe5c

File tree

2 files changed

+72
-65
lines changed

2 files changed

+72
-65
lines changed

tools/lib/bpf/usdt.c

Lines changed: 63 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, siz
441441
return 0;
442442
}
443443

444-
static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt)
444+
static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt)
445445
{
446446
char path[PATH_MAX], line[PATH_MAX], mode[16];
447447
size_t seg_start, seg_end, seg_off;
@@ -531,35 +531,40 @@ static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs,
531531
return err;
532532
}
533533

534-
static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative)
534+
static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long virtaddr)
535535
{
536536
struct elf_seg *seg;
537537
int i;
538538

539-
if (relative) {
540-
/* for shared libraries, address is relative offset and thus
541-
* should be fall within logical offset-based range of
542-
* [offset_start, offset_end)
543-
*/
544-
for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
545-
if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start))
546-
return seg;
547-
}
548-
} else {
549-
/* for binaries, address is absolute and thus should be within
550-
* absolute address range of [seg_start, seg_end)
551-
*/
552-
for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
553-
if (seg->start <= addr && addr < seg->end)
554-
return seg;
555-
}
539+
/* for ELF binaries (both executables and shared libraries), we are
540+
* given virtual address (absolute for executables, relative for
541+
* libraries) which should match address range of [seg_start, seg_end)
542+
*/
543+
for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
544+
if (seg->start <= virtaddr && virtaddr < seg->end)
545+
return seg;
556546
}
547+
return NULL;
548+
}
557549

550+
static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset)
551+
{
552+
struct elf_seg *seg;
553+
int i;
554+
555+
/* for VMA segments from /proc/<pid>/maps file, provided "address" is
556+
* actually a file offset, so should be fall within logical
557+
* offset-based range of [offset_start, offset_end)
558+
*/
559+
for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
560+
if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start))
561+
return seg;
562+
}
558563
return NULL;
559564
}
560565

561-
static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
562-
GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
566+
static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
567+
const char *data, size_t name_off, size_t desc_off,
563568
struct usdt_note *usdt_note);
564569

565570
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
@@ -568,8 +573,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
568573
const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
569574
struct usdt_target **out_targets, size_t *out_target_cnt)
570575
{
571-
size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0;
572-
struct elf_seg *segs = NULL, *lib_segs = NULL;
576+
size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0;
577+
struct elf_seg *segs = NULL, *vma_segs = NULL;
573578
struct usdt_target *targets = NULL, *target;
574579
long base_addr = 0;
575580
Elf_Scn *notes_scn, *base_scn;
@@ -613,8 +618,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
613618
struct elf_seg *seg = NULL;
614619
void *tmp;
615620

616-
err = parse_usdt_note(elf, path, base_addr, &nhdr,
617-
data->d_buf, name_off, desc_off, &note);
621+
err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
618622
if (err)
619623
goto err_out;
620624

@@ -654,30 +658,29 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
654658
usdt_rel_ip += base_addr - note.base_addr;
655659
}
656660

657-
if (ehdr.e_type == ET_EXEC) {
658-
/* When attaching uprobes (which what USDTs basically
659-
* are) kernel expects a relative IP to be specified,
660-
* so if we are attaching to an executable ELF binary
661-
* (i.e., not a shared library), we need to calculate
662-
* proper relative IP based on ELF's load address
663-
*/
664-
seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */);
665-
if (!seg) {
666-
err = -ESRCH;
667-
pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
668-
usdt_provider, usdt_name, path, usdt_abs_ip);
669-
goto err_out;
670-
}
671-
if (!seg->is_exec) {
672-
err = -ESRCH;
673-
pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
674-
path, seg->start, seg->end, usdt_provider, usdt_name,
675-
usdt_abs_ip);
676-
goto err_out;
677-
}
661+
/* When attaching uprobes (which is what USDTs basically are)
662+
* kernel expects file offset to be specified, not a relative
663+
* virtual address, so we need to translate virtual address to
664+
* file offset, for both ET_EXEC and ET_DYN binaries.
665+
*/
666+
seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip);
667+
if (!seg) {
668+
err = -ESRCH;
669+
pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
670+
usdt_provider, usdt_name, path, usdt_abs_ip);
671+
goto err_out;
672+
}
673+
if (!seg->is_exec) {
674+
err = -ESRCH;
675+
pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
676+
path, seg->start, seg->end, usdt_provider, usdt_name,
677+
usdt_abs_ip);
678+
goto err_out;
679+
}
680+
/* translate from virtual address to file offset */
681+
usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset;
678682

679-
usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset);
680-
} else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */
683+
if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) {
681684
/* If we don't have BPF cookie support but need to
682685
* attach to a shared library, we'll need to know and
683686
* record absolute addresses of attach points due to
@@ -697,33 +700,33 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
697700
goto err_out;
698701
}
699702

700-
/* lib_segs are lazily initialized only if necessary */
701-
if (lib_seg_cnt == 0) {
702-
err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt);
703+
/* vma_segs are lazily initialized only if necessary */
704+
if (vma_seg_cnt == 0) {
705+
err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt);
703706
if (err) {
704707
pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n",
705708
pid, path, err);
706709
goto err_out;
707710
}
708711
}
709712

710-
seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */);
713+
seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip);
711714
if (!seg) {
712715
err = -ESRCH;
713716
pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n",
714717
usdt_provider, usdt_name, path, usdt_rel_ip);
715718
goto err_out;
716719
}
717720

718-
usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset);
721+
usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip;
719722
}
720723

721724
pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n",
722725
usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path,
723726
note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args,
724727
seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0);
725728

726-
/* Adjust semaphore address to be a relative offset */
729+
/* Adjust semaphore address to be a file offset */
727730
if (note.sema_addr) {
728731
if (!man->has_sema_refcnt) {
729732
pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n",
@@ -732,7 +735,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
732735
goto err_out;
733736
}
734737

735-
seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */);
738+
seg = find_elf_seg(segs, seg_cnt, note.sema_addr);
736739
if (!seg) {
737740
err = -ESRCH;
738741
pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n",
@@ -747,7 +750,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
747750
goto err_out;
748751
}
749752

750-
usdt_sema_off = note.sema_addr - (seg->start - seg->offset);
753+
usdt_sema_off = note.sema_addr - seg->start + seg->offset;
751754

752755
pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n",
753756
usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ",
@@ -770,7 +773,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
770773
target->rel_ip = usdt_rel_ip;
771774
target->sema_off = usdt_sema_off;
772775

773-
/* notes->args references strings from Elf itself, so they can
776+
/* notes.args references strings from Elf itself, so they can
774777
* be referenced safely until elf_end() call
775778
*/
776779
target->spec_str = note.args;
@@ -788,7 +791,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
788791

789792
err_out:
790793
free(segs);
791-
free(lib_segs);
794+
free(vma_segs);
792795
if (err < 0)
793796
free(targets);
794797
return err;
@@ -1089,8 +1092,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct
10891092
/* Parse out USDT ELF note from '.note.stapsdt' section.
10901093
* Logic inspired by perf's code.
10911094
*/
1092-
static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
1093-
GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
1095+
static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
1096+
const char *data, size_t name_off, size_t desc_off,
10941097
struct usdt_note *note)
10951098
{
10961099
const char *provider, *name, *args;

tools/testing/selftests/bpf/Makefile

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -172,13 +172,15 @@ $(OUTPUT)/%:%.c
172172
# do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
173173
$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
174174
$(call msg,LIB,,$@)
175-
$(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@
175+
$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \
176+
-fuse-ld=lld -Wl,-znoseparate-code -fPIC -shared -o $@
176177

177178
$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
178179
$(call msg,BINARY,,$@)
179-
$(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
180-
liburandom_read.so $(LDLIBS) \
181-
-Wl,-rpath=. -Wl,--build-id=sha1 -o $@
180+
$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
181+
liburandom_read.so $(LDLIBS) \
182+
-fuse-ld=lld -Wl,-znoseparate-code \
183+
-Wl,-rpath=. -Wl,--build-id=sha1 -o $@
182184

183185
$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
184186
$(call msg,MOD,,$@)
@@ -580,6 +582,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
580582
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
581583
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
582584
feature bpftool \
583-
$(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h no_alu32 bpf_gcc bpf_testmod.ko)
585+
$(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \
586+
no_alu32 bpf_gcc bpf_testmod.ko \
587+
liburandom_read.so)
584588

585589
.PHONY: docs docs-clean

0 commit comments

Comments
 (0)