Skip to content

Commit b1d462b

Browse files
committed
Merge branch 'Make uprobe attachment APK aware'
Daniel Müller says: ==================== On Android, APKs (Android packages; zip packages with somewhat prescriptive contents) are first-class citizens in the system: the shared objects contained in them don't exist in unpacked form on the file system. Rather, they are mmapped directly from within the archive and the archive is also what the kernel is aware of. For users, that complicates the process of attaching a uprobe to a function contained in a shared object in one such APK: they'd have to find the byte offset of said function from the beginning of the archive. That is cumbersome to do manually and can be fragile, because various changes could invalidate said offset. That is why for uprobes inside ELF files (not inside an APK), commit d112c9ce249b ("libbpf: Support function name-based attach uprobes") added support for attaching to symbols by name. On Android, that mechanism currently does not work, because this logic is not APK-aware. This patch set introduces first-class support for attaching uprobes to functions inside ELF objects contained in APKs via function names. We add support for recognizing the following syntax for a binary path: <archive>!/<binary-in-archive> (e.g., /system/app/test-app.apk!/lib/arm64-v8a/libc++.so) This syntax is common in the Android ecosystem and used by tools such as simpleperf. It is also what is being proposed for bcc [0]. If the user provides such a binary path, we find <binary-in-archive> (lib/arm64-v8a/libc++.so in the example) inside of <archive> (/system/app/test-app.apk). We perform the regular ELF offset search inside the binary and add that to the offset within the archive itself, to retrieve the offset at which to attach the uprobe. 
[0] iovisor/bcc#4440 Changelog --------- v3->v4: - use ERR_PTR instead of libbpf_err_ptr() in zip_archive_open() - eliminated err variable from elf_find_func_offset_from_archive() v2->v3: - adjusted zip_archive_open() to report errno - fixed provided libbpf_strlcpy() buffer size argument - adjusted find_cd() to handle errors better - use fewer local variables in get_entry_at_offset() v1->v2: - removed unaligned_* types - switched to using __u32 and __u16 - switched to using errno constants instead of hard-coded negative values - added another pr_debug() message - shortened central_directory_* to cd_* - inlined cd_file_header_at_offset() function - bunch of syntactical changes ==================== Signed-off-by: Andrii Nakryiko <[email protected]>
2 parents db52b58 + c44fd84 commit b1d462b

File tree

4 files changed

+495
-28
lines changed

4 files changed

+495
-28
lines changed

tools/lib/bpf/Build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
22
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
33
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
4-
usdt.o
4+
usdt.o zip.o

tools/lib/bpf/libbpf.c

Lines changed: 119 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#include "libbpf_internal.h"
5454
#include "hashmap.h"
5555
#include "bpf_gen_internal.h"
56+
#include "zip.h"
5657

5758
#ifndef BPF_FS_MAGIC
5859
#define BPF_FS_MAGIC 0xcafe4a11
@@ -10530,32 +10531,19 @@ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
1053010531
return NULL;
1053110532
}
1053210533

10533-
/* Find offset of function name in object specified by path. "name" matches
10534-
* symbol name or name@@LIB for library functions.
10534+
/* Find offset of function name in the provided ELF object. "binary_path" is
10535+
* the path to the ELF binary represented by "elf" and is used only for error
10536+
* reporting. "name" matches symbol name or name@@LIB for library
10537+
* functions.
1053510538
*/
10536-
static long elf_find_func_offset(const char *binary_path, const char *name)
10539+
static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
1053710540
{
10538-
int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
10541+
int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
1053910542
bool is_shared_lib, is_name_qualified;
10540-
char errmsg[STRERR_BUFSIZE];
1054110543
long ret = -ENOENT;
1054210544
size_t name_len;
1054310545
GElf_Ehdr ehdr;
10544-
Elf *elf;
1054510546

10546-
fd = open(binary_path, O_RDONLY | O_CLOEXEC);
10547-
if (fd < 0) {
10548-
ret = -errno;
10549-
pr_warn("failed to open %s: %s\n", binary_path,
10550-
libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
10551-
return ret;
10552-
}
10553-
elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
10554-
if (!elf) {
10555-
pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
10556-
close(fd);
10557-
return -LIBBPF_ERRNO__FORMAT;
10558-
}
1055910547
if (!gelf_getehdr(elf, &ehdr)) {
1056010548
pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
1056110549
ret = -LIBBPF_ERRNO__FORMAT;
@@ -10568,7 +10556,7 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
1056810556
/* Does name specify "@@LIB"? */
1056910557
is_name_qualified = strstr(name, "@@") != NULL;
1057010558

10571-
/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
10559+
/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
1057210560
* a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
1057310561
* linked binary may not have SHT_DYNSYM, so absence of a section should not be
1057410562
* reported as a warning/error.
@@ -10681,11 +10669,101 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
1068110669
}
1068210670
}
1068310671
out:
10672+
return ret;
10673+
}
10674+
10675+
/* Find offset of function name in ELF object specified by path. "name" matches
10676+
* symbol name or name@@LIB for library functions.
*
* Opens "binary_path" read-only, creates a libelf handle on the descriptor and
* passes it to elf_find_func_offset(); the handle and the descriptor are both
* released before returning. Returns the result of elf_find_func_offset(),
* -errno if open() fails, or -LIBBPF_ERRNO__FORMAT if elf_begin() fails.
10677+
*/
10678+
static long elf_find_func_offset_from_file(const char *binary_path, const char *name)
10679+
{
10680+
char errmsg[STRERR_BUFSIZE];
10681+
long ret = -ENOENT; /* initial value is always overwritten before being returned */
10682+
Elf *elf;
10683+
int fd;
10684+
10685+
fd = open(binary_path, O_RDONLY | O_CLOEXEC);
10686+
if (fd < 0) {
10687+
ret = -errno; /* keep the open() failure code as a negative errno value */
10688+
pr_warn("failed to open %s: %s\n", binary_path,
10689+
libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
10690+
return ret;
10691+
}
10692+
elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); /* create the libelf handle on the open descriptor */
10693+
if (!elf) {
10694+
pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
10695+
close(fd); /* no ELF handle was created, so only the descriptor is released */
10696+
return -LIBBPF_ERRNO__FORMAT;
10697+
}
10698+
10699+
ret = elf_find_func_offset(elf, binary_path, name); /* actual symbol lookup; result is returned unchanged */
1068410700
elf_end(elf);
1068510701
close(fd);
1068610702
return ret;
1068710703
}
1068810704

10705+
/* Find offset of function name in archive specified by path. Currently
10706+
* supported are .zip files that do not compress their contents, as used on
10707+
* Android in the form of APKs, for example. "file_name" is the name of the ELF
10708+
* file inside the archive. "func_name" matches symbol name or name@@LIB for
10709+
* library functions.
10710+
*
10711+
* An overview of the APK format specifically is provided here:
10712+
* https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
*
* On success the value returned by elf_find_func_offset() for the embedded ELF
* file is increased by the entry's data offset before being returned. Failure
* values are: the error encoded in the pointer returned by zip_archive_open(),
* the non-zero result of zip_archive_find_entry(), -LIBBPF_ERRNO__FORMAT for a
* compressed entry, -LIBBPF_ERRNO__LIBELF if elf_memory() fails, or whatever
* non-positive value elf_find_func_offset() returned.
10713+
*/
10714+
static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
10715+
const char *func_name)
10716+
{
10717+
struct zip_archive *archive;
10718+
struct zip_entry entry;
10719+
long ret;
10720+
Elf *elf;
10721+
10722+
archive = zip_archive_open(archive_path);
10723+
if (IS_ERR(archive)) { /* failure is reported through an error-encoded pointer */
10724+
ret = PTR_ERR(archive);
10725+
pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
10726+
return ret; /* nothing was opened, so the "out" cleanup is bypassed */
10727+
}
10728+
10729+
ret = zip_archive_find_entry(archive, file_name, &entry);
10730+
if (ret) { /* any non-zero result is treated as a failed lookup */
10731+
pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
10732+
archive_path, ret);
10733+
goto out;
10734+
}
10735+
pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
10736+
(unsigned long)entry.data_offset);
10737+
10738+
if (entry.compression) { /* a non-zero compression field is rejected as "compressed" */
10739+
pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
10740+
archive_path);
10741+
ret = -LIBBPF_ERRNO__FORMAT;
10742+
goto out;
10743+
}
10744+
10745+
elf = elf_memory((void *)entry.data, entry.data_length); /* ELF handle over the entry's bytes */
10746+
if (!elf) {
10747+
pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
10748+
elf_errmsg(-1));
10749+
ret = -LIBBPF_ERRNO__LIBELF;
10750+
goto out;
10751+
}
10752+
10753+
ret = elf_find_func_offset(elf, file_name, func_name);
10754+
if (ret > 0) { /* only a positive result is adjusted; zero and negative values pass through */
10755+
pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
10756+
func_name, file_name, archive_path, entry.data_offset, ret,
10757+
ret + entry.data_offset);
10758+
ret += entry.data_offset; /* add the entry's data offset to the in-ELF offset */
10759+
}
10760+
elf_end(elf); /* the ELF handle is released before the archive is closed */
10761+
10762+
out:
10763+
zip_archive_close(archive); /* shared exit for every path taken after a successful open */
10764+
return ret;
10765+
}
10766+
1068910767
static const char *arch_specific_lib_paths(void)
1069010768
{
1069110769
/*
@@ -10771,9 +10849,10 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
1077110849
const char *binary_path, size_t func_offset,
1077210850
const struct bpf_uprobe_opts *opts)
1077310851
{
10774-
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10852+
const char *archive_path = NULL, *archive_sep = NULL;
1077510853
char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
10776-
char full_binary_path[PATH_MAX];
10854+
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10855+
char full_path[PATH_MAX];
1077710856
struct bpf_link *link;
1077810857
size_t ref_ctr_off;
1077910858
int pfd, err;
@@ -10790,21 +10869,34 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
1079010869
if (!binary_path)
1079110870
return libbpf_err_ptr(-EINVAL);
1079210871

10793-
if (!strchr(binary_path, '/')) {
10794-
err = resolve_full_path(binary_path, full_binary_path,
10795-
sizeof(full_binary_path));
10872+
/* Check if "binary_path" refers to an archive. */
10873+
archive_sep = strstr(binary_path, "!/");
10874+
if (archive_sep) {
10875+
full_path[0] = '\0';
10876+
libbpf_strlcpy(full_path, binary_path,
10877+
min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
10878+
archive_path = full_path;
10879+
binary_path = archive_sep + 2;
10880+
} else if (!strchr(binary_path, '/')) {
10881+
err = resolve_full_path(binary_path, full_path, sizeof(full_path));
1079610882
if (err) {
1079710883
pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
1079810884
prog->name, binary_path, err);
1079910885
return libbpf_err_ptr(err);
1080010886
}
10801-
binary_path = full_binary_path;
10887+
binary_path = full_path;
1080210888
}
1080310889
func_name = OPTS_GET(opts, func_name, NULL);
1080410890
if (func_name) {
1080510891
long sym_off;
1080610892

10807-
sym_off = elf_find_func_offset(binary_path, func_name);
10893+
if (archive_path) {
10894+
sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
10895+
func_name);
10896+
binary_path = archive_path;
10897+
} else {
10898+
sym_off = elf_find_func_offset_from_file(binary_path, func_name);
10899+
}
1080810900
if (sym_off < 0)
1080910901
return libbpf_err_ptr(sym_off);
1081010902
func_offset += sym_off;

0 commit comments

Comments
 (0)