Skip to content

Commit f80acbd

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf-task-fd-query'
Yonghong Song says: ==================== Currently, suppose a userspace application has loaded a bpf program and attached it to a tracepoint/kprobe/uprobe, and a bpf introspection tool, e.g., bpftool, wants to show which bpf program is attached to which tracepoint/kprobe/uprobe. Such attachment information will be really useful to understand the overall bpf deployment in the system. There is a name field (16 bytes) for each program, which could be used to encode the attachment point. There are some drawbacks to this approach. First, a bpftool user (e.g., an admin) may not really understand the association between the name and the attachment point. Second, if one program is attached to multiple places, encoding a proper name which can imply all these attachments becomes difficult. This patch introduces a new bpf subcommand BPF_TASK_FD_QUERY. Given a pid and fd, this command will return bpf related information to user space. Right now it only supports tracepoint/kprobe/uprobe perf event fd's. For such a fd, BPF_TASK_FD_QUERY will return . prog_id . tracepoint name, or . k[ret]probe funcname + offset or kernel addr, or . u[ret]probe filename + offset to the userspace. The user can use "bpftool prog" to find more information about the bpf program itself with prog_id. Patch #1 adds function perf_get_event() in kernel/events/core.c. Patch #2 implements the bpf subcommand BPF_TASK_FD_QUERY. Patch #3 syncs tools bpf.h header and also adds bpf_task_fd_query() in the libbpf library for samples/selftests/bpftool to use. Patch #4 adds ksym_get_addr() utility function. Patch #5 adds a test in samples/bpf for querying k[ret]probes and u[ret]probes. Patch #6 adds a test in tools/testing/selftests/bpf for querying raw_tracepoint and tracepoint. Patch #7 adds a new subcommand "perf" to bpftool. Changelogs: v4 -> v5: . return strlen(buf) instead of strlen(buf) + 1 in the attr.buf_len. 
As long as the user provides a non-empty buffer, it will be filled with an empty string, a truncated string, or the full string based on the buffer size and the length of the to-be-copied string. v3 -> v4: . made attr buf_len input/output. The length of the actual buffer is written to buf_len so user space knows what is actually needed. If the user provides a buffer with length >= 1 but less than required, do a partial copy and return -ENOSPC. . code simplification with put_user. . changed query result attach_info to fd_type. . add tests at selftests/bpf to test zero len, null buf and insufficient buf. v2 -> v3: . made perf_get_event() return perf_event pointer const. This was to ensure that event fields are not meddled with. . detect whether the new BPF_TASK_FD_QUERY is supported or not in "bpftool perf" and warn users if it is not. v1 -> v2: . changed bpf subcommand name from BPF_PERF_EVENT_QUERY to BPF_TASK_FD_QUERY. . fixed various "bpftool perf" issues and added documentation and auto-completion. ==================== Acked-by: Daniel Borkmann <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 31ad392 + b04df40 commit f80acbd

File tree

23 files changed

+1257
-2
lines changed

23 files changed

+1257
-2
lines changed

include/linux/perf_event.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
868868
extern void perf_event_free_task(struct task_struct *task);
869869
extern void perf_event_delayed_put(struct task_struct *task);
870870
extern struct file *perf_event_get(unsigned int fd);
871+
extern const struct perf_event *perf_get_event(struct file *file);
871872
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
872873
extern void perf_event_print_debug(void);
873874
extern void perf_pmu_disable(struct pmu *pmu);
@@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { }
12891290
static inline void perf_event_free_task(struct task_struct *task) { }
12901291
static inline void perf_event_delayed_put(struct task_struct *task) { }
12911292
static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
1293+
static inline const struct perf_event *perf_get_event(struct file *file)
1294+
{
1295+
return ERR_PTR(-EINVAL);
1296+
}
12921297
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
12931298
{
12941299
return ERR_PTR(-EINVAL);

include/linux/trace_events.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
473473
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
474474
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
475475
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
476+
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
477+
u32 *fd_type, const char **buf,
478+
u64 *probe_offset, u64 *probe_addr);
476479
#else
477480
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
478481
{
@@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
504507
{
505508
return NULL;
506509
}
510+
/*
 * Stub variant of bpf_get_perf_event_info() that sits in the #else arm of
 * the surrounding conditional: it reports -EOPNOTSUPP and leaves every
 * output pointer untouched.
 */
static inline int
bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			u32 *fd_type, const char **buf,
			u64 *probe_offset, u64 *probe_addr)
{
	return -EOPNOTSUPP;
}
507517
#endif
508518

509519
enum {
@@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
560570
#ifdef CONFIG_KPROBE_EVENTS
561571
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
562572
extern void perf_kprobe_destroy(struct perf_event *event);
573+
extern int bpf_get_kprobe_info(const struct perf_event *event,
574+
u32 *fd_type, const char **symbol,
575+
u64 *probe_offset, u64 *probe_addr,
576+
bool perf_type_tracepoint);
563577
#endif
564578
#ifdef CONFIG_UPROBE_EVENTS
565579
extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
566580
extern void perf_uprobe_destroy(struct perf_event *event);
581+
extern int bpf_get_uprobe_info(const struct perf_event *event,
582+
u32 *fd_type, const char **filename,
583+
u64 *probe_offset, bool perf_type_tracepoint);
567584
#endif
568585
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
569586
char *filter_str);

include/uapi/linux/bpf.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ enum bpf_cmd {
9797
BPF_RAW_TRACEPOINT_OPEN,
9898
BPF_BTF_LOAD,
9999
BPF_BTF_GET_FD_BY_ID,
100+
BPF_TASK_FD_QUERY,
100101
};
101102

102103
enum bpf_map_type {
@@ -380,6 +381,22 @@ union bpf_attr {
380381
__u32 btf_log_size;
381382
__u32 btf_log_level;
382383
};
384+
385+
/* Attributes exchanged with user space by the BPF_TASK_FD_QUERY command. */
struct {
386+
__u32 pid; /* input: pid */
387+
__u32 fd; /* input: fd */
388+
__u32 flags; /* input: flags, must be 0 */
389+
__u32 buf_len; /* input: size of buf; output: string length without the NUL */
390+
__aligned_u64 buf; /* input/output:
391+
* tp_name for tracepoint
392+
* symbol for kprobe
393+
* filename for uprobe
394+
*/
395+
__u32 prog_id; /* output: prog_id */
396+
__u32 fd_type; /* output: BPF_FD_TYPE_* */
397+
__u64 probe_offset; /* output: probe_offset */
398+
__u64 probe_addr; /* output: probe_addr */
399+
} task_fd_query;
383400
} __attribute__((aligned(8)));
384401

385402
/* The description below is an attempt at providing documentation to eBPF
@@ -2557,4 +2574,13 @@ struct bpf_fib_lookup {
25572574
__u8 dmac[6]; /* ETH_ALEN */
25582575
};
25592576

2577+
/* Kinds of attachment reported in task_fd_query.fd_type. The comment on each
 * value names the data the query returns for that kind.
 */
enum bpf_task_fd_type {
2578+
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
2579+
BPF_FD_TYPE_TRACEPOINT, /* tp name */
2580+
BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
2581+
BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
2582+
BPF_FD_TYPE_UPROBE, /* filename + offset */
2583+
BPF_FD_TYPE_URETPROBE, /* filename + offset */
2584+
};
2585+
25602586
#endif /* _UAPI__LINUX_BPF_H__ */

kernel/bpf/syscall.c

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
#include <linux/vmalloc.h>
1919
#include <linux/mmzone.h>
2020
#include <linux/anon_inodes.h>
21+
#include <linux/fdtable.h>
2122
#include <linux/file.h>
23+
#include <linux/fs.h>
2224
#include <linux/license.h>
2325
#include <linux/filter.h>
2426
#include <linux/version.h>
@@ -2178,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
21782180
return btf_get_fd_by_id(attr->btf_id);
21792181
}
21802182

2183+
static int bpf_task_fd_query_copy(const union bpf_attr *attr,
2184+
union bpf_attr __user *uattr,
2185+
u32 prog_id, u32 fd_type,
2186+
const char *buf, u64 probe_offset,
2187+
u64 probe_addr)
2188+
{
2189+
char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
2190+
u32 len = buf ? strlen(buf) : 0, input_len;
2191+
int err = 0;
2192+
2193+
if (put_user(len, &uattr->task_fd_query.buf_len))
2194+
return -EFAULT;
2195+
input_len = attr->task_fd_query.buf_len;
2196+
if (input_len && ubuf) {
2197+
if (!len) {
2198+
/* nothing to copy, just make ubuf NULL terminated */
2199+
char zero = '\0';
2200+
2201+
if (put_user(zero, ubuf))
2202+
return -EFAULT;
2203+
} else if (input_len >= len + 1) {
2204+
/* ubuf can hold the string with NULL terminator */
2205+
if (copy_to_user(ubuf, buf, len + 1))
2206+
return -EFAULT;
2207+
} else {
2208+
/* ubuf cannot hold the string with NULL terminator,
2209+
* do a partial copy with NULL terminator.
2210+
*/
2211+
char zero = '\0';
2212+
2213+
err = -ENOSPC;
2214+
if (copy_to_user(ubuf, buf, input_len - 1))
2215+
return -EFAULT;
2216+
if (put_user(zero, ubuf + input_len - 1))
2217+
return -EFAULT;
2218+
}
2219+
}
2220+
2221+
if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
2222+
put_user(fd_type, &uattr->task_fd_query.fd_type) ||
2223+
put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
2224+
put_user(probe_addr, &uattr->task_fd_query.probe_addr))
2225+
return -EFAULT;
2226+
2227+
return err;
2228+
}
2229+
2230+
#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
2231+
2232+
static int bpf_task_fd_query(const union bpf_attr *attr,
2233+
union bpf_attr __user *uattr)
2234+
{
2235+
pid_t pid = attr->task_fd_query.pid;
2236+
u32 fd = attr->task_fd_query.fd;
2237+
const struct perf_event *event;
2238+
struct files_struct *files;
2239+
struct task_struct *task;
2240+
struct file *file;
2241+
int err;
2242+
2243+
if (CHECK_ATTR(BPF_TASK_FD_QUERY))
2244+
return -EINVAL;
2245+
2246+
if (!capable(CAP_SYS_ADMIN))
2247+
return -EPERM;
2248+
2249+
if (attr->task_fd_query.flags != 0)
2250+
return -EINVAL;
2251+
2252+
task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
2253+
if (!task)
2254+
return -ENOENT;
2255+
2256+
files = get_files_struct(task);
2257+
put_task_struct(task);
2258+
if (!files)
2259+
return -ENOENT;
2260+
2261+
err = 0;
2262+
spin_lock(&files->file_lock);
2263+
file = fcheck_files(files, fd);
2264+
if (!file)
2265+
err = -EBADF;
2266+
else
2267+
get_file(file);
2268+
spin_unlock(&files->file_lock);
2269+
put_files_struct(files);
2270+
2271+
if (err)
2272+
goto out;
2273+
2274+
if (file->f_op == &bpf_raw_tp_fops) {
2275+
struct bpf_raw_tracepoint *raw_tp = file->private_data;
2276+
struct bpf_raw_event_map *btp = raw_tp->btp;
2277+
2278+
err = bpf_task_fd_query_copy(attr, uattr,
2279+
raw_tp->prog->aux->id,
2280+
BPF_FD_TYPE_RAW_TRACEPOINT,
2281+
btp->tp->name, 0, 0);
2282+
goto put_file;
2283+
}
2284+
2285+
event = perf_get_event(file);
2286+
if (!IS_ERR(event)) {
2287+
u64 probe_offset, probe_addr;
2288+
u32 prog_id, fd_type;
2289+
const char *buf;
2290+
2291+
err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
2292+
&buf, &probe_offset,
2293+
&probe_addr);
2294+
if (!err)
2295+
err = bpf_task_fd_query_copy(attr, uattr, prog_id,
2296+
fd_type, buf,
2297+
probe_offset,
2298+
probe_addr);
2299+
goto put_file;
2300+
}
2301+
2302+
err = -ENOTSUPP;
2303+
put_file:
2304+
fput(file);
2305+
out:
2306+
return err;
2307+
}
2308+
21812309
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
21822310
{
21832311
union bpf_attr attr = {};
@@ -2264,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
22642392
case BPF_BTF_GET_FD_BY_ID:
22652393
err = bpf_btf_get_fd_by_id(&attr);
22662394
break;
2395+
case BPF_TASK_FD_QUERY:
2396+
err = bpf_task_fd_query(&attr, uattr);
2397+
break;
22672398
default:
22682399
err = -EINVAL;
22692400
break;

kernel/events/core.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
1121211212
return file;
1121311213
}
1121411214

11215+
const struct perf_event *perf_get_event(struct file *file)
11216+
{
11217+
if (file->f_op != &perf_fops)
11218+
return ERR_PTR(-EINVAL);
11219+
11220+
return file->private_data;
11221+
}
11222+
1121511223
const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
1121611224
{
1121711225
if (!event)

kernel/trace/bpf_trace.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <linux/uaccess.h>
1515
#include <linux/ctype.h>
1616
#include <linux/kprobes.h>
17+
#include <linux/syscalls.h>
1718
#include <linux/error-injection.h>
1819

1920
#include "trace_probe.h"
@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
11631164
mutex_unlock(&bpf_event_mutex);
11641165
return err;
11651166
}
1167+
1168+
/*
 * bpf_get_perf_event_info - describe what a perf event's bpf program is
 * attached to.
 * @event:        perf event to inspect.
 * @prog_id:      out: id of the attached program.
 * @fd_type:      out: one of BPF_FD_TYPE_*.
 * @buf:          out: tracepoint name, kprobe symbol (may be NULL) or
 *                uprobe filename; points into existing kernel data.
 * @probe_offset: out: probe offset, 0 for tracepoints.
 * @probe_addr:   out: kprobe address when no symbol is known, else 0.
 *
 * On success every output is written. This includes @probe_addr on the
 * uprobe path, where the uprobe helper has no such output, so that callers
 * may pass uninitialised storage without leaking it.
 *
 * Return: 0 on success; -ENOENT when no program is attached; -EOPNOTSUPP
 * for BPF_PROG_TYPE_PERF_EVENT programs or when the event is neither a
 * tracepoint nor a kprobe/uprobe handled by this build; otherwise the error
 * from bpf_get_kprobe_info() or bpf_get_uprobe_info().
 */
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr)
{
	bool is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int flags, err = 0;

	prog = event->prog;
	if (!prog)
		return -ENOENT;

	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
		return -EOPNOTSUPP;

	*prog_id = prog->aux->id;
	flags = event->tp_event->flags;
	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
	is_syscall_tp = is_syscall_trace_event(event->tp_event);

	if (is_tracepoint || is_syscall_tp) {
		*buf = is_tracepoint ? event->tp_event->tp->name
				     : event->tp_event->name;
		*fd_type = BPF_FD_TYPE_TRACEPOINT;
		*probe_offset = 0x0;
		*probe_addr = 0x0;
	} else {
		/* kprobe/uprobe */
		err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_KPROBE)
			err = bpf_get_kprobe_info(event, fd_type, buf,
						  probe_offset, probe_addr,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_UPROBE) {
			/*
			 * bpf_get_uprobe_info() takes no probe_addr argument,
			 * so clear it here; otherwise the caller would see
			 * whatever its storage happened to contain.
			 */
			*probe_addr = 0x0;
			err = bpf_get_uprobe_info(event, fd_type, buf,
						  probe_offset,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
		}
#endif
	}

	return err;
}

kernel/trace/trace_kprobe.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
12871287
head, NULL);
12881288
}
12891289
NOKPROBE_SYMBOL(kretprobe_perf_func);
1290+
1291+
/*
 * bpf_get_kprobe_info - report where a kprobe-backed perf event is attached.
 * @event:                perf event whose tp_event describes the probe.
 * @fd_type:              out: BPF_FD_TYPE_KRETPROBE or BPF_FD_TYPE_KPROBE.
 * @symbol:               out: probed symbol name, or NULL when the probe
 *                        has no symbol.
 * @probe_offset:         out: offset from @symbol, 0 without a symbol.
 * @probe_addr:           out: probe address when there is no symbol, else 0.
 * @perf_type_tracepoint: true to find the probe by event name and group,
 *                        false to take it from tp_event->data.
 *
 * Return: 0 on success, -EINVAL when no trace_kprobe could be found.
 */
int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
			const char **symbol, u64 *probe_offset,
			u64 *probe_addr, bool perf_type_tracepoint)
{
	const char *pevent = trace_event_name(event->tp_event);
	const char *group = event->tp_event->class->system;
	struct trace_kprobe *probe;

	probe = perf_type_tracepoint ? find_trace_kprobe(pevent, group)
				     : event->tp_event->data;
	if (!probe)
		return -EINVAL;

	if (trace_kprobe_is_return(probe))
		*fd_type = BPF_FD_TYPE_KRETPROBE;
	else
		*fd_type = BPF_FD_TYPE_KPROBE;

	if (!probe->symbol) {
		/* No symbol: only the probe address can be reported. */
		*symbol = NULL;
		*probe_offset = 0;
		*probe_addr = (unsigned long)probe->rp.kp.addr;
		return 0;
	}

	/* With a symbol: report symbol + offset and no address. */
	*symbol = probe->symbol;
	*probe_offset = probe->rp.kp.offset;
	*probe_addr = 0;
	return 0;
}
12901319
#endif /* CONFIG_PERF_EVENTS */
12911320

12921321
/*

kernel/trace/trace_uprobe.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
11611161
{
11621162
__uprobe_perf_func(tu, func, regs, ucb, dsize);
11631163
}
1164+
1165+
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
1166+
const char **filename, u64 *probe_offset,
1167+
bool perf_type_tracepoint)
1168+
{
1169+
const char *pevent = trace_event_name(event->tp_event);
1170+
const char *group = event->tp_event->class->system;
1171+
struct trace_uprobe *tu;
1172+
1173+
if (perf_type_tracepoint)
1174+
tu = find_probe_event(pevent, group);
1175+
else
1176+
tu = event->tp_event->data;
1177+
if (!tu)
1178+
return -EINVAL;
1179+
1180+
*fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
1181+
: BPF_FD_TYPE_UPROBE;
1182+
*filename = tu->filename;
1183+
*probe_offset = tu->offset;
1184+
return 0;
1185+
}
11641186
#endif /* CONFIG_PERF_EVENTS */
11651187

11661188
static int

0 commit comments

Comments
 (0)