
Commit e87c6bc

yonghong-song authored and davem330 committed
bpf: permit multiple bpf attachments for a single perf event
This patch enables multiple bpf attachments for a single kprobe/uprobe/tracepoint trace event. Each trace_event keeps a list of attached perf events. When an event happens, all attached bpf programs are executed in the order of attachment.

A global bpf_event_mutex lock is introduced to protect prog_array attaching and detaching. An alternative would be to introduce a mutex lock in every trace_event_call structure, but that takes a lot of extra memory, so a global bpf_event_mutex lock is a good compromise.

Bpf prog detachment involves a memory allocation. If the allocation fails, a dummy do-nothing program replaces the to-be-detached program in place.

Signed-off-by: Yonghong Song <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
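For context, a minimal userspace sketch (not part of this commit) of what multiple attachment looks like after this change: two perf events are opened on the same tracepoint and each gets its own program via PERF_EVENT_IOC_SET_BPF. Here prog_fd1 and prog_fd2 are assumed to be fds of bpf programs already loaded via bpf(2), and tp_id is a tracepoint id read from tracefs.

/*
 * Illustrative sketch only, not from this commit.
 */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_tp_event(__u64 tp_id)
{
        struct perf_event_attr attr = {
                .type = PERF_TYPE_TRACEPOINT,
                .size = sizeof(attr),
                .config = tp_id,        /* tracepoint id from tracefs */
                .sample_period = 1,
        };

        return syscall(__NR_perf_event_open, &attr,
                       -1 /* pid: any */, 0 /* cpu 0 */,
                       -1 /* no group */, 0 /* flags */);
}

int attach_two_progs(__u64 tp_id, int prog_fd1, int prog_fd2)
{
        /* Two perf events on the same trace event... */
        int ev1 = open_tp_event(tp_id);
        int ev2 = open_tp_event(tp_id);

        if (ev1 < 0 || ev2 < 0)
                return -1;

        /* ...each carrying its own program. Before this commit the second
         * attachment to the same trace event was refused with -EEXIST;
         * now both land in the event's prog_array and run in order. */
        if (ioctl(ev1, PERF_EVENT_IOC_SET_BPF, prog_fd1) ||
            ioctl(ev2, PERF_EVENT_IOC_SET_BPF, prog_fd2))
                return -1;

        return 0;
}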
1 parent 0b4c684 commit e87c6bc

File tree: 9 files changed (+255, -56 lines)

include/linux/bpf.h

Lines changed: 25 additions & 5 deletions
@@ -273,18 +273,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
 int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
                                 __u32 __user *prog_ids, u32 cnt);
 
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)            \
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+                                struct bpf_prog *old_prog);
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+                        struct bpf_prog *exclude_prog,
+                        struct bpf_prog *include_prog,
+                        struct bpf_prog_array **new_array);
+
+#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)  \
         ({                                              \
-                struct bpf_prog **_prog;                \
+                struct bpf_prog **_prog, *__prog;       \
+                struct bpf_prog_array *_array;          \
                 u32 _ret = 1;                           \
                 rcu_read_lock();                        \
-                _prog = rcu_dereference(array)->progs;  \
-                for (; *_prog; _prog++)                 \
-                        _ret &= func(*_prog, ctx);      \
+                _array = rcu_dereference(array);        \
+                if (unlikely(check_non_null && !_array))\
+                        goto _out;                      \
+                _prog = _array->progs;                  \
+                while ((__prog = READ_ONCE(*_prog))) {  \
+                        _ret &= func(__prog, ctx);      \
+                        _prog++;                        \
+                }                                       \
+_out:                                                   \
                 rcu_read_unlock();                      \
                 _ret;                                   \
         })
 
+#define BPF_PROG_RUN_ARRAY(array, ctx, func)            \
+        __BPF_PROG_RUN_ARRAY(array, ctx, func, false)
+
+#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)      \
+        __BPF_PROG_RUN_ARRAY(array, ctx, func, true)
+
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
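The run-array convention behind these macros: a NULL-terminated array of program pointers whose return values are AND-ed together, which is why the dummy program's constant 1 (see kernel/bpf/core.c below) is a safe in-place substitute. A minimal userspace model of the loop (plain C, not kernel code), with function pointers standing in for bpf programs:

#include <stdio.h>

/* Stand-in for a bpf program: a function returning 0 ("drop") or 1. */
typedef unsigned int (*prog_fn)(void *ctx);

static unsigned int allow(void *ctx) { (void)ctx; return 1; }
static unsigned int deny(void *ctx)  { (void)ctx; return 0; }

/* Mirrors the macro's loop: walk until the NULL sentinel and AND all
 * return values; an empty array yields 1, as does a dummy program. */
static unsigned int run_array(prog_fn *progs, void *ctx)
{
        unsigned int ret = 1;

        for (; *progs; progs++)
                ret &= (*progs)(ctx);
        return ret;
}

int main(void)
{
        prog_fn progs[] = { allow, deny, allow, NULL };

        printf("%u\n", run_array(progs, NULL));  /* 0: one program denied */
        return 0;
}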

include/linux/trace_events.h

Lines changed: 39 additions & 4 deletions
@@ -271,14 +271,37 @@ struct trace_event_call {
 #ifdef CONFIG_PERF_EVENTS
         int                             perf_refcount;
         struct hlist_head __percpu      *perf_events;
-        struct bpf_prog                 *prog;
-        struct perf_event               *bpf_prog_owner;
+        struct bpf_prog_array __rcu     *prog_array;
 
         int     (*perf_perm)(struct trace_event_call *,
                              struct perf_event *);
 #endif
 };
 
+#ifdef CONFIG_PERF_EVENTS
+static inline bool bpf_prog_array_valid(struct trace_event_call *call)
+{
+        /*
+         * This inline function checks whether call->prog_array
+         * is valid or not. The function is called in various places,
+         * outside rcu_read_lock/unlock, as a heuristic to speed up execution.
+         *
+         * If this function returns true, and later call->prog_array
+         * becomes NULL inside rcu_read_lock/unlock region,
+         * we bail out then. If this function returns false,
+         * there is a risk that we might miss a few events if the checking
+         * were delayed until inside rcu_read_lock/unlock region and
+         * call->prog_array happened to become non-NULL then.
+         *
+         * Here, READ_ONCE() is used instead of rcu_access_pointer().
+         * rcu_access_pointer() requires the actual definition of
+         * "struct bpf_prog_array" while READ_ONCE() only needs
+         * a declaration of the same type.
+         */
+        return !!READ_ONCE(call->prog_array);
+}
+#endif
+
 static inline const char *
 trace_event_name(struct trace_event_call *call)
 {
@@ -435,12 +458,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
 }
 
 #ifdef CONFIG_BPF_EVENTS
-unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
+int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
+void perf_event_detach_bpf_prog(struct perf_event *event);
 #else
-static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {
         return 1;
 }
+
+static inline int
+perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
+{
+        return -EOPNOTSUPP;
+}
+
+static inline void perf_event_detach_bpf_prog(struct perf_event *event) { }
+
 #endif
 
 enum {
@@ -511,6 +545,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
 {
         perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
 }
+
 #endif
 
 #endif /* _LINUX_TRACE_EVENT_H */
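A hypothetical caller, sketched in kernel style with illustrative names (not code from this commit), showing how the heuristic above pairs with the BPF_PROG_RUN_ARRAY_CHECK variant:

/* Hypothetical handler; example_handle_event is not a real kernel symbol. */
static void example_handle_event(struct trace_event_call *call, void *ctx)
{
        unsigned int ret;

        /* Cheap check outside RCU: most events have no programs attached. */
        if (!bpf_prog_array_valid(call))
                return;

        /* call->prog_array may have become NULL in the meantime; the
         * _CHECK variant re-tests it under rcu_read_lock() and falls
         * back to returning 1. */
        ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
        if (!ret)
                return;         /* some attached program said "drop" */

        /* ... deliver the event ... */
}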

include/trace/perf.h

Lines changed: 3 additions & 3 deletions
@@ -34,7 +34,6 @@ perf_trace_##call(void *__data, proto)                  \
         struct trace_event_call *event_call = __data;           \
         struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
         struct trace_event_raw_##call *entry;                   \
-        struct bpf_prog *prog = event_call->prog;               \
         struct pt_regs *__regs;                                 \
         u64 __count = 1;                                        \
         struct task_struct *__task = NULL;                      \
@@ -46,8 +45,9 @@ perf_trace_##call(void *__data, proto)                  \
         __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
                                                                 \
         head = this_cpu_ptr(event_call->perf_events);           \
-        if (!prog && __builtin_constant_p(!__task) && !__task &&\
-            hlist_empty(head))                                  \
+        if (!bpf_prog_array_valid(event_call) &&                \
+            __builtin_constant_p(!__task) && !__task &&         \
+            hlist_empty(head))                                  \
                 return;                                         \
                                                                 \
         __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\

kernel/bpf/core.c

Lines changed: 81 additions & 0 deletions
@@ -1394,6 +1394,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
+static unsigned int __bpf_prog_ret1(const void *ctx,
+                                    const struct bpf_insn *insn)
+{
+        return 1;
+}
+
+static struct bpf_prog_dummy {
+        struct bpf_prog prog;
+} dummy_bpf_prog = {
+        .prog = {
+                .bpf_func = __bpf_prog_ret1,
+        },
+};
+
 /* to avoid allocating empty bpf_prog_array for cgroups that
  * don't have bpf program attached use one global 'empty_prog_array'
  * It will not be modified the caller of bpf_prog_array_alloc()
@@ -1463,6 +1477,73 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
         return 0;
 }
 
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+                                struct bpf_prog *old_prog)
+{
+        struct bpf_prog **prog = progs->progs;
+
+        for (; *prog; prog++)
+                if (*prog == old_prog) {
+                        WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+                        break;
+                }
+}
+
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+                        struct bpf_prog *exclude_prog,
+                        struct bpf_prog *include_prog,
+                        struct bpf_prog_array **new_array)
+{
+        int new_prog_cnt, carry_prog_cnt = 0;
+        struct bpf_prog **existing_prog;
+        struct bpf_prog_array *array;
+        int new_prog_idx = 0;
+
+        /* Figure out how many existing progs we need to carry over to
+         * the new array.
+         */
+        if (old_array) {
+                existing_prog = old_array->progs;
+                for (; *existing_prog; existing_prog++) {
+                        if (*existing_prog != exclude_prog &&
+                            *existing_prog != &dummy_bpf_prog.prog)
+                                carry_prog_cnt++;
+                        if (*existing_prog == include_prog)
+                                return -EEXIST;
+                }
+        }
+
+        /* How many progs (not NULL) will be in the new array? */
+        new_prog_cnt = carry_prog_cnt;
+        if (include_prog)
+                new_prog_cnt += 1;
+
+        /* Do we have any prog (not NULL) in the new array? */
+        if (!new_prog_cnt) {
+                *new_array = NULL;
+                return 0;
+        }
+
+        /* +1 as the end of prog_array is marked with NULL */
+        array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
+        if (!array)
+                return -ENOMEM;
+
+        /* Fill in the new prog array */
+        if (carry_prog_cnt) {
+                existing_prog = old_array->progs;
+                for (; *existing_prog; existing_prog++)
+                        if (*existing_prog != exclude_prog &&
+                            *existing_prog != &dummy_bpf_prog.prog)
+                                array->progs[new_prog_idx++] = *existing_prog;
+        }
+        if (include_prog)
+                array->progs[new_prog_idx++] = include_prog;
+        array->progs[new_prog_idx] = NULL;
+        *new_array = array;
+        return 0;
+}
+
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
         struct bpf_prog_aux *aux;
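To make bpf_prog_array_copy()'s semantics concrete, a small userspace model (plain C, not kernel code): starting from the array [A, B, dummy, C], excluding B while including D yields [A, C, D], with dummy slots left over from failed detaches dropped along the way.

#include <stdio.h>
#include <stdlib.h>

struct prog { const char *name; };       /* stand-in for struct bpf_prog */

static struct prog dummy = { "dummy" };  /* stand-in for dummy_bpf_prog */

/* Same shape as bpf_prog_array_copy(): build a NULL-terminated array from
 * 'old', skipping 'exclude' and any dummy slots, then appending 'include'. */
static struct prog **copy_array(struct prog **old, struct prog *exclude,
                                struct prog *include)
{
        int carry = 0, i = 0;
        struct prog **p, **arr;

        if (old)
                for (p = old; *p; p++)
                        if (*p != exclude && *p != &dummy)
                                carry++;

        /* +1 for the NULL sentinel that marks the end of the array */
        arr = calloc(carry + (include ? 1 : 0) + 1, sizeof(*arr));
        if (!arr)
                return NULL;

        if (old)
                for (p = old; *p; p++)
                        if (*p != exclude && *p != &dummy)
                                arr[i++] = *p;
        if (include)
                arr[i++] = include;
        arr[i] = NULL;
        return arr;
}

int main(void)
{
        struct prog A = {"A"}, B = {"B"}, C = {"C"}, D = {"D"};
        struct prog *old[] = { &A, &B, &dummy, &C, NULL };
        struct prog **new_arr = copy_array(old, &B, &D);

        if (!new_arr)
                return 1;
        for (struct prog **p = new_arr; *p; p++)
                printf("%s ", (*p)->name);  /* prints: A C D */
        printf("\n");
        free(new_arr);
        return 0;
}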

kernel/events/core.c

Lines changed: 8 additions & 18 deletions
@@ -7954,11 +7954,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
                                struct pt_regs *regs, struct hlist_head *head,
                                struct task_struct *task)
 {
-        struct bpf_prog *prog = call->prog;
-
-        if (prog) {
+        if (bpf_prog_array_valid(call)) {
                 *(struct pt_regs **)raw_data = regs;
-                if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
+                if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
                         perf_swevent_put_recursion_context(rctx);
                         return;
                 }
@@ -8147,13 +8145,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
         bool is_kprobe, is_tracepoint, is_syscall_tp;
         struct bpf_prog *prog;
+        int ret;
 
         if (event->attr.type != PERF_TYPE_TRACEPOINT)
                 return perf_event_set_bpf_handler(event, prog_fd);
 
-        if (event->tp_event->prog)
-                return -EEXIST;
-
         is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
         is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
         is_syscall_tp = is_syscall_trace_event(event->tp_event);
@@ -8181,26 +8177,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
                         return -EACCES;
                 }
         }
-        event->tp_event->prog = prog;
-        event->tp_event->bpf_prog_owner = event;
 
-        return 0;
+        ret = perf_event_attach_bpf_prog(event, prog);
+        if (ret)
+                bpf_prog_put(prog);
+        return ret;
 }
 
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
-        struct bpf_prog *prog;
-
         if (event->attr.type != PERF_TYPE_TRACEPOINT) {
                 perf_event_free_bpf_handler(event);
                 return;
         }
-
-        prog = event->tp_event->prog;
-        if (prog && event->tp_event->bpf_prog_owner == event) {
-                event->tp_event->prog = NULL;
-                bpf_prog_put(prog);
-        }
+        perf_event_detach_bpf_prog(event);
 }
 
 #else
