
Commit 3135a91

Authored by yonghong-song, committed by Somasundaram Krishnasamy
bpf: permit multiple bpf attachments for a single perf event
This patch enables multiple bpf attachments for a single kprobe/uprobe/tracepoint trace event. Each trace_event keeps a list of attached perf events. When an event happens, all attached bpf programs will be executed in the order of attachment.

A global bpf_event_mutex lock is introduced to protect prog_array attaching and detaching. An alternative would be to introduce a mutex lock in every trace_event_call structure, but that takes a lot of extra memory, so a global bpf_event_mutex lock is a good compromise.

The bpf prog detachment involves allocation of memory. If the allocation fails, a dummy do-nothing program will replace the to-be-detached program in place.

Signed-off-by: Yonghong Song <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
(cherry picked from commit e87c6bc)

Orabug: 31667601

Signed-off-by: Alan Maguire <[email protected]>
Reviewed-by: Mark Haywood <[email protected]>

Conflicts:
    include/trace/perf.h

KABI-related fixes to replace/deprecate fields.

Signed-off-by: Somasundaram Krishnasamy <[email protected]>
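As a rough sketch of the scheme this message describes — attachment order preserved in a NULL-terminated array, all updates serialized by one global mutex — the following user-space C model may help. It is purely illustrative: pthreads and an atomic pointer stand in for the kernel's bpf_event_mutex and RCU, and every name in it is hypothetical.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef unsigned int (*prog_fn)(void *ctx);

    static pthread_mutex_t event_mutex = PTHREAD_MUTEX_INITIALIZER;
    static _Atomic(prog_fn *) prog_array;   /* NULL-terminated program list */

    /* Attach: copy the old array, append the new program, publish the copy.
     * A single lock serializes all attach/detach, as the message explains. */
    static int attach_prog(prog_fn prog)
    {
            prog_fn *old, *new_arr;
            size_t n = 0;

            pthread_mutex_lock(&event_mutex);
            old = atomic_load(&prog_array);
            while (old && old[n])
                    n++;
            new_arr = calloc(n + 2, sizeof(*new_arr)); /* +1 slot, +1 NULL */
            if (!new_arr) {
                    pthread_mutex_unlock(&event_mutex);
                    return -1;
            }
            if (old)
                    memcpy(new_arr, old, n * sizeof(*new_arr));
            new_arr[n] = prog;                  /* appended: attachment order */
            atomic_store(&prog_array, new_arr); /* models rcu_assign_pointer() */
            pthread_mutex_unlock(&event_mutex);
            free(old);  /* the kernel defers this across an RCU grace period */
            return 0;
    }

    static unsigned int prog_a(void *ctx) { (void)ctx; puts("prog_a"); return 1; }
    static unsigned int prog_b(void *ctx) { (void)ctx; puts("prog_b"); return 1; }

    int main(void)
    {
            prog_fn *p;

            attach_prog(prog_a);
            attach_prog(prog_b);
            for (p = atomic_load(&prog_array); *p; p++)
                    (*p)(NULL);                 /* runs prog_a, then prog_b */
            return 0;
    }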
Parent: e1cae2d

File tree

9 files changed, +257 -56 lines changed

include/linux/bpf.h

Lines changed: 25 additions & 5 deletions
@@ -279,18 +279,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
 int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
                                 __u32 __user *prog_ids, u32 cnt);
 
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)            \
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+                                struct bpf_prog *old_prog);
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+                        struct bpf_prog *exclude_prog,
+                        struct bpf_prog *include_prog,
+                        struct bpf_prog_array **new_array);
+
+#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)  \
         ({                                              \
-                struct bpf_prog **_prog;                \
+                struct bpf_prog **_prog, *__prog;       \
+                struct bpf_prog_array *_array;          \
                 u32 _ret = 1;                           \
                 rcu_read_lock();                        \
-                _prog = rcu_dereference(array)->progs;  \
-                for (; *_prog; _prog++)                 \
-                        _ret &= func(*_prog, ctx);      \
+                _array = rcu_dereference(array);        \
+                if (unlikely(check_non_null && !_array))\
+                        goto _out;                      \
+                _prog = _array->progs;                  \
+                while ((__prog = READ_ONCE(*_prog))) {  \
+                        _ret &= func(__prog, ctx);      \
+                        _prog++;                        \
+                }                                       \
+_out:                                                   \
                 rcu_read_unlock();                      \
                 _ret;                                   \
          })
 
+#define BPF_PROG_RUN_ARRAY(array, ctx, func)            \
+        __BPF_PROG_RUN_ARRAY(array, ctx, func, false)
+
+#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)      \
+        __BPF_PROG_RUN_ARRAY(array, ctx, func, true)
+
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 
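The two wrapper macros differ only in the check_non_null flag: the _CHECK variant tolerates a NULL (empty) array, which BPF_PROG_RUN_ARRAY assumes cannot happen. A hypothetical user-space model of that distinction, with plain function pointers standing in for BPF programs:

    #include <stddef.h>
    #include <stdio.h>

    typedef unsigned int (*prog_fn)(void *ctx);
    struct prog_array { prog_fn progs[4]; };    /* NULL-terminated */

    static unsigned int run_array(struct prog_array *arr, void *ctx,
                                  int check_non_null)
    {
            unsigned int ret = 1;
            prog_fn *p, fn;

            if (check_non_null && !arr)     /* _CHECK variant tolerates NULL */
                    return ret;
            for (p = arr->progs; (fn = *p) != NULL; p++)
                    ret &= fn(ctx);         /* AND the verdicts, in order */
            return ret;
    }

    static unsigned int allow(void *ctx) { (void)ctx; return 1; }
    static unsigned int deny(void *ctx)  { (void)ctx; return 0; }

    int main(void)
    {
            struct prog_array arr = { .progs = { allow, deny, NULL } };

            printf("%u\n", run_array(&arr, NULL, 0));  /* 1 & 0 = 0 */
            printf("%u\n", run_array(NULL, NULL, 1));  /* NULL array: 1 */
            return 0;
    }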
include/linux/trace_events.h

Lines changed: 41 additions & 4 deletions
@@ -272,14 +272,39 @@ struct trace_event_call {
 #ifdef CONFIG_PERF_EVENTS
         int                             perf_refcount;
         struct hlist_head __percpu      *perf_events;
-        struct bpf_prog                 *prog;
-        struct perf_event               *bpf_prog_owner;
+        UEK_KABI_REPLACE(struct bpf_prog *prog,
+                         struct bpf_prog_array __rcu *prog_array)
+        UEK_KABI_DEPRECATE(struct perf_event *, bpf_prog_owner)
 
         int     (*perf_perm)(struct trace_event_call *,
                              struct perf_event *);
 #endif
 };
 
+#ifdef CONFIG_PERF_EVENTS
+static inline bool bpf_prog_array_valid(struct trace_event_call *call)
+{
+        /*
+         * This inline function checks whether call->prog_array
+         * is valid or not. The function is called in various places,
+         * outside rcu_read_lock/unlock, as a heuristic to speed up execution.
+         *
+         * If this function returns true, and later call->prog_array
+         * becomes false inside rcu_read_lock/unlock region,
+         * we bail out then. If this function returns false,
+         * there is a risk that we might miss a few events if the checking
+         * were delayed until inside rcu_read_lock/unlock region and
+         * call->prog_array happened to become non-NULL then.
+         *
+         * Here, READ_ONCE() is used instead of rcu_access_pointer().
+         * rcu_access_pointer() requires the actual definition of
+         * "struct bpf_prog_array" while READ_ONCE() only needs
+         * a declaration of the same type.
+         */
+        return !!READ_ONCE(call->prog_array);
+}
+#endif
+
 static inline const char *
 trace_event_name(struct trace_event_call *call)
 {
@@ -436,12 +461,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
 }
 
 #ifdef CONFIG_BPF_EVENTS
-unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
+int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
+void perf_event_detach_bpf_prog(struct perf_event *event);
 #else
-static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {
         return 1;
 }
+
+static inline int
+perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
+{
+        return -EOPNOTSUPP;
+}
+
+static inline void perf_event_detach_bpf_prog(struct perf_event *event) { }
+
 #endif
 
 enum {
@@ -512,6 +548,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
 {
         perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
 }
+
 #endif
 
 #endif /* _LINUX_TRACE_EVENT_H */
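
For context, these helpers are meant to pair as in the perf_trace_run_bpf_submit() change to kernel/events/core.c further down: the heuristic gates the slow path outside RCU, and trace_call_bpf() (in kernel/trace/bpf_trace.c, one of the changed files not excerpted here) is expected to re-validate the array under rcu_read_lock(). A hedged, non-compilable sketch of that caller shape; submit_event() is a hypothetical stand-in for the emit path:

    static void handle_tp_event(struct trace_event_call *call, void *raw_data)
    {
            if (bpf_prog_array_valid(call)) {   /* cheap check, outside RCU */
                    /* trace_call_bpf() re-checks call->prog_array under
                     * rcu_read_lock(), so a concurrent detach is handled
                     * safely; a zero verdict means some attached program
                     * asked to drop the event. */
                    if (!trace_call_bpf(call, raw_data))
                            return;
            }
            submit_event(raw_data);             /* hypothetical emit path */
    }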

include/trace/perf.h

Lines changed: 3 additions & 3 deletions
@@ -35,7 +35,6 @@ perf_trace_##call(void *__data, proto)                        \
         struct trace_event_call *event_call = __data;                   \
         struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
         struct trace_event_raw_##call *entry;                           \
-        struct bpf_prog *prog = event_call->prog;                       \
         struct pt_regs *__regs;                                         \
         u64 __count = 1;                                                \
         struct task_struct *__task = NULL;                              \
@@ -47,8 +46,9 @@ perf_trace_##call(void *__data, proto)                        \
         __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
                                                                         \
         head = this_cpu_ptr(event_call->perf_events);                   \
-        if (!prog && __builtin_constant_p(!__task) && !__task &&        \
-            hlist_empty(head))                                          \
+        if (!bpf_prog_array_valid(event_call) &&                        \
+            __builtin_constant_p(!__task) && !__task &&                 \
+            hlist_empty(head))                                          \
                 return;                                                 \
                                                                         \
         __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\

kernel/bpf/core.c

Lines changed: 81 additions & 0 deletions
@@ -1472,6 +1472,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
+static unsigned int __bpf_prog_ret1(const void *ctx,
+                                    const struct bpf_insn *insn)
+{
+        return 1;
+}
+
+static struct bpf_prog_dummy {
+        struct bpf_prog prog;
+} dummy_bpf_prog = {
+        .prog = {
+                .bpf_func = __bpf_prog_ret1,
+        },
+};
+
 /* to avoid allocating empty bpf_prog_array for cgroups that
  * don't have bpf program attached use one global 'empty_prog_array'
  * It will not be modified the caller of bpf_prog_array_alloc()
@@ -1541,6 +1555,73 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
         return 0;
 }
 
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+                                struct bpf_prog *old_prog)
+{
+        struct bpf_prog **prog = progs->progs;
+
+        for (; *prog; prog++)
+                if (*prog == old_prog) {
+                        WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+                        break;
+                }
+}
+
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+                        struct bpf_prog *exclude_prog,
+                        struct bpf_prog *include_prog,
+                        struct bpf_prog_array **new_array)
+{
+        int new_prog_cnt, carry_prog_cnt = 0;
+        struct bpf_prog **existing_prog;
+        struct bpf_prog_array *array;
+        int new_prog_idx = 0;
+
+        /* Figure out how many existing progs we need to carry over to
+         * the new array.
+         */
+        if (old_array) {
+                existing_prog = old_array->progs;
+                for (; *existing_prog; existing_prog++) {
+                        if (*existing_prog != exclude_prog &&
+                            *existing_prog != &dummy_bpf_prog.prog)
+                                carry_prog_cnt++;
+                        if (*existing_prog == include_prog)
+                                return -EEXIST;
+                }
+        }
+
+        /* How many progs (not NULL) will be in the new array? */
+        new_prog_cnt = carry_prog_cnt;
+        if (include_prog)
+                new_prog_cnt += 1;
+
+        /* Do we have any prog (not NULL) in the new array? */
+        if (!new_prog_cnt) {
+                *new_array = NULL;
+                return 0;
+        }
+
+        /* +1 as the end of prog_array is marked with NULL */
+        array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
+        if (!array)
+                return -ENOMEM;
+
+        /* Fill in the new prog array */
+        if (carry_prog_cnt) {
+                existing_prog = old_array->progs;
+                for (; *existing_prog; existing_prog++)
+                        if (*existing_prog != exclude_prog &&
+                            *existing_prog != &dummy_bpf_prog.prog)
+                                array->progs[new_prog_idx++] = *existing_prog;
+        }
+        if (include_prog)
+                array->progs[new_prog_idx++] = include_prog;
+        array->progs[new_prog_idx] = NULL;
+        *new_array = array;
+        return 0;
+}
+
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
         struct bpf_prog_aux *aux;
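
To make the copy/detach semantics concrete, here is a small user-space harness — hypothetical and simplified (no RCU, no locking, plain structs) — that mirrors the logic of the two functions above:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct prog { int id; };                 /* stand-in for struct bpf_prog */

    static struct prog dummy = { 0 };        /* stand-in for dummy_bpf_prog */

    /* mirrors bpf_prog_array_delete_safe(): swap the dummy in, in place */
    static void delete_safe(struct prog **progs, struct prog *old)
    {
            for (; *progs; progs++)
                    if (*progs == old) {
                            *progs = &dummy;
                            break;
                    }
    }

    /* mirrors bpf_prog_array_copy(): build a new NULL-terminated array,
     * dropping exclude and any dummy, appending include at the tail */
    static int copy_array(struct prog **old, struct prog *exclude,
                          struct prog *include, struct prog ***new_array)
    {
            int carry = 0, cnt, idx = 0;
            struct prog **p, **arr;

            if (old) {
                    for (p = old; *p; p++) {
                            if (*p != exclude && *p != &dummy)
                                    carry++;
                            if (*p == include)
                                    return -EEXIST;  /* already attached */
                    }
            }
            cnt = carry + (include ? 1 : 0);
            if (!cnt) {
                    *new_array = NULL;               /* nothing left attached */
                    return 0;
            }
            arr = calloc(cnt + 1, sizeof(*arr));     /* +1: NULL terminator */
            if (!arr)
                    return -ENOMEM;
            if (carry)
                    for (p = old; *p; p++)
                            if (*p != exclude && *p != &dummy)
                                    arr[idx++] = *p;
            if (include)
                    arr[idx++] = include;
            arr[idx] = NULL;
            *new_array = arr;
            return 0;
    }

    int main(void)
    {
            struct prog a = { 1 }, b = { 2 };
            struct prog *old[] = { &a, &b, NULL };
            struct prog **new_array;

            /* re-attaching an already-attached program is rejected */
            printf("attach b again: %d\n", copy_array(old, NULL, &b, &new_array));

            /* detach fallback: if the copy fails with -ENOMEM, the dummy
             * is swapped in place instead */
            delete_safe(old, &a);
            printf("slot 0 is dummy: %d\n", old[0] == &dummy);
            return 0;
    }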

kernel/events/core.c

Lines changed: 8 additions & 18 deletions
@@ -8038,11 +8038,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
                                struct pt_regs *regs, struct hlist_head *head,
                                struct task_struct *task)
 {
-        struct bpf_prog *prog = call->prog;
-
-        if (prog) {
+        if (bpf_prog_array_valid(call)) {
                 *(struct pt_regs **)raw_data = regs;
-                if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) {
+                if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
                         perf_swevent_put_recursion_context(rctx);
                         return;
                 }
@@ -8232,13 +8230,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
         bool is_kprobe, is_tracepoint, is_syscall_tp;
         struct bpf_prog *prog;
+        int ret;
 
         if (event->attr.type != PERF_TYPE_TRACEPOINT)
                 return perf_event_set_bpf_handler(event, prog_fd);
 
-        if (event->tp_event->prog)
-                return -EEXIST;
-
         is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
         is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
         is_syscall_tp = is_syscall_trace_event(event->tp_event);
@@ -8266,26 +8262,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
                         return -EACCES;
                 }
         }
-        event->tp_event->prog = prog;
-        event->tp_event->bpf_prog_owner = event;
 
-        return 0;
+        ret = perf_event_attach_bpf_prog(event, prog);
+        if (ret)
+                bpf_prog_put(prog);
+        return ret;
 }
 
 static void perf_event_free_bpf_prog(struct perf_event *event)
 {
-        struct bpf_prog *prog;
-
         if (event->attr.type != PERF_TYPE_TRACEPOINT) {
                 perf_event_free_bpf_handler(event);
                 return;
         }
-
-        prog = event->tp_event->prog;
-        if (prog && event->tp_event->bpf_prog_owner == event) {
-                event->tp_event->prog = NULL;
-                bpf_prog_put(prog);
-        }
+        perf_event_detach_bpf_prog(event);
 }
 
 #else
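
From user space, the visible effect of this change is that PERF_EVENT_IOC_SET_BPF no longer fails with EEXIST when a second perf event on the same tracepoint attaches its own program. A hedged sketch (error handling elided; the tracepoint id and the two already-loaded program fds are placeholders the caller must supply):

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* open a perf event on a tracepoint; tp_id comes from
     * /sys/kernel/debug/tracing/events/<category>/<name>/id */
    static int perf_open_tracepoint(unsigned long long tp_id)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.type = PERF_TYPE_TRACEPOINT;
            attr.size = sizeof(attr);
            attr.config = tp_id;
            return (int)syscall(__NR_perf_event_open, &attr,
                                -1 /* pid: all */, 0 /* cpu */,
                                -1 /* group */, 0 /* flags */);
    }

    /* attach two already-loaded BPF programs to the same tracepoint */
    int attach_two(unsigned long long tp_id, int prog_fd1, int prog_fd2)
    {
            int fd1 = perf_open_tracepoint(tp_id);
            int fd2 = perf_open_tracepoint(tp_id);

            if (fd1 < 0 || fd2 < 0)
                    return -1;
            /* before this patch, the second SET_BPF on the same trace
             * event failed with EEXIST; now both programs attach and
             * run in attachment order */
            if (ioctl(fd1, PERF_EVENT_IOC_SET_BPF, prog_fd1) ||
                ioctl(fd2, PERF_EVENT_IOC_SET_BPF, prog_fd2))
                    return -1;
            return 0;
    }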
