
Commit 555c8a8

borkmann authored and davem330 committed
bpf: avoid stack copy and use skb ctx for event output
This work addresses a couple of issues the bpf_skb_event_output() helper
currently has:

i) We need two copies instead of just a single one for the skb data when
it should be part of a sample. The data can be non-linear and thus needs
to be extracted via the bpf_skb_load_bytes() helper first, and then
copied once again into the ring buffer slot.

ii) Since bpf_skb_load_bytes() currently needs to be used first, the
helper needs to see a constant size on the passed stack buffer so that
the BPF verifier can do sanity checks on it during verification time.
Thus, just passing skb->len (or any other non-constant value) wouldn't
work, but changing bpf_skb_load_bytes() is also not the proper solution,
since the two copies are generally still needed.

iii) bpf_skb_load_bytes() is just for rather small buffers like headers,
since they need to sit on the limited BPF stack anyway.

Instead of working around this in bpf_skb_load_bytes(), this work
improves the bpf_skb_event_output() helper to address all three issues
at once: we can make use of the skb context that is passed to the helper
anyway, and use some of the reserved flag bits as a length argument. The
helper will use the new __output_custom() facility from the perf side
with bpf_skb_copy() as callback helper to walk and extract the data. It
will pass the data for setup to bpf_event_output(), which generates and
pushes the raw record with an additional frag part. The linear data used
in the first frag of the record serves as programmatically defined meta
data passed along with the appended sample.

Signed-off-by: Daniel Borkmann <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 8e7a392 · commit 555c8a8
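
For orientation, a minimal sketch (not part of this commit) of how a tc
classifier could use the reworked helper from the program side. The map
definition, meta layout, and "bpf_helpers.h" follow samples/bpf
conventions of this period; all names are illustrative.

	#include <uapi/linux/bpf.h>
	#include "bpf_helpers.h"

	struct bpf_map_def SEC("maps") perf_map = {
		.type		= BPF_MAP_TYPE_PERF_EVENT_ARRAY,
		.key_size	= sizeof(int),
		.value_size	= sizeof(__u32),
		.max_entries	= 64,	/* >= number of possible CPUs */
	};

	SEC("classifier")
	int cls_sample(struct __sk_buff *skb)
	{
		struct {
			__u32 ifindex;
			__u32 len;
		} meta = {
			.ifindex = skb->ifindex,
			.len	 = skb->len,
		};
		/* The upper flag bits carry how many skb bytes to append,
		 * so the packet payload never has to bounce through the
		 * BPF stack.
		 */
		__u64 flags = BPF_F_CURRENT_CPU | ((__u64)skb->len << 32);

		bpf_perf_event_output(skb, &perf_map, flags, &meta,
				      sizeof(meta));
		return 0; /* TC_ACT_OK */
	}

The meta struct is the constant-size stack buffer the verifier can still
check; only the variable-length packet bytes ride along via the new flag
bits.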

5 files changed, 69 insertions(+), 22 deletions(-)


include/linux/bpf.h

Lines changed: 6 additions & 1 deletion
@@ -209,7 +209,12 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
-const struct bpf_func_proto *bpf_get_event_output_proto(void);
+
+typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
+					unsigned long len);
+
+u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
 
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);

include/uapi/linux/bpf.h

Lines changed: 2 additions & 0 deletions
@@ -401,6 +401,8 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
 #define BPF_F_INDEX_MASK		0xffffffffULL
 #define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
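
BPF_F_CTXLEN_MASK reserves the 20 bits above the index field (bits
32-51) for the length of the appended context data, leaving the low 32
bits with their existing index/CPU semantics. A two-line sketch of the
program-side encode and the kernel-side decode (copy_len is
illustrative):

	u64 flags    = BPF_F_CURRENT_CPU | ((u64)copy_len << 32); /* program side */
	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;         /* kernel side  */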

kernel/bpf/core.c

Lines changed: 4 additions & 2 deletions
@@ -1054,9 +1054,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 	return NULL;
 }
 
-const struct bpf_func_proto * __weak bpf_get_event_output_proto(void)
+u64 __weak
+bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
-	return NULL;
+	return -ENOTSUPP;
 }
 
 /* Always built-in helper functions. */
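
Worth noting: bpf_event_output() is defined __weak here so that callers
such as the new bpf_skb_event_output() in net/core/filter.c still link
when kernel/trace/bpf_trace.c is not built in; in that case the stub
simply reports -ENOTSUPP. The strong definition in the next file
overrides it.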

kernel/trace/bpf_trace.c

Lines changed: 15 additions & 18 deletions
@@ -298,29 +298,26 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
 
 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
 
-static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
 	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
+	struct perf_raw_frag frag = {
+		.copy		= ctx_copy,
+		.size		= ctx_size,
+		.data		= ctx,
+	};
+	struct perf_raw_record raw = {
+		.frag = {
+			.next	= ctx_size ? &frag : NULL,
+			.size	= meta_size,
+			.data	= meta,
+		},
+	};
 
 	perf_fetch_caller_regs(regs);
 
-	return bpf_perf_event_output((long)regs, r2, flags, r4, size);
-}
-
-static const struct bpf_func_proto bpf_event_output_proto = {
-	.func		= bpf_event_output,
-	.gpl_only	= true,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_CONST_MAP_PTR,
-	.arg3_type	= ARG_ANYTHING,
-	.arg4_type	= ARG_PTR_TO_STACK,
-	.arg5_type	= ARG_CONST_STACK_SIZE,
-};
-
-const struct bpf_func_proto *bpf_get_event_output_proto(void)
-{
-	return &bpf_event_output_proto;
+	return __bpf_perf_event_output(regs, map, flags, &raw);
 }
 
 static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
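
The raw record built here is a two-part chain: the meta buffer supplied
by the program forms the linear head, and the skb context hangs off it
as a frag with a copy callback. A simplified sketch of how a consumer
can walk such a chain; this is an illustration using only the fields
visible above, not the actual __output_custom() machinery in
kernel/events/:

	/* Illustrative only: "dst" stands in for the perf ring-buffer
	 * destination the real output path writes into.
	 */
	static void walk_raw_record(const struct perf_raw_record *raw,
				    void *dst)
	{
		const struct perf_raw_frag *frag = &raw->frag;

		do {
			if (frag->copy)
				/* non-linear source (e.g. an skb): the
				 * callback extracts frag->size bytes
				 */
				frag->copy(dst, frag->data, frag->size);
			else
				memcpy(dst, frag->data, frag->size);
			dst += frag->size;
			frag = frag->next;
		} while (frag);
	}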

net/core/filter.c

Lines changed: 42 additions & 1 deletion
@@ -2025,6 +2025,47 @@ bool bpf_helper_changes_skb_data(void *func)
 	return false;
 }
 
+static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
+				  unsigned long len)
+{
+	void *ptr = skb_header_pointer(skb, 0, len, dst_buff);
+
+	if (unlikely(!ptr))
+		return len;
+	if (ptr != dst_buff)
+		memcpy(dst_buff, ptr, len);
+
+	return 0;
+}
+
+static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
+				u64 meta_size)
+{
+	struct sk_buff *skb = (struct sk_buff *)(long) r1;
+	struct bpf_map *map = (struct bpf_map *)(long) r2;
+	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+	void *meta = (void *)(long) r4;
+
+	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+		return -EINVAL;
+	if (unlikely(skb_size > skb->len))
+		return -EFAULT;
+
+	return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
+				bpf_skb_copy);
+}
+
+static const struct bpf_func_proto bpf_skb_event_output_proto = {
+	.func		= bpf_skb_event_output,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_STACK,
+	.arg5_type	= ARG_CONST_STACK_SIZE,
+};
+
 static unsigned short bpf_tunnel_key_af(u64 flags)
 {
 	return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
@@ -2357,7 +2398,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_get_hash_recalc:
 		return &bpf_get_hash_recalc_proto;
 	case BPF_FUNC_perf_event_output:
-		return bpf_get_event_output_proto();
+		return &bpf_skb_event_output_proto;
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
 #ifdef CONFIG_SOCK_CGROUP_DATA
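
Seen from user space, the resulting perf raw sample is the
program-defined meta bytes followed directly by skb_size packet bytes.
A hypothetical consumer-side view, matching the meta layout of the
program sketch earlier rather than anything defined by this commit:

	/* Hypothetical layout of one raw sample: meta (first frag)
	 * followed by the appended packet bytes (second frag).
	 */
	struct sample {
		unsigned int  ifindex;	/* meta data, linear first frag */
		unsigned int  len;	/* original skb->len */
		unsigned char pkt[];	/* up to len bytes of packet data */
	};

Note bpf_skb_copy()'s contract: it returns zero on success and the
number of uncopied bytes on failure, and because dst_buff already points
at the output destination, skb_header_pointer() lets even non-linear
skbs reach the sample with the single copy the commit message promises.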
