Skip to content

Commit e980a07

Browse files
committed
Merge branch 'bpf-event-output-helper-improvements'
Daniel Borkmann says:

====================
BPF event output helper improvements

This set adds improvements to the BPF event output helper to
support non-linear data sampling, here specifically, for skb
context. For details please see individual patches. The set
is based against net-next tree.

v1 -> v2:

  - Integrated and adapted Peter's diff into patch 1, updated
    the remaining ones accordingly. Thanks Peter!
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 7acef60 + 555c8a8 commit e980a07

File tree

10 files changed

+180
-63
lines changed

10 files changed

+180
-63
lines changed

arch/s390/kernel/perf_cpum_sf.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -979,12 +979,15 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
979979
struct pt_regs regs;
980980
struct perf_sf_sde_regs *sde_regs;
981981
struct perf_sample_data data;
982-
struct perf_raw_record raw;
982+
struct perf_raw_record raw = {
983+
.frag = {
984+
.size = sfr->size,
985+
.data = sfr,
986+
},
987+
};
983988

984989
/* Setup perf sample */
985990
perf_sample_data_init(&data, 0, event->hw.last_period);
986-
raw.size = sfr->size;
987-
raw.data = sfr;
988991
data.raw = &raw;
989992

990993
/* Setup pt_regs to look like an CPU-measurement external interrupt

arch/x86/events/amd/ibs.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -655,8 +655,12 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
655655
}
656656

657657
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
658-
raw.size = sizeof(u32) + ibs_data.size;
659-
raw.data = ibs_data.data;
658+
raw = (struct perf_raw_record){
659+
.frag = {
660+
.size = sizeof(u32) + ibs_data.size,
661+
.data = ibs_data.data,
662+
},
663+
};
660664
data.raw = &raw;
661665
}
662666

include/linux/bpf.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,12 @@ u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
209209
bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
210210

211211
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
212-
const struct bpf_func_proto *bpf_get_event_output_proto(void);
212+
213+
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
214+
unsigned long len);
215+
216+
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
217+
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
213218

214219
#ifdef CONFIG_BPF_SYSCALL
215220
DECLARE_PER_CPU(int, bpf_prog_active);

include/linux/perf_event.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,22 @@ struct perf_callchain_entry_ctx {
6969
bool contexts_maxed;
7070
};
7171

72+
typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
73+
unsigned long len);
74+
75+
struct perf_raw_frag {
76+
union {
77+
struct perf_raw_frag *next;
78+
unsigned long pad;
79+
};
80+
perf_copy_f copy;
81+
void *data;
82+
u32 size;
83+
} __packed;
84+
7285
struct perf_raw_record {
86+
struct perf_raw_frag frag;
7387
u32 size;
74-
void *data;
7588
};
7689

7790
/*
@@ -1283,6 +1296,11 @@ extern void perf_restore_debug_store(void);
12831296
static inline void perf_restore_debug_store(void) { }
12841297
#endif
12851298

1299+
static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
1300+
{
1301+
return frag->pad < sizeof(u64);
1302+
}
1303+
12861304
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
12871305

12881306
/*

include/uapi/linux/bpf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,8 @@ enum bpf_func_id {
401401
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
402402
#define BPF_F_INDEX_MASK 0xffffffffULL
403403
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
404+
/* BPF_FUNC_perf_event_output for sk_buff input context. */
405+
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
404406

405407
/* user accessible mirror of in-kernel sk_buff.
406408
* new fields can only be added to the end of this structure

kernel/bpf/core.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,9 +1054,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
10541054
return NULL;
10551055
}
10561056

1057-
const struct bpf_func_proto * __weak bpf_get_event_output_proto(void)
1057+
u64 __weak
1058+
bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
1059+
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
10581060
{
1059-
return NULL;
1061+
return -ENOTSUPP;
10601062
}
10611063

10621064
/* Always built-in helper functions. */

kernel/events/core.c

Lines changed: 46 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5553,16 +5553,26 @@ void perf_output_sample(struct perf_output_handle *handle,
55535553
}
55545554

55555555
if (sample_type & PERF_SAMPLE_RAW) {
5556-
if (data->raw) {
5557-
u32 raw_size = data->raw->size;
5558-
u32 real_size = round_up(raw_size + sizeof(u32),
5559-
sizeof(u64)) - sizeof(u32);
5560-
u64 zero = 0;
5561-
5562-
perf_output_put(handle, real_size);
5563-
__output_copy(handle, data->raw->data, raw_size);
5564-
if (real_size - raw_size)
5565-
__output_copy(handle, &zero, real_size - raw_size);
5556+
struct perf_raw_record *raw = data->raw;
5557+
5558+
if (raw) {
5559+
struct perf_raw_frag *frag = &raw->frag;
5560+
5561+
perf_output_put(handle, raw->size);
5562+
do {
5563+
if (frag->copy) {
5564+
__output_custom(handle, frag->copy,
5565+
frag->data, frag->size);
5566+
} else {
5567+
__output_copy(handle, frag->data,
5568+
frag->size);
5569+
}
5570+
if (perf_raw_frag_last(frag))
5571+
break;
5572+
frag = frag->next;
5573+
} while (1);
5574+
if (frag->pad)
5575+
__output_skip(handle, NULL, frag->pad);
55665576
} else {
55675577
struct {
55685578
u32 size;
@@ -5687,14 +5697,28 @@ void perf_prepare_sample(struct perf_event_header *header,
56875697
}
56885698

56895699
if (sample_type & PERF_SAMPLE_RAW) {
5690-
int size = sizeof(u32);
5691-
5692-
if (data->raw)
5693-
size += data->raw->size;
5694-
else
5695-
size += sizeof(u32);
5700+
struct perf_raw_record *raw = data->raw;
5701+
int size;
5702+
5703+
if (raw) {
5704+
struct perf_raw_frag *frag = &raw->frag;
5705+
u32 sum = 0;
5706+
5707+
do {
5708+
sum += frag->size;
5709+
if (perf_raw_frag_last(frag))
5710+
break;
5711+
frag = frag->next;
5712+
} while (1);
5713+
5714+
size = round_up(sum + sizeof(u32), sizeof(u64));
5715+
raw->size = size - sizeof(u32);
5716+
frag->pad = raw->size - sum;
5717+
} else {
5718+
size = sizeof(u64);
5719+
}
56965720

5697-
header->size += round_up(size, sizeof(u64));
5721+
header->size += size;
56985722
}
56995723

57005724
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@ -7331,7 +7355,7 @@ static struct pmu perf_swevent = {
73317355
static int perf_tp_filter_match(struct perf_event *event,
73327356
struct perf_sample_data *data)
73337357
{
7334-
void *record = data->raw->data;
7358+
void *record = data->raw->frag.data;
73357359

73367360
/* only top level events have filters set */
73377361
if (event->parent)
@@ -7387,8 +7411,10 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
73877411
struct perf_event *event;
73887412

73897413
struct perf_raw_record raw = {
7390-
.size = entry_size,
7391-
.data = record,
7414+
.frag = {
7415+
.size = entry_size,
7416+
.data = record,
7417+
},
73927418
};
73937419

73947420
perf_sample_data_init(&data, 0, 0);

kernel/events/internal.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,7 @@ static inline unsigned long perf_aux_size(struct ring_buffer *rb)
123123
return rb->aux_nr_pages << PAGE_SHIFT;
124124
}
125125

126-
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
127-
static inline unsigned long \
128-
func_name(struct perf_output_handle *handle, \
129-
const void *buf, unsigned long len) \
126+
#define __DEFINE_OUTPUT_COPY_BODY(memcpy_func) \
130127
{ \
131128
unsigned long size, written; \
132129
\
@@ -152,6 +149,17 @@ func_name(struct perf_output_handle *handle, \
152149
return len; \
153150
}
154151

152+
#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \
153+
static inline unsigned long \
154+
func_name(struct perf_output_handle *handle, \
155+
const void *buf, unsigned long len) \
156+
__DEFINE_OUTPUT_COPY_BODY(memcpy_func)
157+
158+
static inline unsigned long
159+
__output_custom(struct perf_output_handle *handle, perf_copy_f copy_func,
160+
const void *buf, unsigned long len)
161+
__DEFINE_OUTPUT_COPY_BODY(copy_func)
162+
155163
static inline unsigned long
156164
memcpy_common(void *dst, const void *src, unsigned long n)
157165
{

kernel/trace/bpf_trace.c

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -233,24 +233,17 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
233233
.arg2_type = ARG_ANYTHING,
234234
};
235235

236-
static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
236+
static __always_inline u64
237+
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
238+
u64 flags, struct perf_raw_record *raw)
237239
{
238-
struct pt_regs *regs = (struct pt_regs *) (long) r1;
239-
struct bpf_map *map = (struct bpf_map *) (long) r2;
240240
struct bpf_array *array = container_of(map, struct bpf_array, map);
241241
unsigned int cpu = smp_processor_id();
242242
u64 index = flags & BPF_F_INDEX_MASK;
243-
void *data = (void *) (long) r4;
244243
struct perf_sample_data sample_data;
245244
struct bpf_event_entry *ee;
246245
struct perf_event *event;
247-
struct perf_raw_record raw = {
248-
.size = size,
249-
.data = data,
250-
};
251246

252-
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
253-
return -EINVAL;
254247
if (index == BPF_F_CURRENT_CPU)
255248
index = cpu;
256249
if (unlikely(index >= array->map.max_entries))
@@ -269,11 +262,29 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
269262
return -EOPNOTSUPP;
270263

271264
perf_sample_data_init(&sample_data, 0, 0);
272-
sample_data.raw = &raw;
265+
sample_data.raw = raw;
273266
perf_event_output(event, &sample_data, regs);
274267
return 0;
275268
}
276269

270+
static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
271+
{
272+
struct pt_regs *regs = (struct pt_regs *)(long) r1;
273+
struct bpf_map *map = (struct bpf_map *)(long) r2;
274+
void *data = (void *)(long) r4;
275+
struct perf_raw_record raw = {
276+
.frag = {
277+
.size = size,
278+
.data = data,
279+
},
280+
};
281+
282+
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
283+
return -EINVAL;
284+
285+
return __bpf_perf_event_output(regs, map, flags, &raw);
286+
}
287+
277288
static const struct bpf_func_proto bpf_perf_event_output_proto = {
278289
.func = bpf_perf_event_output,
279290
.gpl_only = true,
@@ -287,29 +298,26 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
287298

288299
static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
289300

290-
static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
301+
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
302+
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
291303
{
292304
struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
305+
struct perf_raw_frag frag = {
306+
.copy = ctx_copy,
307+
.size = ctx_size,
308+
.data = ctx,
309+
};
310+
struct perf_raw_record raw = {
311+
.frag = {
312+
.next = ctx_size ? &frag : NULL,
313+
.size = meta_size,
314+
.data = meta,
315+
},
316+
};
293317

294318
perf_fetch_caller_regs(regs);
295319

296-
return bpf_perf_event_output((long)regs, r2, flags, r4, size);
297-
}
298-
299-
static const struct bpf_func_proto bpf_event_output_proto = {
300-
.func = bpf_event_output,
301-
.gpl_only = true,
302-
.ret_type = RET_INTEGER,
303-
.arg1_type = ARG_PTR_TO_CTX,
304-
.arg2_type = ARG_CONST_MAP_PTR,
305-
.arg3_type = ARG_ANYTHING,
306-
.arg4_type = ARG_PTR_TO_STACK,
307-
.arg5_type = ARG_CONST_STACK_SIZE,
308-
};
309-
310-
const struct bpf_func_proto *bpf_get_event_output_proto(void)
311-
{
312-
return &bpf_event_output_proto;
320+
return __bpf_perf_event_output(regs, map, flags, &raw);
313321
}
314322

315323
static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)

net/core/filter.c

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2025,6 +2025,47 @@ bool bpf_helper_changes_skb_data(void *func)
20252025
return false;
20262026
}
20272027

2028+
static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
2029+
unsigned long len)
2030+
{
2031+
void *ptr = skb_header_pointer(skb, 0, len, dst_buff);
2032+
2033+
if (unlikely(!ptr))
2034+
return len;
2035+
if (ptr != dst_buff)
2036+
memcpy(dst_buff, ptr, len);
2037+
2038+
return 0;
2039+
}
2040+
2041+
static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
2042+
u64 meta_size)
2043+
{
2044+
struct sk_buff *skb = (struct sk_buff *)(long) r1;
2045+
struct bpf_map *map = (struct bpf_map *)(long) r2;
2046+
u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
2047+
void *meta = (void *)(long) r4;
2048+
2049+
if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
2050+
return -EINVAL;
2051+
if (unlikely(skb_size > skb->len))
2052+
return -EFAULT;
2053+
2054+
return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
2055+
bpf_skb_copy);
2056+
}
2057+
2058+
static const struct bpf_func_proto bpf_skb_event_output_proto = {
2059+
.func = bpf_skb_event_output,
2060+
.gpl_only = true,
2061+
.ret_type = RET_INTEGER,
2062+
.arg1_type = ARG_PTR_TO_CTX,
2063+
.arg2_type = ARG_CONST_MAP_PTR,
2064+
.arg3_type = ARG_ANYTHING,
2065+
.arg4_type = ARG_PTR_TO_STACK,
2066+
.arg5_type = ARG_CONST_STACK_SIZE,
2067+
};
2068+
20282069
static unsigned short bpf_tunnel_key_af(u64 flags)
20292070
{
20302071
return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
@@ -2357,7 +2398,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
23572398
case BPF_FUNC_get_hash_recalc:
23582399
return &bpf_get_hash_recalc_proto;
23592400
case BPF_FUNC_perf_event_output:
2360-
return bpf_get_event_output_proto();
2401+
return &bpf_skb_event_output_proto;
23612402
case BPF_FUNC_get_smp_processor_id:
23622403
return &bpf_get_smp_processor_id_proto;
23632404
#ifdef CONFIG_SOCK_CGROUP_DATA

0 commit comments

Comments
 (0)