Skip to content

Commit 943e398

Browse files
author
Alexei Starovoitov
committed
Merge branch 'flow_dissector-input-flags'
Stanislav Fomichev says:

====================
C flow dissector supports input flags that tell it to customize parsing
by either stopping early or trying to parse as deep as possible.
BPF flow dissector always parses as deep as possible, which is sub-optimal.
Pass input flags to the BPF flow dissector as well so it can make the same
decisions.

Series outline:
* remove unused FLOW_DISSECTOR_F_STOP_AT_L3 flag
* export FLOW_DISSECTOR_F_XXX flags as uapi and pass them to BPF flow dissector
* add documentation for the exported flags
* support input flags in BPF_PROG_TEST_RUN via ctx_{in,out}
* sync uapi to tools
* support FLOW_DISSECTOR_F_PARSE_1ST_FRAG in selftest
* support FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL in kernel and selftest
* support FLOW_DISSECTOR_F_STOP_AT_ENCAP in selftest

Pros:
* makes BPF flow dissector faster by avoiding burning extra cycles
* existing BPF progs continue to work by ignoring the flags and always parsing as deep as possible

Cons:
* new UAPI which we need to support (OTOH, if we need to deprecate some flags, we can just stop setting them upon calling BPF programs)

Some numbers (with .repeat = 4000000 in test_flow_dissector):
  test_flow_dissector:PASS:ipv4-frag 35 nsec
  test_flow_dissector:PASS:ipv4-frag 35 nsec
  test_flow_dissector:PASS:ipv4-no-frag 32 nsec
  test_flow_dissector:PASS:ipv4-no-frag 32 nsec
  test_flow_dissector:PASS:ipv6-frag 39 nsec
  test_flow_dissector:PASS:ipv6-frag 39 nsec
  test_flow_dissector:PASS:ipv6-no-frag 36 nsec
  test_flow_dissector:PASS:ipv6-no-frag 36 nsec
  test_flow_dissector:PASS:ipv6-flow-label 36 nsec
  test_flow_dissector:PASS:ipv6-flow-label 36 nsec
  test_flow_dissector:PASS:ipv6-no-flow-label 33 nsec
  test_flow_dissector:PASS:ipv6-no-flow-label 33 nsec
  test_flow_dissector:PASS:ipip-encap 38 nsec
  test_flow_dissector:PASS:ipip-encap 38 nsec
  test_flow_dissector:PASS:ipip-no-encap 32 nsec
  test_flow_dissector:PASS:ipip-no-encap 32 nsec

The improvement is around 10%, but it's in a tight cache-hot
BPF_PROG_TEST_RUN loop.
====================

Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 03cd1d1 + e853ae7 commit 943e398

File tree

8 files changed

+368
-14
lines changed

8 files changed

+368
-14
lines changed

Documentation/bpf/prog_flow_dissector.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ The inputs are:
2626
* ``nhoff`` - initial offset of the networking header
2727
* ``thoff`` - initial offset of the transport header, initialized to nhoff
2828
* ``n_proto`` - L3 protocol type, parsed out of L2 header
29+
* ``flags`` - optional flags
2930

3031
Flow dissector BPF program should fill out the rest of the ``struct
3132
bpf_flow_keys`` fields. Input arguments ``nhoff/thoff/n_proto`` should be
@@ -101,6 +102,23 @@ can be called for both cases and would have to be written carefully to
101102
handle both cases.
102103

103104

105+
Flags
106+
=====
107+
108+
``flow_keys->flags`` might contain optional input flags that work as follows:
109+
110+
* ``BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG`` - tells BPF flow dissector to
111+
continue parsing the first fragment; the default expected behavior is that
112+
flow dissector returns as soon as it finds out that the packet is fragmented;
113+
used by ``eth_get_headlen`` to estimate length of all headers for GRO.
114+
* ``BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL`` - tells BPF flow dissector to
115+
stop parsing as soon as it reaches IPv6 flow label; used by
116+
``___skb_get_hash`` and ``__skb_get_hash_symmetric`` to get flow hash.
117+
* ``BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP`` - tells BPF flow dissector to stop
118+
parsing as soon as it reaches encapsulated headers; used by routing
119+
infrastructure.
120+
121+
104122
Reference Implementation
105123
========================
106124

include/linux/skbuff.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1271,7 +1271,7 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
12711271

12721272
struct bpf_flow_dissector;
12731273
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
1274-
__be16 proto, int nhoff, int hlen);
1274+
__be16 proto, int nhoff, int hlen, unsigned int flags);
12751275

12761276
bool __skb_flow_dissect(const struct net *net,
12771277
const struct sk_buff *skb,

include/uapi/linux/bpf.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3507,6 +3507,10 @@ enum bpf_task_fd_type {
35073507
BPF_FD_TYPE_URETPROBE, /* filename + offset */
35083508
};
35093509

3510+
#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
3511+
#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
3512+
#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
3513+
35103514
struct bpf_flow_keys {
35113515
__u16 nhoff;
35123516
__u16 thoff;
@@ -3528,6 +3532,8 @@ struct bpf_flow_keys {
35283532
__u32 ipv6_dst[4]; /* in6_addr; network order */
35293533
};
35303534
};
3535+
__u32 flags;
3536+
__be32 flow_label;
35313537
};
35323538

35333539
struct bpf_func_info {

net/bpf/test_run.c

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -377,16 +377,34 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
377377
return ret;
378378
}
379379

380+
/*
 * Validate a user-supplied struct bpf_flow_keys.
 *
 * Only the ``flags`` member is allowed to carry a value: the bytes before
 * ``flags`` and the bytes after it, up to sizeof(struct bpf_flow_keys),
 * are each checked with range_is_zero().
 *
 * Returns 0 when both checks pass, -EINVAL otherwise.
 */
static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
381+
{
382+
/* make sure the fields we don't use are zeroed */
383+
if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags)))
384+
return -EINVAL;
385+
386+
/* flags is allowed */
387+
388+
/* check the remainder of the struct, starting right after flags */
if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) +
389+
FIELD_SIZEOF(struct bpf_flow_keys, flags),
390+
sizeof(struct bpf_flow_keys)))
391+
return -EINVAL;
392+
393+
return 0;
394+
}
395+
380396
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
381397
const union bpf_attr *kattr,
382398
union bpf_attr __user *uattr)
383399
{
384400
u32 size = kattr->test.data_size_in;
385401
struct bpf_flow_dissector ctx = {};
386402
u32 repeat = kattr->test.repeat;
403+
struct bpf_flow_keys *user_ctx;
387404
struct bpf_flow_keys flow_keys;
388405
u64 time_start, time_spent = 0;
389406
const struct ethhdr *eth;
407+
unsigned int flags = 0;
390408
u32 retval, duration;
391409
void *data;
392410
int ret;
@@ -395,9 +413,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
395413
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
396414
return -EINVAL;
397415

398-
if (kattr->test.ctx_in || kattr->test.ctx_out)
399-
return -EINVAL;
400-
401416
if (size < ETH_HLEN)
402417
return -EINVAL;
403418

@@ -410,6 +425,18 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
410425
if (!repeat)
411426
repeat = 1;
412427

428+
user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys));
429+
if (IS_ERR(user_ctx)) {
430+
kfree(data);
431+
return PTR_ERR(user_ctx);
432+
}
433+
if (user_ctx) {
434+
ret = verify_user_bpf_flow_keys(user_ctx);
435+
if (ret)
436+
goto out;
437+
flags = user_ctx->flags;
438+
}
439+
413440
ctx.flow_keys = &flow_keys;
414441
ctx.data = data;
415442
ctx.data_end = (__u8 *)data + size;
@@ -419,7 +446,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
419446
time_start = ktime_get_ns();
420447
for (i = 0; i < repeat; i++) {
421448
retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
422-
size);
449+
size, flags);
423450

424451
if (signal_pending(current)) {
425452
preempt_enable();
@@ -450,8 +477,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
450477

451478
ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
452479
retval, duration);
480+
if (!ret)
481+
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
482+
sizeof(struct bpf_flow_keys));
453483

454484
out:
485+
kfree(user_ctx);
455486
kfree(data);
456487
return ret;
457488
}

net/core/flow_dissector.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
737737
struct flow_dissector_key_basic *key_basic;
738738
struct flow_dissector_key_addrs *key_addrs;
739739
struct flow_dissector_key_ports *key_ports;
740+
struct flow_dissector_key_tags *key_tags;
740741

741742
key_control = skb_flow_dissector_target(flow_dissector,
742743
FLOW_DISSECTOR_KEY_CONTROL,
@@ -781,10 +782,18 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
781782
key_ports->src = flow_keys->sport;
782783
key_ports->dst = flow_keys->dport;
783784
}
785+
786+
if (dissector_uses_key(flow_dissector,
787+
FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
788+
key_tags = skb_flow_dissector_target(flow_dissector,
789+
FLOW_DISSECTOR_KEY_FLOW_LABEL,
790+
target_container);
791+
key_tags->flow_label = ntohl(flow_keys->flow_label);
792+
}
784793
}
785794

786795
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
787-
__be16 proto, int nhoff, int hlen)
796+
__be16 proto, int nhoff, int hlen, unsigned int flags)
788797
{
789798
struct bpf_flow_keys *flow_keys = ctx->flow_keys;
790799
u32 result;
@@ -795,6 +804,14 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
795804
flow_keys->nhoff = nhoff;
796805
flow_keys->thoff = flow_keys->nhoff;
797806

807+
BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG !=
808+
(int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG);
809+
BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL !=
810+
(int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
811+
BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP !=
812+
(int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
813+
flow_keys->flags = flags;
814+
798815
preempt_disable();
799816
result = BPF_PROG_RUN(prog, ctx);
800817
preempt_enable();
@@ -914,7 +931,7 @@ bool __skb_flow_dissect(const struct net *net,
914931
}
915932

916933
ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
917-
hlen);
934+
hlen, flags);
918935
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
919936
target_container);
920937
rcu_read_unlock();

tools/include/uapi/linux/bpf.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3504,6 +3504,10 @@ enum bpf_task_fd_type {
35043504
BPF_FD_TYPE_URETPROBE, /* filename + offset */
35053505
};
35063506

3507+
#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
3508+
#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
3509+
#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
3510+
35073511
struct bpf_flow_keys {
35083512
__u16 nhoff;
35093513
__u16 thoff;
@@ -3525,6 +3529,8 @@ struct bpf_flow_keys {
35253529
__u32 ipv6_dst[4]; /* in6_addr; network order */
35263530
};
35273531
};
3532+
__u32 flags;
3533+
__be32 flow_label;
35283534
};
35293535

35303536
struct bpf_func_info {

0 commit comments

Comments
 (0)