Commit f1c89c0

Merge branch 'bpf-next'

Daniel Borkmann says:

====================
BPF helper improvements and cleanups

This set adds various improvements to BPF helpers, a cleanup to use the
skb_pkt_type_ok() helper, the addition of bpf_skb_change_tail(), a follow-up
for the event output helper, and the removal of the ifdefs around the
cgroupv2 helper bits. For details, please see the individual patches.

The set is based against the net-next tree, but requires a merge of net
into net-next first. Thanks a lot!
====================

Signed-off-by: David S. Miller <[email protected]>

2 parents b340402 + 54fd9c2

File tree: 4 files changed (+204, -12 lines)

include/linux/skbuff.h

Lines changed: 42 additions & 1 deletion
@@ -2295,7 +2295,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 
 int ___pskb_trim(struct sk_buff *skb, unsigned int len);
 
-static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+static inline void __skb_set_length(struct sk_buff *skb, unsigned int len)
 {
 	if (unlikely(skb_is_nonlinear(skb))) {
 		WARN_ON(1);
@@ -2305,6 +2305,11 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 	skb_set_tail_pointer(skb, len);
 }
 
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+	__skb_set_length(skb, len);
+}
+
 void skb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
@@ -2335,6 +2340,20 @@ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
 	BUG_ON(err);
 }
 
+static inline int __skb_grow(struct sk_buff *skb, unsigned int len)
+{
+	unsigned int diff = len - skb->len;
+
+	if (skb_tailroom(skb) < diff) {
+		int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb),
+					   GFP_ATOMIC);
+		if (ret)
+			return ret;
+	}
+	__skb_set_length(skb, len);
+	return 0;
+}
+
 /**
  * skb_orphan - orphan a buffer
  * @skb: buffer to orphan
@@ -2938,6 +2957,21 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
 	return __pskb_trim(skb, len);
 }
 
+static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+	__skb_trim(skb, len);
+	return 0;
+}
+
+static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+	return __skb_grow(skb, len);
+}
+
 #define skb_queue_walk(queue, skb) \
 	for (skb = (queue)->next; \
 	     skb != (struct sk_buff *)(queue); \
@@ -3726,6 +3760,13 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
+static inline void skb_gso_reset(struct sk_buff *skb)
+{
+	skb_shinfo(skb)->gso_size = 0;
+	skb_shinfo(skb)->gso_segs = 0;
+	skb_shinfo(skb)->gso_type = 0;
+}
+
 void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
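
To illustrate how the new skbuff helpers compose, here is a minimal,
hypothetical sketch (not part of the patch) of kernel code that resizes a
private, linear skb to exactly new_len bytes; resize_skb_to() is an invented
name, and the logic mirrors bpf_skb_grow_rcsum()/bpf_skb_trim_rcsum() from
net/core/filter.c below:

/* Hypothetical example: resize skb to new_len, zero-filling a grown tail.
 * Both rcsum variants drop CHECKSUM_COMPLETE, since resizing invalidates
 * the full-packet checksum.
 */
static int resize_skb_to(struct sk_buff *skb, unsigned int new_len)
{
	unsigned int old_len = skb->len;
	int ret = 0;

	if (new_len > old_len) {
		/* __skb_grow_rcsum() expands tailroom via pskb_expand_head()
		 * when needed, but leaves the new bytes uninitialized.
		 */
		ret = __skb_grow_rcsum(skb, new_len);
		if (!ret)
			memset(skb->data + old_len, 0, new_len - old_len);
	} else if (new_len < old_len) {
		ret = __skb_trim_rcsum(skb, new_len);
	}
	return ret;
}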

include/net/sock.h

Lines changed: 10 additions & 0 deletions
@@ -1114,6 +1114,16 @@ static inline bool sk_stream_is_writeable(const struct sock *sk)
 	       sk_stream_memory_free(sk);
 }
 
+static inline int sk_under_cgroup_hierarchy(struct sock *sk,
+					    struct cgroup *ancestor)
+{
+#ifdef CONFIG_SOCK_CGROUP_DATA
+	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data),
+				    ancestor);
+#else
+	return -ENOTSUPP;
+#endif
+}
 
 static inline bool sk_has_memory_pressure(const struct sock *sk)
 {
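
A minimal, hypothetical in-kernel caller (sk_in_subtree() is an invented
name, not part of the patch), mirroring how bpf_skb_under_cgroup() uses the
new wrapper in net/core/filter.c below:

/* Hypothetical example: returns 1 if sk's cgroup lies in ancestor's
 * subtree, 0 if not, or -ENOTSUPP on kernels built without
 * CONFIG_SOCK_CGROUP_DATA -- callers must handle all three outcomes.
 */
static int sk_in_subtree(struct sock *sk, struct cgroup *ancestor)
{
	return sk_under_cgroup_hierarchy(sk, ancestor);
}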

include/uapi/linux/bpf.h

Lines changed: 11 additions & 0 deletions
@@ -386,6 +386,17 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_current_task_under_cgroup,
 
+	/**
+	 * bpf_skb_change_tail(skb, len, flags)
+	 * The helper will resize the skb to the given new size,
+	 * to be used f.e. with control messages.
+	 * @skb: pointer to skb
+	 * @len: new skb length
+	 * @flags: reserved
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_tail,
+
 	__BPF_FUNC_MAX_ID,
 };
 
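
A hypothetical tc/eBPF usage sketch of the new helper (restricted C, compiled
with clang -target bpf; HDR_LEN and the section name are invented for
illustration): trim a reply down to a headers-only control message, after
which a real program would rewrite contents via bpf_skb_store_bytes():

#include <linux/bpf.h>

#define HDR_LEN 54 /* assumed Ethernet + IPv4 + TCP header size */

/* Helper stub, resolved by the kernel to BPF_FUNC_skb_change_tail. */
static int (*bpf_skb_change_tail)(void *ctx, unsigned int len,
				  unsigned long long flags) =
	(void *) BPF_FUNC_skb_change_tail;

__attribute__((section("classifier"), used))
int trim_to_headers(struct __sk_buff *skb)
{
	/* flags are reserved and must be zero; the call fails with a
	 * negative error if len would cut into parsed headers or
	 * exceed the device MTU.
	 */
	if (skb->len > HDR_LEN)
		bpf_skb_change_tail(skb, HDR_LEN, 0);
	return 0; /* TC_ACT_OK */
}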

net/core/filter.c

Lines changed: 141 additions & 11 deletions
@@ -1350,14 +1350,18 @@ struct bpf_scratchpad {
 
 static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
 
+static inline int __bpf_try_make_writable(struct sk_buff *skb,
+					  unsigned int write_len)
+{
+	return skb_ensure_writable(skb, write_len);
+}
+
 static inline int bpf_try_make_writable(struct sk_buff *skb,
 					unsigned int write_len)
 {
-	int err;
+	int err = __bpf_try_make_writable(skb, write_len);
 
-	err = skb_ensure_writable(skb, write_len);
 	bpf_compute_data_end(skb);
-
 	return err;
 }
 
@@ -1976,8 +1980,8 @@ static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	u32 pkt_type = r2;
 
 	/* We only allow a restricted subset to be changed for now. */
-	if (unlikely(skb->pkt_type > PACKET_OTHERHOST ||
-		     pkt_type > PACKET_OTHERHOST))
+	if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
+		     !skb_pkt_type_ok(pkt_type)))
 		return -EINVAL;
 
 	skb->pkt_type = pkt_type;
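
For context, skb_pkt_type_ok() was introduced in include/linux/skbuff.h
earlier in this series; it is approximately the following bounds check (a
paraphrase from memory, not part of this diff), so the rewrite above is
behavior-preserving:

/* Approximate definition: PACKET_OTHERHOST is the highest pkt_type
 * value an eBPF program is allowed to set.
 */
static inline bool skb_pkt_type_ok(u32 ptype)
{
	return ptype <= PACKET_OTHERHOST;
}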
@@ -1992,6 +1996,92 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static u32 __bpf_skb_min_len(const struct sk_buff *skb)
+{
+	u32 min_len = skb_network_offset(skb);
+
+	if (skb_transport_header_was_set(skb))
+		min_len = skb_transport_offset(skb);
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		min_len = skb_checksum_start_offset(skb) +
+			  skb->csum_offset + sizeof(__sum16);
+	return min_len;
+}
+
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+	return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
+			  65536;
+}
+
+static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+	unsigned int old_len = skb->len;
+	int ret;
+
+	ret = __skb_grow_rcsum(skb, new_len);
+	if (!ret)
+		memset(skb->data + old_len, 0, new_len - old_len);
+	return ret;
+}
+
+static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+	return __skb_trim_rcsum(skb, new_len);
+}
+
+static u64 bpf_skb_change_tail(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *)(long) r1;
+	u32 max_len = __bpf_skb_max_len(skb);
+	u32 min_len = __bpf_skb_min_len(skb);
+	u32 new_len = (u32) r2;
+	int ret;
+
+	if (unlikely(flags || new_len > max_len || new_len < min_len))
+		return -EINVAL;
+	if (skb->encapsulation)
+		return -ENOTSUPP;
+
+	/* The basic idea of this helper is that it's performing the
+	 * needed work to either grow or trim an skb, and eBPF program
+	 * rewrites the rest via helpers like bpf_skb_store_bytes(),
+	 * bpf_lX_csum_replace() and others rather than passing a raw
+	 * buffer here. This one is a slow path helper and intended
+	 * for replies with control messages.
+	 *
+	 * Like in bpf_skb_change_proto(), we want to keep this rather
+	 * minimal and without protocol specifics so that we are able
+	 * to separate concerns as in bpf_skb_store_bytes() should only
+	 * be the one responsible for writing buffers.
+	 *
+	 * It's really expected to be a slow path operation here for
+	 * control message replies, so we're implicitly linearizing,
+	 * uncloning and drop offloads from the skb by this.
+	 */
+	ret = __bpf_try_make_writable(skb, skb->len);
+	if (!ret) {
+		if (new_len > skb->len)
+			ret = bpf_skb_grow_rcsum(skb, new_len);
+		else if (new_len < skb->len)
+			ret = bpf_skb_trim_rcsum(skb, new_len);
+		if (!ret && skb_is_gso(skb))
+			skb_gso_reset(skb);
+	}
+
+	bpf_compute_data_end(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_tail_proto = {
+	.func		= bpf_skb_change_tail,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_skb_data(void *func)
 {
 	if (func == bpf_skb_vlan_push)
@@ -2002,6 +2092,8 @@ bool bpf_helper_changes_skb_data(void *func)
 		return true;
 	if (func == bpf_skb_change_proto)
 		return true;
+	if (func == bpf_skb_change_tail)
+		return true;
 	if (func == bpf_l3_csum_replace)
 		return true;
 	if (func == bpf_l4_csum_replace)
@@ -2282,7 +2374,6 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 	}
 }
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
 static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long)r1;
@@ -2303,7 +2394,7 @@ static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	if (unlikely(!cgrp))
 		return -EAGAIN;
 
-	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
+	return sk_under_cgroup_hierarchy(sk, cgrp);
 }
 
 static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
@@ -2314,7 +2405,41 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
 	.arg2_type	= ARG_CONST_MAP_PTR,
 	.arg3_type	= ARG_ANYTHING,
 };
-#endif
+
+static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+				  unsigned long off, unsigned long len)
+{
+	memcpy(dst_buff, src_buff + off, len);
+	return 0;
+}
+
+static u64 bpf_xdp_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
+				u64 meta_size)
+{
+	struct xdp_buff *xdp = (struct xdp_buff *)(long) r1;
+	struct bpf_map *map = (struct bpf_map *)(long) r2;
+	u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+	void *meta = (void *)(long) r4;
+
+	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+		return -EINVAL;
+	if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+		return -EFAULT;
+
+	return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size,
+				bpf_xdp_copy);
+}
+
+static const struct bpf_func_proto bpf_xdp_event_output_proto = {
+	.func		= bpf_xdp_event_output,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_STACK,
+	.arg5_type	= ARG_CONST_STACK_SIZE,
+};
 
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
@@ -2368,6 +2493,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_change_proto_proto;
 	case BPF_FUNC_skb_change_type:
 		return &bpf_skb_change_type_proto;
+	case BPF_FUNC_skb_change_tail:
+		return &bpf_skb_change_tail_proto;
 	case BPF_FUNC_skb_get_tunnel_key:
 		return &bpf_skb_get_tunnel_key_proto;
 	case BPF_FUNC_skb_set_tunnel_key:
@@ -2386,10 +2513,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_event_output_proto;
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
-#ifdef CONFIG_SOCK_CGROUP_DATA
 	case BPF_FUNC_skb_under_cgroup:
 		return &bpf_skb_under_cgroup_proto;
-#endif
 	default:
 		return sk_filter_func_proto(func_id);
 	}
@@ -2398,7 +2523,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 static const struct bpf_func_proto *
 xdp_func_proto(enum bpf_func_id func_id)
 {
-	return sk_filter_func_proto(func_id);
+	switch (func_id) {
+	case BPF_FUNC_perf_event_output:
+		return &bpf_xdp_event_output_proto;
+	default:
+		return sk_filter_func_proto(func_id);
+	}
 }
 
 static bool __is_valid_access(int off, int size, enum bpf_access_type type)
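
With xdp_func_proto() now exposing bpf_xdp_event_output_proto, an XDP program
can push packet samples to user space. A hypothetical sketch follows
(restricted C, clang -target bpf; the map, struct, and section names are
invented), assuming a user space loader sets up and reads the perf event
array map:

#include <linux/bpf.h>

/* Minimal local map definition, as used by the kernel samples of the era. */
struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
};

static int (*bpf_perf_event_output)(void *ctx, void *map,
				    unsigned long long flags, void *data,
				    unsigned long long size) =
	(void *) BPF_FUNC_perf_event_output;

struct bpf_map_def __attribute__((section("maps"), used)) perf_map = {
	.type		= BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size	= sizeof(int),
	.value_size	= sizeof(unsigned int),
	.max_entries	= 64,
};

struct event_meta {
	unsigned short pkt_len;
};

__attribute__((section("xdp"), used))
int xdp_sample(struct xdp_md *ctx)
{
	struct event_meta meta = {
		.pkt_len = ctx->data_end - ctx->data,
	};
	/* The upper 32 bits of flags (BPF_F_CTXLEN_MASK) select how many
	 * packet bytes bpf_xdp_event_output() appends after the metadata;
	 * it returns -EFAULT if that exceeds the packet length, so clamp.
	 */
	unsigned long long sample = meta.pkt_len < 64 ? meta.pkt_len : 64;
	unsigned long long flags = BPF_F_CURRENT_CPU | (sample << 32);

	bpf_perf_event_output(ctx, &perf_map, flags, &meta, sizeof(meta));
	return XDP_PASS;
}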
