Skip to content

Commit b5964b9

Browse files
joannekoong authored and
Alexei Starovoitov committed
bpf: Add skb dynptrs
Add skb dynptrs, which are dynptrs whose underlying pointer points to a skb. The dynptr acts on skb data.

skb dynptrs have two main benefits. One is that they allow operations on sizes that are not statically known at compile-time (e.g. variable-sized accesses). Another is that parsing the packet data through dynptrs (instead of through direct access of skb->data and skb->data_end) can be more ergonomic and less brittle (e.g. it does not need manual if-checking for being within the bounds of data_end).

For bpf prog types that don't support writes on skb data, the dynptr is read-only (bpf_dynptr_write() will return an error).

For reads and writes through the bpf_dynptr_read() and bpf_dynptr_write() interfaces, reading and writing from/to data in the head as well as from/to non-linear paged buffers is supported. Data slices through the bpf_dynptr_data API are not supported; instead bpf_dynptr_slice() and bpf_dynptr_slice_rdwr() (added in a subsequent commit) should be used.

For examples of how skb dynptrs can be used, please see the attached selftests.

Signed-off-by: Joanne Koong <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent d96d937 commit b5964b9

File tree

8 files changed

+261
-19
lines changed

8 files changed

+261
-19
lines changed

include/linux/bpf.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -607,11 +607,14 @@ enum bpf_type_flag {
607607
*/
608608
NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),
609609

610+
/* DYNPTR points to sk_buff */
611+
DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS),
612+
610613
__BPF_TYPE_FLAG_MAX,
611614
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
612615
};
613616

614-
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
617+
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB)
615618

616619
/* Max number of base types. */
617620
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -1146,6 +1149,8 @@ enum bpf_dynptr_type {
11461149
BPF_DYNPTR_TYPE_LOCAL,
11471150
/* Underlying data is a ringbuf record */
11481151
BPF_DYNPTR_TYPE_RINGBUF,
1152+
/* Underlying data is a sk_buff */
1153+
BPF_DYNPTR_TYPE_SKB,
11491154
};
11501155

11511156
int bpf_dynptr_check_size(u32 size);
@@ -2846,6 +2851,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
28462851
struct bpf_insn *insn_buf,
28472852
struct bpf_prog *prog,
28482853
u32 *target_size);
2854+
int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
2855+
struct bpf_dynptr_kern *ptr);
28492856
#else
28502857
static inline bool bpf_sock_common_is_valid_access(int off, int size,
28512858
enum bpf_access_type type,
@@ -2867,6 +2874,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
28672874
{
28682875
return 0;
28692876
}
2877+
static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
2878+
struct bpf_dynptr_kern *ptr)
2879+
{
2880+
return -EOPNOTSUPP;
2881+
}
28702882
#endif
28712883

28722884
#ifdef CONFIG_INET

include/linux/filter.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1542,4 +1542,22 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index
15421542
return XDP_REDIRECT;
15431543
}
15441544

1545+
#ifdef CONFIG_NET
1546+
int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len);
1547+
int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
1548+
u32 len, u64 flags);
1549+
#else /* CONFIG_NET */
1550+
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
1551+
void *to, u32 len)
1552+
{
1553+
return -EOPNOTSUPP;
1554+
}
1555+
1556+
static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset,
1557+
const void *from, u32 len, u64 flags)
1558+
{
1559+
return -EOPNOTSUPP;
1560+
}
1561+
#endif /* CONFIG_NET */
1562+
15451563
#endif /* __LINUX_FILTER_H__ */

include/uapi/linux/bpf.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5325,18 +5325,27 @@ union bpf_attr {
53255325
* Description
53265326
* Write *len* bytes from *src* into *dst*, starting from *offset*
53275327
* into *dst*.
5328-
* *flags* is currently unused.
5328+
*
5329+
* *flags* must be 0 except for skb-type dynptrs.
5330+
*
5331+
* For skb-type dynptrs:
5332+
* * For *flags*, please see the flags accepted by
5333+
* **bpf_skb_store_bytes**\ ().
53295334
* Return
53305335
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
53315336
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
5332-
* is a read-only dynptr or if *flags* is not 0.
5337+
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
5338+
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
53335339
*
53345340
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
53355341
* Description
53365342
* Get a pointer to the underlying dynptr data.
53375343
*
53385344
* *len* must be a statically known value. The returned data slice
53395345
* is invalidated whenever the dynptr is invalidated.
5346+
*
5347+
* skb type dynptrs may not use bpf_dynptr_data. They should
5348+
* instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
53405349
* Return
53415350
* Pointer to the underlying dynptr data, NULL if the dynptr is
53425351
* read-only, if the dynptr is invalid, or if the offset and length

kernel/bpf/btf.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ enum btf_kfunc_hook {
207207
BTF_KFUNC_HOOK_TRACING,
208208
BTF_KFUNC_HOOK_SYSCALL,
209209
BTF_KFUNC_HOOK_FMODRET,
210+
BTF_KFUNC_HOOK_CGROUP_SKB,
211+
BTF_KFUNC_HOOK_SCHED_ACT,
212+
BTF_KFUNC_HOOK_SK_SKB,
213+
BTF_KFUNC_HOOK_SOCKET_FILTER,
214+
BTF_KFUNC_HOOK_LWT,
210215
BTF_KFUNC_HOOK_MAX,
211216
};
212217

@@ -7708,6 +7713,19 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
77087713
return BTF_KFUNC_HOOK_TRACING;
77097714
case BPF_PROG_TYPE_SYSCALL:
77107715
return BTF_KFUNC_HOOK_SYSCALL;
7716+
case BPF_PROG_TYPE_CGROUP_SKB:
7717+
return BTF_KFUNC_HOOK_CGROUP_SKB;
7718+
case BPF_PROG_TYPE_SCHED_ACT:
7719+
return BTF_KFUNC_HOOK_SCHED_ACT;
7720+
case BPF_PROG_TYPE_SK_SKB:
7721+
return BTF_KFUNC_HOOK_SK_SKB;
7722+
case BPF_PROG_TYPE_SOCKET_FILTER:
7723+
return BTF_KFUNC_HOOK_SOCKET_FILTER;
7724+
case BPF_PROG_TYPE_LWT_OUT:
7725+
case BPF_PROG_TYPE_LWT_IN:
7726+
case BPF_PROG_TYPE_LWT_XMIT:
7727+
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
7728+
return BTF_KFUNC_HOOK_LWT;
77117729
default:
77127730
return BTF_KFUNC_HOOK_MAX;
77137731
}

kernel/bpf/helpers.c

Lines changed: 62 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1420,11 +1420,21 @@ static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
14201420
return ptr->size & DYNPTR_RDONLY_BIT;
14211421
}
14221422

1423+
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
1424+
{
1425+
ptr->size |= DYNPTR_RDONLY_BIT;
1426+
}
1427+
14231428
static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
14241429
{
14251430
ptr->size |= type << DYNPTR_TYPE_SHIFT;
14261431
}
14271432

1433+
static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
1434+
{
1435+
return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
1436+
}
1437+
14281438
u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
14291439
{
14301440
return ptr->size & DYNPTR_SIZE_MASK;
@@ -1497,6 +1507,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
14971507
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
14981508
u32, offset, u64, flags)
14991509
{
1510+
enum bpf_dynptr_type type;
15001511
int err;
15011512

15021513
if (!src->data || flags)
@@ -1506,13 +1517,23 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern
15061517
if (err)
15071518
return err;
15081519

1509-
/* Source and destination may possibly overlap, hence use memmove to
1510-
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
1511-
* pointing to overlapping PTR_TO_MAP_VALUE regions.
1512-
*/
1513-
memmove(dst, src->data + src->offset + offset, len);
1520+
type = bpf_dynptr_get_type(src);
15141521

1515-
return 0;
1522+
switch (type) {
1523+
case BPF_DYNPTR_TYPE_LOCAL:
1524+
case BPF_DYNPTR_TYPE_RINGBUF:
1525+
/* Source and destination may possibly overlap, hence use memmove to
1526+
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
1527+
* pointing to overlapping PTR_TO_MAP_VALUE regions.
1528+
*/
1529+
memmove(dst, src->data + src->offset + offset, len);
1530+
return 0;
1531+
case BPF_DYNPTR_TYPE_SKB:
1532+
return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
1533+
default:
1534+
WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
1535+
return -EFAULT;
1536+
}
15161537
}
15171538

15181539
static const struct bpf_func_proto bpf_dynptr_read_proto = {
@@ -1529,22 +1550,36 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
15291550
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
15301551
u32, len, u64, flags)
15311552
{
1553+
enum bpf_dynptr_type type;
15321554
int err;
15331555

1534-
if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
1556+
if (!dst->data || bpf_dynptr_is_rdonly(dst))
15351557
return -EINVAL;
15361558

15371559
err = bpf_dynptr_check_off_len(dst, offset, len);
15381560
if (err)
15391561
return err;
15401562

1541-
/* Source and destination may possibly overlap, hence use memmove to
1542-
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
1543-
* pointing to overlapping PTR_TO_MAP_VALUE regions.
1544-
*/
1545-
memmove(dst->data + dst->offset + offset, src, len);
1563+
type = bpf_dynptr_get_type(dst);
15461564

1547-
return 0;
1565+
switch (type) {
1566+
case BPF_DYNPTR_TYPE_LOCAL:
1567+
case BPF_DYNPTR_TYPE_RINGBUF:
1568+
if (flags)
1569+
return -EINVAL;
1570+
/* Source and destination may possibly overlap, hence use memmove to
1571+
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
1572+
* pointing to overlapping PTR_TO_MAP_VALUE regions.
1573+
*/
1574+
memmove(dst->data + dst->offset + offset, src, len);
1575+
return 0;
1576+
case BPF_DYNPTR_TYPE_SKB:
1577+
return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
1578+
flags);
1579+
default:
1580+
WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
1581+
return -EFAULT;
1582+
}
15481583
}
15491584

15501585
static const struct bpf_func_proto bpf_dynptr_write_proto = {
@@ -1560,6 +1595,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
15601595

15611596
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
15621597
{
1598+
enum bpf_dynptr_type type;
15631599
int err;
15641600

15651601
if (!ptr->data)
@@ -1572,7 +1608,19 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
15721608
if (bpf_dynptr_is_rdonly(ptr))
15731609
return 0;
15741610

1575-
return (unsigned long)(ptr->data + ptr->offset + offset);
1611+
type = bpf_dynptr_get_type(ptr);
1612+
1613+
switch (type) {
1614+
case BPF_DYNPTR_TYPE_LOCAL:
1615+
case BPF_DYNPTR_TYPE_RINGBUF:
1616+
return (unsigned long)(ptr->data + ptr->offset + offset);
1617+
case BPF_DYNPTR_TYPE_SKB:
1618+
/* skb dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
1619+
return 0;
1620+
default:
1621+
WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
1622+
return 0;
1623+
}
15761624
}
15771625

15781626
static const struct bpf_func_proto bpf_dynptr_data_proto = {

kernel/bpf/verifier.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
750750
return BPF_DYNPTR_TYPE_LOCAL;
751751
case DYNPTR_TYPE_RINGBUF:
752752
return BPF_DYNPTR_TYPE_RINGBUF;
753+
case DYNPTR_TYPE_SKB:
754+
return BPF_DYNPTR_TYPE_SKB;
753755
default:
754756
return BPF_DYNPTR_TYPE_INVALID;
755757
}
@@ -6295,6 +6297,9 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
62956297
case DYNPTR_TYPE_RINGBUF:
62966298
err_extra = "ringbuf";
62976299
break;
6300+
case DYNPTR_TYPE_SKB:
6301+
err_extra = "skb ";
6302+
break;
62986303
default:
62996304
err_extra = "<unknown>";
63006305
break;
@@ -6737,6 +6742,24 @@ static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state
67376742
return state->stack[spi].spilled_ptr.ref_obj_id;
67386743
}
67396744

6745+
static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
6746+
struct bpf_reg_state *reg)
6747+
{
6748+
struct bpf_func_state *state = func(env, reg);
6749+
int spi;
6750+
6751+
if (reg->type == CONST_PTR_TO_DYNPTR)
6752+
return reg->dynptr.type;
6753+
6754+
spi = __get_spi(reg->off);
6755+
if (spi < 0) {
6756+
verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
6757+
return BPF_DYNPTR_TYPE_INVALID;
6758+
}
6759+
6760+
return state->stack[spi].spilled_ptr.dynptr.type;
6761+
}
6762+
67406763
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
67416764
struct bpf_call_arg_meta *meta,
67426765
const struct bpf_func_proto *fn,
@@ -8383,6 +8406,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
83838406

83848407
break;
83858408
}
8409+
case BPF_FUNC_dynptr_write:
8410+
{
8411+
enum bpf_dynptr_type dynptr_type;
8412+
struct bpf_reg_state *reg;
8413+
8414+
reg = get_dynptr_arg_reg(env, fn, regs);
8415+
if (!reg)
8416+
return -EFAULT;
8417+
8418+
dynptr_type = dynptr_get_type(env, reg);
8419+
if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
8420+
return -EFAULT;
8421+
8422+
if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
8423+
/* this will trigger clear_all_pkt_pointers(), which will
8424+
* invalidate all dynptr slices associated with the skb
8425+
*/
8426+
changes_data = true;
8427+
8428+
break;
8429+
}
83868430
case BPF_FUNC_user_ringbuf_drain:
83878431
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
83888432
set_user_ringbuf_callback_state);
@@ -8898,6 +8942,7 @@ enum special_kfunc_type {
88988942
KF_bpf_rbtree_remove,
88998943
KF_bpf_rbtree_add,
89008944
KF_bpf_rbtree_first,
8945+
KF_bpf_dynptr_from_skb,
89018946
};
89028947

89038948
BTF_SET_START(special_kfunc_set)
@@ -8912,6 +8957,7 @@ BTF_ID(func, bpf_rdonly_cast)
89128957
BTF_ID(func, bpf_rbtree_remove)
89138958
BTF_ID(func, bpf_rbtree_add)
89148959
BTF_ID(func, bpf_rbtree_first)
8960+
BTF_ID(func, bpf_dynptr_from_skb)
89158961
BTF_SET_END(special_kfunc_set)
89168962

89178963
BTF_ID_LIST(special_kfunc_list)
@@ -8928,6 +8974,7 @@ BTF_ID(func, bpf_rcu_read_unlock)
89288974
BTF_ID(func, bpf_rbtree_remove)
89298975
BTF_ID(func, bpf_rbtree_add)
89308976
BTF_ID(func, bpf_rbtree_first)
8977+
BTF_ID(func, bpf_dynptr_from_skb)
89318978

89328979
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
89338980
{
@@ -9682,6 +9729,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
96829729
if (is_kfunc_arg_uninit(btf, &args[i]))
96839730
dynptr_arg_type |= MEM_UNINIT;
96849731

9732+
if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
9733+
dynptr_arg_type |= DYNPTR_TYPE_SKB;
9734+
96859735
ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
96869736
if (ret < 0)
96879737
return ret;
@@ -16356,6 +16406,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
1635616406
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
1635716407
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
1635816408
*cnt = 1;
16409+
} else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
16410+
bool seen_direct_write = env->seen_direct_write;
16411+
bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
16412+
16413+
if (is_rdonly)
16414+
insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
16415+
16416+
/* restore env->seen_direct_write to its original value, since
16417+
* may_access_direct_pkt_data mutates it
16418+
*/
16419+
env->seen_direct_write = seen_direct_write;
1635916420
}
1636016421
return 0;
1636116422
}

0 commit comments

Comments
 (0)