Skip to content

Commit e5cd3ab

Browse files
jrfastab and borkmann
authored and committed
bpf: sockmap, refactor sockmap routines to work with hashmap
This patch only refactors the existing sockmap code. This will allow much of the psock initialization code path and the bpf helper code to work for both sockmap bpf map types: the one backed by an array (the currently supported type) and the new hash-backed bpf map type, sockhash.

Most of the fallout comes from three changes:
- Pushing bpf programs into an independent structure so we can use it from the htab struct in the next patch.
- Generalizing helpers to use void *key instead of the hardcoded u32.
- Instead of passing map/key through the metadata, we now do the lookup inline. This avoids storing the key in the metadata, which will be useful when keys can be longer than 4 bytes. We rename the sk pointers to sk_redir at this point as well to avoid any confusion between the current sk pointer and the redirect pointer sk_redir.

Signed-off-by: John Fastabend <[email protected]> Acked-by: David S. Miller <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]>
1 parent f2467c2 commit e5cd3ab

File tree

4 files changed

+98
-87
lines changed

4 files changed

+98
-87
lines changed

include/linux/filter.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -515,9 +515,8 @@ struct sk_msg_buff {
515515
int sg_end;
516516
struct scatterlist sg_data[MAX_SKB_FRAGS];
517517
bool sg_copy[MAX_SKB_FRAGS];
518-
__u32 key;
519518
__u32 flags;
520-
struct bpf_map *map;
519+
struct sock *sk_redir;
521520
struct sk_buff *skb;
522521
struct list_head list;
523522
};

include/net/tcp.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -814,9 +814,8 @@ struct tcp_skb_cb {
814814
#endif
815815
} header; /* For incoming skbs */
816816
struct {
817-
__u32 key;
818817
__u32 flags;
819-
struct bpf_map *map;
818+
struct sock *sk_redir;
820819
void *data_end;
821820
} bpf;
822821
};

kernel/bpf/sockmap.c

Lines changed: 88 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,18 @@
4848
#define SOCK_CREATE_FLAG_MASK \
4949
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
5050

51-
struct bpf_stab {
52-
struct bpf_map map;
53-
struct sock **sock_map;
51+
struct bpf_sock_progs {
5452
struct bpf_prog *bpf_tx_msg;
5553
struct bpf_prog *bpf_parse;
5654
struct bpf_prog *bpf_verdict;
5755
};
5856

57+
struct bpf_stab {
58+
struct bpf_map map;
59+
struct sock **sock_map;
60+
struct bpf_sock_progs progs;
61+
};
62+
5963
enum smap_psock_state {
6064
SMAP_TX_RUNNING,
6165
};
@@ -461,7 +465,7 @@ static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
461465
static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
462466
{
463467
return ((_rc == SK_PASS) ?
464-
(md->map ? __SK_REDIRECT : __SK_PASS) :
468+
(md->sk_redir ? __SK_REDIRECT : __SK_PASS) :
465469
__SK_DROP);
466470
}
467471

@@ -1092,7 +1096,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
10921096
* when we orphan the skb so that we don't have the possibility
10931097
* to reference a stale map.
10941098
*/
1095-
TCP_SKB_CB(skb)->bpf.map = NULL;
1099+
TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
10961100
skb->sk = psock->sock;
10971101
bpf_compute_data_pointers(skb);
10981102
preempt_disable();
@@ -1102,7 +1106,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
11021106

11031107
/* Moving return codes from UAPI namespace into internal namespace */
11041108
return rc == SK_PASS ?
1105-
(TCP_SKB_CB(skb)->bpf.map ? __SK_REDIRECT : __SK_PASS) :
1109+
(TCP_SKB_CB(skb)->bpf.sk_redir ? __SK_REDIRECT : __SK_PASS) :
11061110
__SK_DROP;
11071111
}
11081112

@@ -1372,7 +1376,6 @@ static int smap_init_sock(struct smap_psock *psock,
13721376
}
13731377

13741378
static void smap_init_progs(struct smap_psock *psock,
1375-
struct bpf_stab *stab,
13761379
struct bpf_prog *verdict,
13771380
struct bpf_prog *parse)
13781381
{
@@ -1450,14 +1453,13 @@ static void smap_gc_work(struct work_struct *w)
14501453
kfree(psock);
14511454
}
14521455

1453-
static struct smap_psock *smap_init_psock(struct sock *sock,
1454-
struct bpf_stab *stab)
1456+
static struct smap_psock *smap_init_psock(struct sock *sock, int node)
14551457
{
14561458
struct smap_psock *psock;
14571459

14581460
psock = kzalloc_node(sizeof(struct smap_psock),
14591461
GFP_ATOMIC | __GFP_NOWARN,
1460-
stab->map.numa_node);
1462+
node);
14611463
if (!psock)
14621464
return ERR_PTR(-ENOMEM);
14631465

@@ -1662,60 +1664,46 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
16621664
* - sock_map must use READ_ONCE and (cmp)xchg operations
16631665
* - BPF verdict/parse programs must use READ_ONCE and xchg operations
16641666
*/
1665-
static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
1666-
struct bpf_map *map,
1667-
void *key, u64 flags)
1667+
1668+
static int __sock_map_ctx_update_elem(struct bpf_map *map,
1669+
struct bpf_sock_progs *progs,
1670+
struct sock *sock,
1671+
struct sock **map_link,
1672+
void *key)
16681673
{
1669-
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
1670-
struct smap_psock_map_entry *e = NULL;
16711674
struct bpf_prog *verdict, *parse, *tx_msg;
1672-
struct sock *osock, *sock;
1675+
struct smap_psock_map_entry *e = NULL;
16731676
struct smap_psock *psock;
1674-
u32 i = *(u32 *)key;
16751677
bool new = false;
16761678
int err;
16771679

1678-
if (unlikely(flags > BPF_EXIST))
1679-
return -EINVAL;
1680-
1681-
if (unlikely(i >= stab->map.max_entries))
1682-
return -E2BIG;
1683-
1684-
sock = READ_ONCE(stab->sock_map[i]);
1685-
if (flags == BPF_EXIST && !sock)
1686-
return -ENOENT;
1687-
else if (flags == BPF_NOEXIST && sock)
1688-
return -EEXIST;
1689-
1690-
sock = skops->sk;
1691-
16921680
/* 1. If sock map has BPF programs those will be inherited by the
16931681
* sock being added. If the sock is already attached to BPF programs
16941682
* this results in an error.
16951683
*/
1696-
verdict = READ_ONCE(stab->bpf_verdict);
1697-
parse = READ_ONCE(stab->bpf_parse);
1698-
tx_msg = READ_ONCE(stab->bpf_tx_msg);
1684+
verdict = READ_ONCE(progs->bpf_verdict);
1685+
parse = READ_ONCE(progs->bpf_parse);
1686+
tx_msg = READ_ONCE(progs->bpf_tx_msg);
16991687

17001688
if (parse && verdict) {
17011689
/* bpf prog refcnt may be zero if a concurrent attach operation
17021690
* removes the program after the above READ_ONCE() but before
17031691
* we increment the refcnt. If this is the case abort with an
17041692
* error.
17051693
*/
1706-
verdict = bpf_prog_inc_not_zero(stab->bpf_verdict);
1694+
verdict = bpf_prog_inc_not_zero(progs->bpf_verdict);
17071695
if (IS_ERR(verdict))
17081696
return PTR_ERR(verdict);
17091697

1710-
parse = bpf_prog_inc_not_zero(stab->bpf_parse);
1698+
parse = bpf_prog_inc_not_zero(progs->bpf_parse);
17111699
if (IS_ERR(parse)) {
17121700
bpf_prog_put(verdict);
17131701
return PTR_ERR(parse);
17141702
}
17151703
}
17161704

17171705
if (tx_msg) {
1718-
tx_msg = bpf_prog_inc_not_zero(stab->bpf_tx_msg);
1706+
tx_msg = bpf_prog_inc_not_zero(progs->bpf_tx_msg);
17191707
if (IS_ERR(tx_msg)) {
17201708
if (verdict)
17211709
bpf_prog_put(verdict);
@@ -1748,7 +1736,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
17481736
goto out_progs;
17491737
}
17501738
} else {
1751-
psock = smap_init_psock(sock, stab);
1739+
psock = smap_init_psock(sock, map->numa_node);
17521740
if (IS_ERR(psock)) {
17531741
err = PTR_ERR(psock);
17541742
goto out_progs;
@@ -1763,7 +1751,6 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
17631751
err = -ENOMEM;
17641752
goto out_progs;
17651753
}
1766-
e->entry = &stab->sock_map[i];
17671754

17681755
/* 3. At this point we have a reference to a valid psock that is
17691756
* running. Attach any BPF programs needed.
@@ -1780,7 +1767,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
17801767
err = smap_init_sock(psock, sock);
17811768
if (err)
17821769
goto out_free;
1783-
smap_init_progs(psock, stab, verdict, parse);
1770+
smap_init_progs(psock, verdict, parse);
17841771
smap_start_sock(psock, sock);
17851772
}
17861773

@@ -1789,19 +1776,12 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
17891776
* it with. Because we can only have a single set of programs if
17901777
* old_sock has a strp we can stop it.
17911778
*/
1792-
list_add_tail(&e->list, &psock->maps);
1793-
write_unlock_bh(&sock->sk_callback_lock);
1794-
1795-
osock = xchg(&stab->sock_map[i], sock);
1796-
if (osock) {
1797-
struct smap_psock *opsock = smap_psock_sk(osock);
1798-
1799-
write_lock_bh(&osock->sk_callback_lock);
1800-
smap_list_remove(opsock, &stab->sock_map[i]);
1801-
smap_release_sock(opsock, osock);
1802-
write_unlock_bh(&osock->sk_callback_lock);
1779+
if (map_link) {
1780+
e->entry = map_link;
1781+
list_add_tail(&e->list, &psock->maps);
18031782
}
1804-
return 0;
1783+
write_unlock_bh(&sock->sk_callback_lock);
1784+
return err;
18051785
out_free:
18061786
smap_release_sock(psock, sock);
18071787
out_progs:
@@ -1816,23 +1796,69 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
18161796
return err;
18171797
}
18181798

1819-
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
1799+
/* Array-backed sockmap update: install skops->sk in slot *(u32 *)key.
 *
 * @skops: sock_ops context; its ->sk is the socket being added.
 * @map:   the bpf_map embedded in a struct bpf_stab.
 * @key:   pointer to the u32 array index.
 * @flags: update mode; BPF_EXIST requires an occupied slot, BPF_NOEXIST
 *         requires an empty one.
 *
 * Returns 0 on success, -EINVAL if @flags is above BPF_EXIST, -E2BIG if the
 * index is outside max_entries, -ENOENT/-EEXIST when the slot state does not
 * match @flags, or the error reported by __sock_map_ctx_update_elem().
 */
static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
				    struct bpf_map *map,
				    void *key, u64 flags)
{
	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
	struct bpf_sock_progs *progs = &stab->progs;
	struct sock *osock, *sock;
	u32 i = *(u32 *)key;
	int err;

	if (unlikely(flags > BPF_EXIST))
		return -EINVAL;

	if (unlikely(i >= stab->map.max_entries))
		return -E2BIG;

	/* Only a snapshot of the slot; used solely to validate @flags. */
	sock = READ_ONCE(stab->sock_map[i]);
	if (flags == BPF_EXIST && !sock)
		return -ENOENT;
	else if (flags == BPF_NOEXIST && sock)
		return -EEXIST;

	sock = skops->sk;
	err = __sock_map_ctx_update_elem(map, progs, sock, &stab->sock_map[i],
					 key);
	/* Propagate the failure: the slot has not been touched, so reporting
	 * success here would tell the caller an entry exists when it does not.
	 * NOTE(review): this relies on the helper returning 0 on success; its
	 * success path returns a local 'err' - confirm it is initialized.
	 */
	if (err)
		return err;

	/* Publish the new socket and drop the map's link to any socket that
	 * previously occupied this slot.
	 */
	osock = xchg(&stab->sock_map[i], sock);
	if (osock) {
		struct smap_psock *opsock = smap_psock_sk(osock);

		write_lock_bh(&osock->sk_callback_lock);
		smap_list_remove(opsock, &stab->sock_map[i]);
		smap_release_sock(opsock, osock);
		write_unlock_bh(&osock->sk_callback_lock);
	}

	return 0;
}
1839+
1840+
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
1841+
{
1842+
struct bpf_sock_progs *progs;
18221843
struct bpf_prog *orig;
18231844

1824-
if (unlikely(map->map_type != BPF_MAP_TYPE_SOCKMAP))
1845+
if (map->map_type == BPF_MAP_TYPE_SOCKMAP) {
1846+
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
1847+
1848+
progs = &stab->progs;
1849+
} else {
18251850
return -EINVAL;
1851+
}
18261852

18271853
switch (type) {
18281854
case BPF_SK_MSG_VERDICT:
1829-
orig = xchg(&stab->bpf_tx_msg, prog);
1855+
orig = xchg(&progs->bpf_tx_msg, prog);
18301856
break;
18311857
case BPF_SK_SKB_STREAM_PARSER:
1832-
orig = xchg(&stab->bpf_parse, prog);
1858+
orig = xchg(&progs->bpf_parse, prog);
18331859
break;
18341860
case BPF_SK_SKB_STREAM_VERDICT:
1835-
orig = xchg(&stab->bpf_verdict, prog);
1861+
orig = xchg(&progs->bpf_verdict, prog);
18361862
break;
18371863
default:
18381864
return -EOPNOTSUPP;
@@ -1881,16 +1907,18 @@ static int sock_map_update_elem(struct bpf_map *map,
18811907
static void sock_map_release(struct bpf_map *map)
18821908
{
18831909
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
1910+
struct bpf_sock_progs *progs;
18841911
struct bpf_prog *orig;
18851912

1886-
orig = xchg(&stab->bpf_parse, NULL);
1913+
progs = &stab->progs;
1914+
orig = xchg(&progs->bpf_parse, NULL);
18871915
if (orig)
18881916
bpf_prog_put(orig);
1889-
orig = xchg(&stab->bpf_verdict, NULL);
1917+
orig = xchg(&progs->bpf_verdict, NULL);
18901918
if (orig)
18911919
bpf_prog_put(orig);
18921920

1893-
orig = xchg(&stab->bpf_tx_msg, NULL);
1921+
orig = xchg(&progs->bpf_tx_msg, NULL);
18941922
if (orig)
18951923
bpf_prog_put(orig);
18961924
}

net/core/filter.c

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2083,26 +2083,19 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
20832083
if (unlikely(flags & ~(BPF_F_INGRESS)))
20842084
return SK_DROP;
20852085

2086-
tcb->bpf.key = key;
20872086
tcb->bpf.flags = flags;
2088-
tcb->bpf.map = map;
2087+
tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
2088+
if (!tcb->bpf.sk_redir)
2089+
return SK_DROP;
20892090

20902091
return SK_PASS;
20912092
}
20922093

20932094
struct sock *do_sk_redirect_map(struct sk_buff *skb)
20942095
{
20952096
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2096-
struct sock *sk = NULL;
2097-
2098-
if (tcb->bpf.map) {
2099-
sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
21002097

2101-
tcb->bpf.key = 0;
2102-
tcb->bpf.map = NULL;
2103-
}
2104-
2105-
return sk;
2098+
return tcb->bpf.sk_redir;
21062099
}
21072100

21082101
static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
@@ -2122,25 +2115,17 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
21222115
if (unlikely(flags & ~(BPF_F_INGRESS)))
21232116
return SK_DROP;
21242117

2125-
msg->key = key;
21262118
msg->flags = flags;
2127-
msg->map = map;
2119+
msg->sk_redir = __sock_map_lookup_elem(map, key);
2120+
if (!msg->sk_redir)
2121+
return SK_DROP;
21282122

21292123
return SK_PASS;
21302124
}
21312125

21322126
struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
21332127
{
2134-
struct sock *sk = NULL;
2135-
2136-
if (msg->map) {
2137-
sk = __sock_map_lookup_elem(msg->map, msg->key);
2138-
2139-
msg->key = 0;
2140-
msg->map = NULL;
2141-
}
2142-
2143-
return sk;
2128+
return msg->sk_redir;
21442129
}
21452130

21462131
static const struct bpf_func_proto bpf_msg_redirect_map_proto = {

0 commit comments

Comments
 (0)