Skip to content

Commit b05545e

Browse files
jrfastab authored and
borkmann committed
bpf: sockmap, fix transition through disconnect without close
It is possible (via shutdown()) for TCP socks to go through the TCP_CLOSE state via tcp_disconnect() without actually calling tcp_close(), which would then call our bpf_tcp_close() callback. Because of this, a user could disconnect a socket and then put it in a LISTEN state, which would break our assumption that sockets are always in the ESTABLISHED state. To resolve this, rely on the unhash hook, which is called in the disconnect case, to remove the sock from the sockmap. Reported-by: Eric Dumazet <[email protected]> Fixes: 1aa12bd ("bpf: sockmap, add sock close() hook to remove socks") Signed-off-by: John Fastabend <[email protected]> Acked-by: Yonghong Song <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]>
1 parent 5607fff commit b05545e

File tree

1 file changed

+41
-19
lines changed

1 file changed

+41
-19
lines changed

kernel/bpf/sockmap.c

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ struct smap_psock {
132132
struct work_struct gc_work;
133133

134134
struct proto *sk_proto;
135+
void (*save_unhash)(struct sock *sk);
135136
void (*save_close)(struct sock *sk, long timeout);
136137
void (*save_data_ready)(struct sock *sk);
137138
void (*save_write_space)(struct sock *sk);
@@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
143144
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
144145
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
145146
int offset, size_t size, int flags);
147+
static void bpf_tcp_unhash(struct sock *sk);
146148
static void bpf_tcp_close(struct sock *sk, long timeout);
147149

148150
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
@@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
184186
struct proto *base)
185187
{
186188
prot[SOCKMAP_BASE] = *base;
189+
prot[SOCKMAP_BASE].unhash = bpf_tcp_unhash;
187190
prot[SOCKMAP_BASE].close = bpf_tcp_close;
188191
prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
189192
prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
@@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk)
217220
return -EBUSY;
218221
}
219222

223+
psock->save_unhash = sk->sk_prot->unhash;
220224
psock->save_close = sk->sk_prot->close;
221225
psock->sk_proto = sk->sk_prot;
222226

@@ -305,30 +309,12 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
305309
return e;
306310
}
307311

308-
static void bpf_tcp_close(struct sock *sk, long timeout)
312+
static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
309313
{
310-
void (*close_fun)(struct sock *sk, long timeout);
311314
struct smap_psock_map_entry *e;
312315
struct sk_msg_buff *md, *mtmp;
313-
struct smap_psock *psock;
314316
struct sock *osk;
315317

316-
lock_sock(sk);
317-
rcu_read_lock();
318-
psock = smap_psock_sk(sk);
319-
if (unlikely(!psock)) {
320-
rcu_read_unlock();
321-
release_sock(sk);
322-
return sk->sk_prot->close(sk, timeout);
323-
}
324-
325-
/* The psock may be destroyed anytime after exiting the RCU critial
326-
* section so by the time we use close_fun the psock may no longer
327-
* be valid. However, bpf_tcp_close is called with the sock lock
328-
* held so the close hook and sk are still valid.
329-
*/
330-
close_fun = psock->save_close;
331-
332318
if (psock->cork) {
333319
free_start_sg(psock->sock, psock->cork, true);
334320
kfree(psock->cork);
@@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
379365
kfree(e);
380366
e = psock_map_pop(sk, psock);
381367
}
368+
}
369+
370+
static void bpf_tcp_unhash(struct sock *sk)
371+
{
372+
void (*unhash_fun)(struct sock *sk);
373+
struct smap_psock *psock;
374+
375+
rcu_read_lock();
376+
psock = smap_psock_sk(sk);
377+
if (unlikely(!psock)) {
378+
rcu_read_unlock();
379+
if (sk->sk_prot->unhash)
380+
sk->sk_prot->unhash(sk);
381+
return;
382+
}
383+
unhash_fun = psock->save_unhash;
384+
bpf_tcp_remove(sk, psock);
385+
rcu_read_unlock();
386+
unhash_fun(sk);
387+
}
388+
389+
static void bpf_tcp_close(struct sock *sk, long timeout)
390+
{
391+
void (*close_fun)(struct sock *sk, long timeout);
392+
struct smap_psock *psock;
393+
394+
lock_sock(sk);
395+
rcu_read_lock();
396+
psock = smap_psock_sk(sk);
397+
if (unlikely(!psock)) {
398+
rcu_read_unlock();
399+
release_sock(sk);
400+
return sk->sk_prot->close(sk, timeout);
401+
}
402+
close_fun = psock->save_close;
403+
bpf_tcp_remove(sk, psock);
382404
rcu_read_unlock();
383405
release_sock(sk);
384406
close_fun(sk, timeout);

0 commit comments

Comments
 (0)