Skip to content

Commit 1aa12bd

Browse files
jrfastabborkmann
authored andcommitted
bpf: sockmap, add sock close() hook to remove socks
The selftests test_maps program was leaving dangling BPF sockmap programs around because not all psock elements were removed from the map. The elements in turn hold a reference on the BPF program they are attached to causing BPF programs to stay open even after test_maps has completed. The original intent was that sk_state_change() would be called when TCP socks went through TCP_CLOSE state. However, because socks may be in SOCK_DEAD state or the sock may be a listening socket the event is not always triggered. To resolve this use the ULP infrastructure and register our own proto close() handler. This fixes the above case. Fixes: 174a79f ("bpf: sockmap with sk redirect support") Reported-by: Prashant Bhole <[email protected]> Signed-off-by: John Fastabend <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]>
1 parent b11a632 commit 1aa12bd

File tree

2 files changed

+103
-67
lines changed

2 files changed

+103
-67
lines changed

include/net/tcp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1985,6 +1985,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
19851985

19861986
enum {
19871987
TCP_ULP_TLS,
1988+
TCP_ULP_BPF,
19881989
};
19891990

19901991
struct tcp_ulp_ops {
@@ -2003,6 +2004,7 @@ struct tcp_ulp_ops {
20032004
int tcp_register_ulp(struct tcp_ulp_ops *type);
20042005
void tcp_unregister_ulp(struct tcp_ulp_ops *type);
20052006
int tcp_set_ulp(struct sock *sk, const char *name);
2007+
int tcp_set_ulp_id(struct sock *sk, const int ulp);
20062008
void tcp_get_available_ulp(char *buf, size_t len);
20072009
void tcp_cleanup_ulp(struct sock *sk);
20082010

kernel/bpf/sockmap.c

Lines changed: 101 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -86,22 +86,113 @@ struct smap_psock {
8686
struct work_struct tx_work;
8787
struct work_struct gc_work;
8888

89+
struct proto *sk_proto;
90+
void (*save_close)(struct sock *sk, long timeout);
8991
void (*save_data_ready)(struct sock *sk);
9092
void (*save_write_space)(struct sock *sk);
91-
void (*save_state_change)(struct sock *sk);
9293
};
9394

9495
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
9596
{
9697
return rcu_dereference_sk_user_data(sk);
9798
}
9899

100+
static struct proto tcp_bpf_proto;
101+
static int bpf_tcp_init(struct sock *sk)
102+
{
103+
struct smap_psock *psock;
104+
105+
rcu_read_lock();
106+
psock = smap_psock_sk(sk);
107+
if (unlikely(!psock)) {
108+
rcu_read_unlock();
109+
return -EINVAL;
110+
}
111+
112+
if (unlikely(psock->sk_proto)) {
113+
rcu_read_unlock();
114+
return -EBUSY;
115+
}
116+
117+
psock->save_close = sk->sk_prot->close;
118+
psock->sk_proto = sk->sk_prot;
119+
sk->sk_prot = &tcp_bpf_proto;
120+
rcu_read_unlock();
121+
return 0;
122+
}
123+
124+
static void bpf_tcp_release(struct sock *sk)
125+
{
126+
struct smap_psock *psock;
127+
128+
rcu_read_lock();
129+
psock = smap_psock_sk(sk);
130+
131+
if (likely(psock)) {
132+
sk->sk_prot = psock->sk_proto;
133+
psock->sk_proto = NULL;
134+
}
135+
rcu_read_unlock();
136+
}
137+
138+
static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
139+
140+
static void bpf_tcp_close(struct sock *sk, long timeout)
141+
{
142+
void (*close_fun)(struct sock *sk, long timeout);
143+
struct smap_psock_map_entry *e, *tmp;
144+
struct smap_psock *psock;
145+
struct sock *osk;
146+
147+
rcu_read_lock();
148+
psock = smap_psock_sk(sk);
149+
if (unlikely(!psock)) {
150+
rcu_read_unlock();
151+
return sk->sk_prot->close(sk, timeout);
152+
}
153+
154+
/* The psock may be destroyed anytime after exiting the RCU critial
155+
* section so by the time we use close_fun the psock may no longer
156+
* be valid. However, bpf_tcp_close is called with the sock lock
157+
* held so the close hook and sk are still valid.
158+
*/
159+
close_fun = psock->save_close;
160+
161+
write_lock_bh(&sk->sk_callback_lock);
162+
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
163+
osk = cmpxchg(e->entry, sk, NULL);
164+
if (osk == sk) {
165+
list_del(&e->list);
166+
smap_release_sock(psock, sk);
167+
}
168+
}
169+
write_unlock_bh(&sk->sk_callback_lock);
170+
rcu_read_unlock();
171+
close_fun(sk, timeout);
172+
}
173+
99174
enum __sk_action {
100175
__SK_DROP = 0,
101176
__SK_PASS,
102177
__SK_REDIRECT,
103178
};
104179

180+
static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
181+
.name = "bpf_tcp",
182+
.uid = TCP_ULP_BPF,
183+
.user_visible = false,
184+
.owner = NULL,
185+
.init = bpf_tcp_init,
186+
.release = bpf_tcp_release,
187+
};
188+
189+
static int bpf_tcp_ulp_register(void)
190+
{
191+
tcp_bpf_proto = tcp_prot;
192+
tcp_bpf_proto.close = bpf_tcp_close;
193+
return tcp_register_ulp(&bpf_tcp_ulp_ops);
194+
}
195+
105196
static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
106197
{
107198
struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
@@ -166,68 +257,6 @@ static void smap_report_sk_error(struct smap_psock *psock, int err)
166257
sk->sk_error_report(sk);
167258
}
168259

169-
static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
170-
171-
/* Called with lock_sock(sk) held */
172-
static void smap_state_change(struct sock *sk)
173-
{
174-
struct smap_psock_map_entry *e, *tmp;
175-
struct smap_psock *psock;
176-
struct socket_wq *wq;
177-
struct sock *osk;
178-
179-
rcu_read_lock();
180-
181-
/* Allowing transitions into an established syn_recv states allows
182-
* for early binding sockets to a smap object before the connection
183-
* is established.
184-
*/
185-
switch (sk->sk_state) {
186-
case TCP_SYN_SENT:
187-
case TCP_SYN_RECV:
188-
case TCP_ESTABLISHED:
189-
break;
190-
case TCP_CLOSE_WAIT:
191-
case TCP_CLOSING:
192-
case TCP_LAST_ACK:
193-
case TCP_FIN_WAIT1:
194-
case TCP_FIN_WAIT2:
195-
case TCP_LISTEN:
196-
break;
197-
case TCP_CLOSE:
198-
/* Only release if the map entry is in fact the sock in
199-
* question. There is a case where the operator deletes
200-
* the sock from the map, but the TCP sock is closed before
201-
* the psock is detached. Use cmpxchg to verify correct
202-
* sock is removed.
203-
*/
204-
psock = smap_psock_sk(sk);
205-
if (unlikely(!psock))
206-
break;
207-
write_lock_bh(&sk->sk_callback_lock);
208-
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
209-
osk = cmpxchg(e->entry, sk, NULL);
210-
if (osk == sk) {
211-
list_del(&e->list);
212-
smap_release_sock(psock, sk);
213-
}
214-
}
215-
write_unlock_bh(&sk->sk_callback_lock);
216-
break;
217-
default:
218-
psock = smap_psock_sk(sk);
219-
if (unlikely(!psock))
220-
break;
221-
smap_report_sk_error(psock, EPIPE);
222-
break;
223-
}
224-
225-
wq = rcu_dereference(sk->sk_wq);
226-
if (skwq_has_sleeper(wq))
227-
wake_up_interruptible_all(&wq->wait);
228-
rcu_read_unlock();
229-
}
230-
231260
static void smap_read_sock_strparser(struct strparser *strp,
232261
struct sk_buff *skb)
233262
{
@@ -322,10 +351,8 @@ static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
322351
return;
323352
sk->sk_data_ready = psock->save_data_ready;
324353
sk->sk_write_space = psock->save_write_space;
325-
sk->sk_state_change = psock->save_state_change;
326354
psock->save_data_ready = NULL;
327355
psock->save_write_space = NULL;
328-
psock->save_state_change = NULL;
329356
strp_stop(&psock->strp);
330357
psock->strp_enabled = false;
331358
}
@@ -350,6 +377,7 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
350377
if (psock->refcnt)
351378
return;
352379

380+
tcp_cleanup_ulp(sock);
353381
smap_stop_sock(psock, sock);
354382
clear_bit(SMAP_TX_RUNNING, &psock->state);
355383
rcu_assign_sk_user_data(sock, NULL);
@@ -427,10 +455,8 @@ static void smap_start_sock(struct smap_psock *psock, struct sock *sk)
427455
return;
428456
psock->save_data_ready = sk->sk_data_ready;
429457
psock->save_write_space = sk->sk_write_space;
430-
psock->save_state_change = sk->sk_state_change;
431458
sk->sk_data_ready = smap_data_ready;
432459
sk->sk_write_space = smap_write_space;
433-
sk->sk_state_change = smap_state_change;
434460
psock->strp_enabled = true;
435461
}
436462

@@ -509,6 +535,10 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
509535
if (attr->value_size > KMALLOC_MAX_SIZE)
510536
return ERR_PTR(-E2BIG);
511537

538+
err = bpf_tcp_ulp_register();
539+
if (err && err != -EEXIST)
540+
return ERR_PTR(err);
541+
512542
stab = kzalloc(sizeof(*stab), GFP_USER);
513543
if (!stab)
514544
return ERR_PTR(-ENOMEM);
@@ -754,6 +784,10 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
754784
goto out_progs;
755785
}
756786

787+
err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
788+
if (err)
789+
goto out_progs;
790+
757791
set_bit(SMAP_TX_RUNNING, &psock->state);
758792
}
759793

0 commit comments

Comments
 (0)