@@ -86,22 +86,113 @@ struct smap_psock {
	struct work_struct tx_work;
	struct work_struct gc_work;

+	struct proto *sk_proto;
+	void (*save_close)(struct sock *sk, long timeout);
	void (*save_data_ready)(struct sock *sk);
	void (*save_write_space)(struct sock *sk);
-	void (*save_state_change)(struct sock *sk);
};

static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{
	return rcu_dereference_sk_user_data(sk);
}

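+/* BPF ULP: tcp_bpf_proto is tcp_prot with close() overridden (set up in
+ * bpf_tcp_ulp_register below). The init hook stashes the socket's
+ * original proto ops in the psock and swaps in tcp_bpf_proto so that a
+ * later close() on the socket is intercepted.
+ */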
+static struct proto tcp_bpf_proto;
+static int bpf_tcp_init(struct sock *sk)
+{
+	struct smap_psock *psock;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	if (unlikely(psock->sk_proto)) {
+		rcu_read_unlock();
+		return -EBUSY;
+	}
+
+	psock->save_close = sk->sk_prot->close;
+	psock->sk_proto = sk->sk_prot;
+	sk->sk_prot = &tcp_bpf_proto;
+	rcu_read_unlock();
+	return 0;
+}
+
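+/* ULP release hook: restore the proto ops saved by bpf_tcp_init. */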
+static void bpf_tcp_release(struct sock *sk)
+{
+	struct smap_psock *psock;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+
+	if (likely(psock)) {
+		sk->sk_prot = psock->sk_proto;
+		psock->sk_proto = NULL;
+	}
+	rcu_read_unlock();
+}
+
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+
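+/* close() handler installed via tcp_bpf_proto: unlink the sock from any
+ * map entries that still point at it, then call the saved close().
+ */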
+static void bpf_tcp_close(struct sock *sk, long timeout)
+{
+	void (*close_fun)(struct sock *sk, long timeout);
+	struct smap_psock_map_entry *e, *tmp;
+	struct smap_psock *psock;
+	struct sock *osk;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		return sk->sk_prot->close(sk, timeout);
+	}
+
+	/* The psock may be destroyed anytime after exiting the RCU critical
+	 * section, so by the time close_fun is used the psock may no longer
+	 * be valid. However, bpf_tcp_close is called with the sock lock
+	 * held, so the close hook and sk are still valid.
+	 */
+	close_fun = psock->save_close;
+
+	write_lock_bh(&sk->sk_callback_lock);
+	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+		osk = cmpxchg(e->entry, sk, NULL);
+		if (osk == sk) {
+			list_del(&e->list);
+			smap_release_sock(psock, sk);
+		}
+	}
+	write_unlock_bh(&sk->sk_callback_lock);
+	rcu_read_unlock();
+	close_fun(sk, timeout);
+}
+

enum __sk_action {
	__SK_DROP = 0,
	__SK_PASS,
	__SK_REDIRECT,
};

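+/* Not user selectable via the TCP_ULP setsockopt (user_visible = false);
+ * the map update path attaches it by id with tcp_set_ulp_id() instead.
+ */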
+static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
+	.name		= "bpf_tcp",
+	.uid		= TCP_ULP_BPF,
+	.user_visible	= false,
+	.owner		= NULL,
+	.init		= bpf_tcp_init,
+	.release	= bpf_tcp_release,
+};
+
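+/* Clone tcp_prot at registration time so tcp_bpf_proto inherits all the
+ * regular TCP callbacks, overriding only close().
+ */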
+static int bpf_tcp_ulp_register(void)
+{
+	tcp_bpf_proto = tcp_prot;
+	tcp_bpf_proto.close = bpf_tcp_close;
+	return tcp_register_ulp(&bpf_tcp_ulp_ops);
+}
+

static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
{
	struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
@@ -166,68 +257,6 @@ static void smap_report_sk_error(struct smap_psock *psock, int err)
	sk->sk_error_report(sk);
}

-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-
-/* Called with lock_sock(sk) held */
-static void smap_state_change(struct sock *sk)
-{
-	struct smap_psock_map_entry *e, *tmp;
-	struct smap_psock *psock;
-	struct socket_wq *wq;
-	struct sock *osk;
-
-	rcu_read_lock();
-
-	/* Allowing transitions into an established syn_recv states allows
-	 * for early binding sockets to a smap object before the connection
-	 * is established.
-	 */
-	switch (sk->sk_state) {
-	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:
-	case TCP_ESTABLISHED:
-		break;
-	case TCP_CLOSE_WAIT:
-	case TCP_CLOSING:
-	case TCP_LAST_ACK:
-	case TCP_FIN_WAIT1:
-	case TCP_FIN_WAIT2:
-	case TCP_LISTEN:
-		break;
-	case TCP_CLOSE:
-		/* Only release if the map entry is in fact the sock in
-		 * question. There is a case where the operator deletes
-		 * the sock from the map, but the TCP sock is closed before
-		 * the psock is detached. Use cmpxchg to verify correct
-		 * sock is removed.
-		 */
-		psock = smap_psock_sk(sk);
-		if (unlikely(!psock))
-			break;
-		write_lock_bh(&sk->sk_callback_lock);
-		list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-			osk = cmpxchg(e->entry, sk, NULL);
-			if (osk == sk) {
-				list_del(&e->list);
-				smap_release_sock(psock, sk);
-			}
-		}
-		write_unlock_bh(&sk->sk_callback_lock);
-		break;
-	default:
-		psock = smap_psock_sk(sk);
-		if (unlikely(!psock))
-			break;
-		smap_report_sk_error(psock, EPIPE);
-		break;
-	}
-
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_all(&wq->wait);
-	rcu_read_unlock();
-}
-
static void smap_read_sock_strparser(struct strparser *strp,
				     struct sk_buff *skb)
{
@@ -322,10 +351,8 @@ static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
		return;
	sk->sk_data_ready = psock->save_data_ready;
	sk->sk_write_space = psock->save_write_space;
-	sk->sk_state_change = psock->save_state_change;
	psock->save_data_ready = NULL;
	psock->save_write_space = NULL;
-	psock->save_state_change = NULL;
	strp_stop(&psock->strp);
	psock->strp_enabled = false;
}
@@ -350,6 +377,7 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
	if (psock->refcnt)
		return;

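+	/* Last reference dropped: detach the ULP so the socket gets its
+	 * original proto ops back before the psock is torn down.
+	 */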
+	tcp_cleanup_ulp(sock);
	smap_stop_sock(psock, sock);
	clear_bit(SMAP_TX_RUNNING, &psock->state);
	rcu_assign_sk_user_data(sock, NULL);
@@ -427,10 +455,8 @@ static void smap_start_sock(struct smap_psock *psock, struct sock *sk)
		return;
	psock->save_data_ready = sk->sk_data_ready;
	psock->save_write_space = sk->sk_write_space;
-	psock->save_state_change = sk->sk_state_change;
	sk->sk_data_ready = smap_data_ready;
	sk->sk_write_space = smap_write_space;
-	sk->sk_state_change = smap_state_change;
	psock->strp_enabled = true;
}

@@ -509,6 +535,10 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
	if (attr->value_size > KMALLOC_MAX_SIZE)
		return ERR_PTR(-E2BIG);

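+	/* Register the ULP on first map allocation; -EEXIST means an
+	 * earlier sockmap already registered it and is not an error.
+	 */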
+	err = bpf_tcp_ulp_register();
+	if (err && err != -EEXIST)
+		return ERR_PTR(err);
+
	stab = kzalloc(sizeof(*stab), GFP_USER);
	if (!stab)
		return ERR_PTR(-ENOMEM);
@@ -754,6 +784,10 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
			goto out_progs;
		}

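+		/* Attach the BPF ULP by id; this calls bpf_tcp_init() and
+		 * swaps in tcp_bpf_proto for this socket.
+		 */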
+		err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
+		if (err)
+			goto out_progs;
+
		set_bit(SMAP_TX_RUNNING, &psock->state);
	}
