Skip to content

Commit 91b5b21

Browse files
Lawrence Brakmo authored and davem330 committed
bpf: Add support for changing congestion control
Add support for changing congestion control for SOCK_OPS bpf programs through the setsockopt bpf helper function. Also add a new SOCK_OPS op, BPF_SOCK_OPS_NEEDS_ECN, which is needed for congestion controls, such as dctcp, that must enable ECN in the SYN packets.

Signed-off-by: Lawrence Brakmo <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent d992536 commit 91b5b21

File tree

7 files changed

+58
-17
lines changed

7 files changed

+58
-17
lines changed

include/net/tcp.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name);
10041004
void tcp_get_available_congestion_control(char *buf, size_t len);
10051005
void tcp_get_allowed_congestion_control(char *buf, size_t len);
10061006
int tcp_set_allowed_congestion_control(char *allowed);
1007-
int tcp_set_congestion_control(struct sock *sk, const char *name);
1007+
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
1008+
void tcp_reinit_congestion_control(struct sock *sk,
1009+
const struct tcp_congestion_ops *ca);
10081010
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
10091011
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
10101012

@@ -2078,4 +2080,9 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
20782080
rwnd = 0;
20792081
return rwnd;
20802082
}
2083+
2084+
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
2085+
{
2086+
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
2087+
}
20812088
#endif /* _TCP_H */

include/uapi/linux/bpf.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,9 @@ enum {
778778
* passive connection is
779779
* established
780780
*/
781+
BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
782+
* needs ECN
783+
*/
781784
};
782785

783786
#endif /* _UAPI__LINUX_BPF_H__ */

net/core/filter.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2719,8 +2719,24 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
27192719
}
27202720
} else if (level == SOL_TCP &&
27212721
sk->sk_prot->setsockopt == tcp_setsockopt) {
2722-
/* Place holder */
2722+
#ifdef CONFIG_INET
2723+
if (optname == TCP_CONGESTION) {
2724+
char name[TCP_CA_NAME_MAX];
2725+
2726+
strncpy(name, optval, min_t(long, optlen,
2727+
TCP_CA_NAME_MAX-1));
2728+
name[TCP_CA_NAME_MAX-1] = 0;
2729+
ret = tcp_set_congestion_control(sk, name, false);
2730+
if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
2731+
/* replacing an existing ca */
2732+
tcp_reinit_congestion_control(sk,
2733+
inet_csk(sk)->icsk_ca_ops);
2734+
} else {
2735+
ret = -EINVAL;
2736+
}
2737+
#else
27232738
ret = -EINVAL;
2739+
#endif
27242740
} else {
27252741
ret = -EINVAL;
27262742
}

net/ipv4/tcp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2481,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
24812481
name[val] = 0;
24822482

24832483
lock_sock(sk);
2484-
err = tcp_set_congestion_control(sk, name);
2484+
err = tcp_set_congestion_control(sk, name, true);
24852485
release_sock(sk);
24862486
return err;
24872487
}

net/ipv4/tcp_cong.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
189189
INET_ECN_dontxmit(sk);
190190
}
191191

192-
static void tcp_reinit_congestion_control(struct sock *sk,
193-
const struct tcp_congestion_ops *ca)
192+
void tcp_reinit_congestion_control(struct sock *sk,
193+
const struct tcp_congestion_ops *ca)
194194
{
195195
struct inet_connection_sock *icsk = inet_csk(sk);
196196

@@ -333,8 +333,12 @@ int tcp_set_allowed_congestion_control(char *val)
333333
return ret;
334334
}
335335

336-
/* Change congestion control for socket */
337-
int tcp_set_congestion_control(struct sock *sk, const char *name)
336+
/* Change congestion control for socket. If load is false, then it is the
337+
* responsibility of the caller to call tcp_init_congestion_control or
338+
* tcp_reinit_congestion_control (if the current congestion control was
339+
* already initialized).
340+
*/
341+
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
338342
{
339343
struct inet_connection_sock *icsk = inet_csk(sk);
340344
const struct tcp_congestion_ops *ca;
@@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
344348
return -EPERM;
345349

346350
rcu_read_lock();
347-
ca = __tcp_ca_find_autoload(name);
351+
if (!load)
352+
ca = tcp_ca_find(name);
353+
else
354+
ca = __tcp_ca_find_autoload(name);
348355
/* No change asking for existing value */
349356
if (ca == icsk->icsk_ca_ops) {
350357
icsk->icsk_ca_setsockopt = 1;
351358
goto out;
352359
}
353-
if (!ca)
360+
if (!ca) {
354361
err = -ENOENT;
355-
else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
356-
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
362+
} else if (!load) {
363+
icsk->icsk_ca_ops = ca;
364+
if (!try_module_get(ca->owner))
365+
err = -EBUSY;
366+
} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
367+
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
357368
err = -EPERM;
358-
else if (!try_module_get(ca->owner))
369+
} else if (!try_module_get(ca->owner)) {
359370
err = -EBUSY;
360-
else
371+
} else {
361372
tcp_reinit_congestion_control(sk, ca);
373+
}
362374
out:
363375
rcu_read_unlock();
364376
return err;

net/ipv4/tcp_input.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6191,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
61916191
ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
61926192

61936193
if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6194-
(ecn_ok_dst & DST_FEATURE_ECN_CA))
6194+
(ecn_ok_dst & DST_FEATURE_ECN_CA) ||
6195+
tcp_bpf_ca_needs_ecn((struct sock *)req))
61956196
inet_rsk(req)->ecn_ok = 1;
61966197
}
61976198

net/ipv4/tcp_output.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -316,16 +316,18 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
316316
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
317317
if (!(tp->ecn_flags & TCP_ECN_OK))
318318
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
319-
else if (tcp_ca_needs_ecn(sk))
319+
else if (tcp_ca_needs_ecn(sk) ||
320+
tcp_bpf_ca_needs_ecn(sk))
320321
INET_ECN_xmit(sk);
321322
}
322323

323324
/* Packet ECN state for a SYN. */
324325
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
325326
{
326327
struct tcp_sock *tp = tcp_sk(sk);
328+
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
327329
bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
328-
tcp_ca_needs_ecn(sk);
330+
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
329331

330332
if (!use_ecn) {
331333
const struct dst_entry *dst = __sk_dst_get(sk);
@@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
339341
if (use_ecn) {
340342
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
341343
tp->ecn_flags = TCP_ECN_OK;
342-
if (tcp_ca_needs_ecn(sk))
344+
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
343345
INET_ECN_xmit(sk);
344346
}
345347
}

0 commit comments

Comments
 (0)