Skip to content

Commit 8919a9b

Browse files
nealcardwell authored and Alexei Starovoitov committed
tcp: Only init congestion control if not initialized already
Change tcp_init_transfer() to only initialize congestion control if it has not been initialized already. With this new approach, we can arrange things so that if the EBPF code sets the congestion control by calling setsockopt(TCP_CONGESTION) then tcp_init_transfer() will not re-initialize the CC module. This is an approach that has the following beneficial properties: (1) This allows CC module customizations made by the EBPF called in tcp_init_transfer() to persist, and not be wiped out by a later call to tcp_init_congestion_control() in tcp_init_transfer(). (2) Does not flip the order of EBPF and CC init, to avoid causing bugs for existing code upstream that depends on the current order. (3) Does not cause 2 initializations for CC in the case where the EBPF called in tcp_init_transfer() wants to set the CC to a new CC algorithm. (4) Allows follow-on simplifications to the code in net/core/filter.c and net/ipv4/tcp_cong.c, which currently both have some complexity to special-case CC initialization to avoid double CC initialization if EBPF sets the CC. Signed-off-by: Neal Cardwell <[email protected]> Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]> Acked-by: Yuchung Cheng <[email protected]> Acked-by: Kevin Yang <[email protected]> Cc: Lawrence Brakmo <[email protected]>
1 parent 18841da commit 8919a9b

File tree

4 files changed

+8
-3
lines changed

4 files changed

+8
-3
lines changed

include/net/inet_connection_sock.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,8 @@ struct inet_connection_sock {
9696
void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
9797
struct hlist_node icsk_listen_portaddr_node;
9898
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
99-
__u8 icsk_ca_state:6,
99+
__u8 icsk_ca_state:5,
100+
icsk_ca_initialized:1,
100101
icsk_ca_setsockopt:1,
101102
icsk_ca_dst_locked:1;
102103
__u8 icsk_retransmits;

net/ipv4/tcp.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2698,6 +2698,7 @@ int tcp_disconnect(struct sock *sk, int flags)
26982698
if (icsk->icsk_ca_ops->release)
26992699
icsk->icsk_ca_ops->release(sk);
27002700
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
2701+
icsk->icsk_ca_initialized = 0;
27012702
tcp_set_ca_state(sk, TCP_CA_Open);
27022703
tp->is_sack_reneg = 0;
27032704
tcp_clear_retrans(tp);

net/ipv4/tcp_cong.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,7 @@ void tcp_assign_congestion_control(struct sock *sk)
176176

177177
void tcp_init_congestion_control(struct sock *sk)
178178
{
179-
const struct inet_connection_sock *icsk = inet_csk(sk);
179+
struct inet_connection_sock *icsk = inet_csk(sk);
180180

181181
tcp_sk(sk)->prior_ssthresh = 0;
182182
if (icsk->icsk_ca_ops->init)
@@ -185,6 +185,7 @@ void tcp_init_congestion_control(struct sock *sk)
185185
INET_ECN_xmit(sk);
186186
else
187187
INET_ECN_dontxmit(sk);
188+
icsk->icsk_ca_initialized = 1;
188189
}
189190

190191
static void tcp_reinit_congestion_control(struct sock *sk,

net/ipv4/tcp_input.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5894,8 +5894,10 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
58945894
tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
58955895
tp->snd_cwnd_stamp = tcp_jiffies32;
58965896

5897+
icsk->icsk_ca_initialized = 0;
58975898
bpf_skops_established(sk, bpf_op, skb);
5898-
tcp_init_congestion_control(sk);
5899+
if (!icsk->icsk_ca_initialized)
5900+
tcp_init_congestion_control(sk);
58995901
tcp_init_buffer_space(sk);
59005902
}
59015903

0 commit comments

Comments
 (0)