Skip to content

Commit 55d8694

Browse files
Florian Westphal authored and davem330 committed
net: tcp: assign tcp cong_ops when tcp sk is created
Split assignment and initialization from one function into two. This is required by follow-up patches that add the Datacenter TCP (DCTCP) congestion control algorithm - we need to be able to determine if the connection is moderated by DCTCP before the 3WHS has finished.

As we walk the available congestion control list during the assignment, we are always guaranteed to have Reno present, as it is always compiled in. Therefore, since we're doing the early assignment, we don't have a real use for the Reno alias tcp_init_congestion_ops anymore and can thus remove it.

Actual use of the congestion control operations happens only after the 3WHS has finished; in some cases, however, get_info() can be accessed via diag if implemented, therefore we need to zero out the private area for those modules.

Joint work with Daniel Borkmann and Glenn Judd.

Signed-off-by: Florian Westphal <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Signed-off-by: Glenn Judd <[email protected]>
Acked-by: Stephen Hemminger <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 53dfd50 commit 55d8694

File tree

4 files changed

+27
-32
lines changed

4 files changed

+27
-32
lines changed

include/net/tcp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,7 @@ struct tcp_congestion_ops {
824824
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
825825
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
826826

827+
void tcp_assign_congestion_control(struct sock *sk);
827828
void tcp_init_congestion_control(struct sock *sk);
828829
void tcp_cleanup_congestion_control(struct sock *sk);
829830
int tcp_set_default_congestion_control(const char *name);
@@ -835,7 +836,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name);
835836
int tcp_slow_start(struct tcp_sock *tp, u32 acked);
836837
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
837838

838-
extern struct tcp_congestion_ops tcp_init_congestion_ops;
839839
u32 tcp_reno_ssthresh(struct sock *sk);
840840
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
841841
extern struct tcp_congestion_ops tcp_reno;

net/ipv4/tcp.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ void tcp_init_sock(struct sock *sk)
405405

406406
tp->reordering = sysctl_tcp_reordering;
407407
tcp_enable_early_retrans(tp);
408-
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
408+
tcp_assign_congestion_control(sk);
409409

410410
tp->tsoffset = 0;
411411

@@ -3258,8 +3258,6 @@ void __init tcp_init(void)
32583258
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
32593259

32603260
tcp_metrics_init();
3261-
3262-
tcp_register_congestion_control(&tcp_reno);
3263-
3261+
BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
32643262
tcp_tasklet_init();
32653263
}

net/ipv4/tcp_cong.c

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -74,24 +74,34 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
7474
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
7575

7676
/* Assign choice of congestion control. */
77-
void tcp_init_congestion_control(struct sock *sk)
77+
void tcp_assign_congestion_control(struct sock *sk)
7878
{
7979
struct inet_connection_sock *icsk = inet_csk(sk);
8080
struct tcp_congestion_ops *ca;
8181

82-
/* if no choice made yet assign the current value set as default */
83-
if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) {
84-
rcu_read_lock();
85-
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
86-
if (try_module_get(ca->owner)) {
87-
icsk->icsk_ca_ops = ca;
88-
break;
89-
}
90-
91-
/* fallback to next available */
82+
rcu_read_lock();
83+
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
84+
if (likely(try_module_get(ca->owner))) {
85+
icsk->icsk_ca_ops = ca;
86+
goto out;
9287
}
93-
rcu_read_unlock();
88+
/* Fallback to next available. The last really
89+
* guaranteed fallback is Reno from this list.
90+
*/
9491
}
92+
out:
93+
rcu_read_unlock();
94+
95+
/* Clear out private data before diag gets it and
96+
* the ca has not been initialized.
97+
*/
98+
if (ca->get_info)
99+
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
100+
}
101+
102+
void tcp_init_congestion_control(struct sock *sk)
103+
{
104+
const struct inet_connection_sock *icsk = inet_csk(sk);
95105

96106
if (icsk->icsk_ca_ops->init)
97107
icsk->icsk_ca_ops->init(sk);
@@ -345,15 +355,3 @@ struct tcp_congestion_ops tcp_reno = {
345355
.ssthresh = tcp_reno_ssthresh,
346356
.cong_avoid = tcp_reno_cong_avoid,
347357
};
348-
349-
/* Initial congestion control used (until SYN)
350-
* really reno under another name so we can tell difference
351-
* during tcp_set_default_congestion_control
352-
*/
353-
struct tcp_congestion_ops tcp_init_congestion_ops = {
354-
.name = "",
355-
.owner = THIS_MODULE,
356-
.ssthresh = tcp_reno_ssthresh,
357-
.cong_avoid = tcp_reno_cong_avoid,
358-
};
359-
EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);

net/ipv4/tcp_minisocks.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -451,9 +451,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
451451
newtp->snd_cwnd = TCP_INIT_CWND;
452452
newtp->snd_cwnd_cnt = 0;
453453

454-
if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
455-
!try_module_get(newicsk->icsk_ca_ops->owner))
456-
newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
454+
if (!try_module_get(newicsk->icsk_ca_ops->owner))
455+
tcp_assign_congestion_control(newsk);
457456

458457
tcp_set_ca_state(newsk, TCP_CA_Open);
459458
tcp_init_xmit_timers(newsk);

0 commit comments

Comments
 (0)