Skip to content

Commit 9ad7c04

Browse files
hkchu authored and davem330 committed
tcp: RFC2988bis + taking RTT sample from 3WHS for the passive open side
This patch lowers the default initRTO from 3secs to 1sec per RFC2988bis. It falls back to 3secs if the SYN or SYN-ACK packet has been retransmitted, AND the TCP timestamp option is not on.

It also adds support to take RTT sample during 3WHS on the passive open side, just like its active open counterpart, and uses it, if valid, to seed the initRTO for the data transmission phase.

The patch also resets ssthresh to its initial default at the beginning of the data transmission phase, and reduces cwnd to 1 if there has been MORE THAN ONE retransmission during 3WHS per RFC5681.

Signed-off-by: H.K. Jerry Chu <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent aee80b5 commit 9ad7c04

File tree

8 files changed

+55
-27
lines changed

8 files changed

+55
-27
lines changed

include/linux/tcp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ struct tcp_request_sock {
282282
#endif
283283
u32 rcv_isn;
284284
u32 snt_isn;
285+
u32 snt_synack; /* synack sent time */
285286
};
286287

287288
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)

include/net/tcp.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,13 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
122122
#endif
123123
#define TCP_RTO_MAX ((unsigned)(120*HZ))
124124
#define TCP_RTO_MIN ((unsigned)(HZ/5))
125-
#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value */
125+
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC2988bis initial RTO value */
126+
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now
127+
* used as a fallback RTO for the
128+
* initial data transmission if no
129+
* valid RTT sample has been acquired,
130+
* most likely due to retrans in 3WHS.
131+
*/
126132

127133
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
128134
* for local resources.
@@ -295,7 +301,7 @@ static inline void tcp_synq_overflow(struct sock *sk)
295301
static inline int tcp_synq_no_recent_overflow(const struct sock *sk)
296302
{
297303
unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
298-
return time_after(jiffies, last_overflow + TCP_TIMEOUT_INIT);
304+
return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK);
299305
}
300306

301307
extern struct proto tcp_prot;
@@ -508,6 +514,7 @@ extern void tcp_initialize_rcv_mss(struct sock *sk);
508514
extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
509515
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
510516
extern void tcp_mtup_init(struct sock *sk);
517+
extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
511518

512519
static inline void tcp_bound_rto(const struct sock *sk)
513520
{

net/ipv4/syncookies.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
316316
ireq->wscale_ok = tcp_opt.wscale_ok;
317317
ireq->tstamp_ok = tcp_opt.saw_tstamp;
318318
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
319+
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
319320

320321
/* We throwed the options of the initial SYN away, so we hope
321322
* the ACK carries the same options again (see RFC1122 4.2.3.8)

net/ipv4/tcp_input.c

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -880,17 +880,19 @@ static void tcp_init_metrics(struct sock *sk)
880880
tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
881881
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
882882
tp->snd_ssthresh = tp->snd_cwnd_clamp;
883+
} else {
884+
/* ssthresh may have been reduced unnecessarily during.
885+
* 3WHS. Restore it back to its initial default.
886+
*/
887+
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
883888
}
884889
if (dst_metric(dst, RTAX_REORDERING) &&
885890
tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
886891
tcp_disable_fack(tp);
887892
tp->reordering = dst_metric(dst, RTAX_REORDERING);
888893
}
889894

890-
if (dst_metric(dst, RTAX_RTT) == 0)
891-
goto reset;
892-
893-
if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
895+
if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
894896
goto reset;
895897

896898
/* Initial rtt is determined from SYN,SYN-ACK.
@@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk)
916918
tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
917919
}
918920
tcp_set_rto(sk);
919-
if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
920921
reset:
921-
/* Play conservative. If timestamps are not
922-
* supported, TCP will fail to recalculate correct
923-
* rtt, if initial rto is too small. FORGET ALL AND RESET!
922+
if (tp->srtt == 0) {
923+
/* RFC2988bis: We've failed to get a valid RTT sample from
924+
* 3WHS. This is most likely due to retransmission,
925+
* including spurious one. Reset the RTO back to 3secs
926+
* from the more aggressive 1sec to avoid more spurious
927+
* retransmission.
924928
*/
925-
if (!tp->rx_opt.saw_tstamp && tp->srtt) {
926-
tp->srtt = 0;
927-
tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
928-
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
929-
}
929+
tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
930+
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
930931
}
931-
tp->snd_cwnd = tcp_init_cwnd(tp, dst);
932+
/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
933+
* retransmitted. In light of RFC2988bis' more aggressive 1sec
934+
* initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
935+
* retransmission has occurred.
936+
*/
937+
if (tp->total_retrans > 1)
938+
tp->snd_cwnd = 1;
939+
else
940+
tp->snd_cwnd = tcp_init_cwnd(tp, dst);
932941
tp->snd_cwnd_stamp = tcp_time_stamp;
933942
}
934943

@@ -3112,12 +3121,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
31123121
tcp_xmit_retransmit_queue(sk);
31133122
}
31143123

3115-
static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3124+
void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
31163125
{
31173126
tcp_rtt_estimator(sk, seq_rtt);
31183127
tcp_set_rto(sk);
31193128
inet_csk(sk)->icsk_backoff = 0;
31203129
}
3130+
EXPORT_SYMBOL(tcp_valid_rtt_meas);
31213131

31223132
/* Read draft-ietf-tcplw-high-performance before mucking
31233133
* with this code. (Supersedes RFC1323)
@@ -5806,12 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
58065816
tp->rx_opt.snd_wscale;
58075817
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
58085818

5809-
/* tcp_ack considers this ACK as duplicate
5810-
* and does not calculate rtt.
5811-
* Force it here.
5812-
*/
5813-
tcp_ack_update_rtt(sk, 0, 0);
5814-
58155819
if (tp->rx_opt.tstamp_ok)
58165820
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
58175821

net/ipv4/tcp_ipv4.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -429,8 +429,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
429429
break;
430430

431431
icsk->icsk_backoff--;
432-
inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
433-
icsk->icsk_backoff;
432+
inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
433+
TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
434434
tcp_bound_rto(sk);
435435

436436
skb = tcp_write_queue_head(sk);
@@ -1384,6 +1384,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
13841384
isn = tcp_v4_init_sequence(skb);
13851385
}
13861386
tcp_rsk(req)->snt_isn = isn;
1387+
tcp_rsk(req)->snt_synack = tcp_time_stamp;
13871388

13881389
if (tcp_v4_send_synack(sk, dst, req,
13891390
(struct request_values *)&tmp_ext) ||
@@ -1458,6 +1459,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
14581459
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
14591460

14601461
tcp_initialize_rcv_mss(newsk);
1462+
if (tcp_rsk(req)->snt_synack)
1463+
tcp_valid_rtt_meas(newsk,
1464+
tcp_time_stamp - tcp_rsk(req)->snt_synack);
1465+
newtp->total_retrans = req->retrans;
14611466

14621467
#ifdef CONFIG_TCP_MD5SIG
14631468
/* Copy over the MD5 key from the original socket */
@@ -1854,7 +1859,7 @@ static int tcp_v4_init_sock(struct sock *sk)
18541859
* algorithms that we must have the following bandaid to talk
18551860
* efficiently to them. -DaveM
18561861
*/
1857-
tp->snd_cwnd = 2;
1862+
tp->snd_cwnd = TCP_INIT_CWND;
18581863

18591864
/* See draft-stevens-tcpca-spec-01 for discussion of the
18601865
* initialization of these values.

net/ipv4/tcp_minisocks.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
486486
* algorithms that we must have the following bandaid to talk
487487
* efficiently to them. -DaveM
488488
*/
489-
newtp->snd_cwnd = 2;
489+
newtp->snd_cwnd = TCP_INIT_CWND;
490490
newtp->snd_cwnd_cnt = 0;
491491
newtp->bytes_acked = 0;
492492

@@ -720,6 +720,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
720720
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
721721
return NULL;
722722
}
723+
if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
724+
tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
725+
else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
726+
tcp_rsk(req)->snt_synack = 0;
723727

724728
/* OK, ACK is valid, create big socket and
725729
* feed this segment to it. It will repeat all

net/ipv6/syncookies.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
223223
ireq->wscale_ok = tcp_opt.wscale_ok;
224224
ireq->tstamp_ok = tcp_opt.saw_tstamp;
225225
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
226+
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
226227
treq->rcv_isn = ntohl(th->seq) - 1;
227228
treq->snt_isn = cookie;
228229

net/ipv6/tcp_ipv6.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,6 +1341,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
13411341
}
13421342
have_isn:
13431343
tcp_rsk(req)->snt_isn = isn;
1344+
tcp_rsk(req)->snt_synack = tcp_time_stamp;
13441345

13451346
security_inet_conn_request(sk, skb, req);
13461347

@@ -1509,6 +1510,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
15091510
tcp_sync_mss(newsk, dst_mtu(dst));
15101511
newtp->advmss = dst_metric_advmss(dst);
15111512
tcp_initialize_rcv_mss(newsk);
1513+
if (tcp_rsk(req)->snt_synack)
1514+
tcp_valid_rtt_meas(newsk,
1515+
tcp_time_stamp - tcp_rsk(req)->snt_synack);
1516+
newtp->total_retrans = req->retrans;
15121517

15131518
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
15141519
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

0 commit comments

Comments
 (0)