Skip to content

Commit cff04e2

Browse files
edumazet authored and davem330 committed
tcp_cubic: switch bictcp_clock() to usec resolution
The current 1 ms clock feeds ca->round_start, ca->delay_min and ca->last_ack. This is quite problematic for data-center flows, where delay_min is way below 1 ms. It means Hystart train detection triggers every time the jiffies value is updated, since the expression "((s32)(now - ca->round_start) > ca->delay_min >> 4)" becomes true. This kind of random behavior can be solved by reusing the existing usec timestamp that TCP keeps in tp->tcp_mstamp. Note that a followup patch will tweak things a bit, because during slow start, GRO aggregation on receivers naturally increases the RTT as TSO packets gradually come to ~64KB size. To recap, right after this patch CUBIC Hystart train detection is more aggressive, since short RTT flows might exit slow start at cwnd = 20, instead of being possibly unbounded. The following patch will address this problem. Signed-off-by: Eric Dumazet <[email protected]> Acked-by: Neal Cardwell <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 35821fc commit cff04e2

File tree

1 file changed

+14
-21
lines changed

1 file changed

+14
-21
lines changed

net/ipv4/tcp_cubic.c

Lines changed: 14 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,8 @@
4040

4141
/* Number of delay samples for detecting the increase of delay */
4242
#define HYSTART_MIN_SAMPLES 8
43-
#define HYSTART_DELAY_MIN (4U<<3)
44-
#define HYSTART_DELAY_MAX (16U<<3)
43+
#define HYSTART_DELAY_MIN (4000U) /* 4 ms */
44+
#define HYSTART_DELAY_MAX (16000U) /* 16 ms */
4545
#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
4646

4747
static int fast_convergence __read_mostly = 1;
@@ -53,7 +53,7 @@ static int tcp_friendliness __read_mostly = 1;
5353
static int hystart __read_mostly = 1;
5454
static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
5555
static int hystart_low_window __read_mostly = 16;
56-
static int hystart_ack_delta __read_mostly = 2;
56+
static int hystart_ack_delta_us __read_mostly = 2000;
5757

5858
static u32 cube_rtt_scale __read_mostly;
5959
static u32 beta_scale __read_mostly;
@@ -77,8 +77,8 @@ MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
7777
" 1: packet-train 2: delay 3: both packet-train and delay");
7878
module_param(hystart_low_window, int, 0644);
7979
MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
80-
module_param(hystart_ack_delta, int, 0644);
81-
MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
80+
module_param(hystart_ack_delta_us, int, 0644);
81+
MODULE_PARM_DESC(hystart_ack_delta_us, "spacing between ack's indicating train (usecs)");
8282

8383
/* BIC TCP Parameters */
8484
struct bictcp {
@@ -89,7 +89,7 @@ struct bictcp {
8989
u32 bic_origin_point;/* origin point of bic function */
9090
u32 bic_K; /* time to origin point
9191
from the beginning of the current epoch */
92-
u32 delay_min; /* min delay (msec << 3) */
92+
u32 delay_min; /* min delay (usec) */
9393
u32 epoch_start; /* beginning of an epoch */
9494
u32 ack_cnt; /* number of acks */
9595
u32 tcp_cwnd; /* estimated tcp cwnd */
@@ -117,21 +117,17 @@ static inline void bictcp_reset(struct bictcp *ca)
117117
ca->found = 0;
118118
}
119119

120-
static inline u32 bictcp_clock(void)
120+
static inline u32 bictcp_clock_us(const struct sock *sk)
121121
{
122-
#if HZ < 1000
123-
return ktime_to_ms(ktime_get_real());
124-
#else
125-
return jiffies_to_msecs(jiffies);
126-
#endif
122+
return tcp_sk(sk)->tcp_mstamp;
127123
}
128124

129125
static inline void bictcp_hystart_reset(struct sock *sk)
130126
{
131127
struct tcp_sock *tp = tcp_sk(sk);
132128
struct bictcp *ca = inet_csk_ca(sk);
133129

134-
ca->round_start = ca->last_ack = bictcp_clock();
130+
ca->round_start = ca->last_ack = bictcp_clock_us(sk);
135131
ca->end_seq = tp->snd_nxt;
136132
ca->curr_rtt = ~0U;
137133
ca->sample_cnt = 0;
@@ -276,7 +272,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
276272
*/
277273

278274
t = (s32)(tcp_jiffies32 - ca->epoch_start);
279-
t += msecs_to_jiffies(ca->delay_min >> 3);
275+
t += usecs_to_jiffies(ca->delay_min);
280276
/* change the unit from HZ to bictcp_HZ */
281277
t <<= BICTCP_HZ;
282278
do_div(t, HZ);
@@ -382,12 +378,12 @@ static void hystart_update(struct sock *sk, u32 delay)
382378
struct bictcp *ca = inet_csk_ca(sk);
383379

384380
if (hystart_detect & HYSTART_ACK_TRAIN) {
385-
u32 now = bictcp_clock();
381+
u32 now = bictcp_clock_us(sk);
386382

387383
/* first detection parameter - ack-train detection */
388-
if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
384+
if ((s32)(now - ca->last_ack) <= hystart_ack_delta_us) {
389385
ca->last_ack = now;
390-
if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
386+
if ((s32)(now - ca->round_start) > ca->delay_min >> 1) {
391387
ca->found = 1;
392388
NET_INC_STATS(sock_net(sk),
393389
LINUX_MIB_TCPHYSTARTTRAINDETECT);
@@ -421,9 +417,6 @@ static void hystart_update(struct sock *sk, u32 delay)
421417
}
422418
}
423419

424-
/* Track delayed acknowledgment ratio using sliding window
425-
* ratio = (15*ratio + sample) / 16
426-
*/
427420
static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
428421
{
429422
const struct tcp_sock *tp = tcp_sk(sk);
@@ -438,7 +431,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
438431
if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
439432
return;
440433

441-
delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
434+
delay = sample->rtt_us;
442435
if (delay == 0)
443436
delay = 1;
444437

0 commit comments

Comments
 (0)