Commit a58598a

Merge branch 'tcp_bbr-TCP-BBR-changes-for-EDT-pacing-model'
Neal Cardwell says:

====================
tcp_bbr: TCP BBR changes for EDT pacing model

Two small patches for TCP BBR to follow up with Eric's recent work to
change the TCP and fq pacing machinery to an "earliest departure time"
(EDT) model:

- The first patch adjusts the TCP BBR logic to work with the new
  "earliest departure time" (EDT) pacing model.

- The second patch adjusts the TCP BBR logic to centralize the setting
  of gain values, to simplify the code and prepare for future changes.
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents cb10c7c + cf33e25 commit a58598a
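
For context on the EDT model the commit message refers to: rather than the TCP sender metering transmissions with its own pacing timer, each skb is stamped with an earliest departure time and the pacing layer (e.g. the fq qdisc) releases it no earlier than that stamp. The sketch below is a toy user-space illustration of that idea only, not kernel code; the packet/pacer types, 1500-byte packet size, and 100 Mbit/s rate are invented for the example.

/* Toy illustration of earliest-departure-time (EDT) pacing: the sender stamps
 * each packet with the time it may leave, derived from the pacing rate; a
 * downstream pacer refuses to release a packet before its stamp.
 * Hypothetical types and values, for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

struct toy_pkt {
        uint32_t len;           /* bytes */
        uint64_t edt_ns;        /* earliest departure time, nanoseconds */
};

/* Sender side: stamp this packet, return the EDT for the next one. */
static uint64_t stamp_edt(struct toy_pkt *p, uint64_t next_departure_ns,
                          uint64_t rate_bytes_per_sec)
{
        p->edt_ns = next_departure_ns;
        /* Advance the schedule by this packet's serialization time. */
        return next_departure_ns + p->len * NSEC_PER_SEC / rate_bytes_per_sec;
}

/* Pacer side: release only packets whose EDT has arrived. */
static int can_release(const struct toy_pkt *p, uint64_t now_ns)
{
        return now_ns >= p->edt_ns;
}

int main(void)
{
        struct toy_pkt pkts[3] = { {1500, 0}, {1500, 0}, {1500, 0} };
        uint64_t rate = 12500000;       /* 100 Mbit/s in bytes/sec */
        uint64_t next = 0;              /* schedule starts at t = 0 */
        uint64_t now = 150000;          /* "current" time: t = 150 us */

        for (int i = 0; i < 3; i++)
                next = stamp_edt(&pkts[i], next, rate);

        /* Packets are spaced 120 us apart, so at t = 150 us only the
         * first two may be released.
         */
        for (int i = 0; i < 3; i++)
                printf("pkt %d: edt=%llu ns, releasable=%d\n", i,
                       (unsigned long long)pkts[i].edt_ns,
                       can_release(&pkts[i], now));
        return 0;
}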

File tree

1 file changed: +65 −12 lines changed


net/ipv4/tcp_bbr.c

Lines changed: 65 additions & 12 deletions
@@ -369,6 +369,39 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
 	return cwnd;
 }
 
+/* With pacing at lower layers, there's often less data "in the network" than
+ * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq),
+ * we often have several skbs queued in the pacing layer with a pre-scheduled
+ * earliest departure time (EDT). BBR adapts its pacing rate based on the
+ * inflight level that it estimates has already been "baked in" by previous
+ * departure time decisions. We calculate a rough estimate of the number of our
+ * packets that might be in the network at the earliest departure time for the
+ * next skb scheduled:
+ *   in_network_at_edt = inflight_at_edt - (EDT - now) * bw
+ * If we're increasing inflight, then we want to know if the transmit of the
+ * EDT skb will push inflight above the target, so inflight_at_edt includes
+ * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight,
+ * then estimate if inflight will sink too low just before the EDT transmit.
+ */
+static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 now_ns, edt_ns, interval_us;
+	u32 interval_delivered, inflight_at_edt;
+
+	now_ns = tp->tcp_clock_cache;
+	edt_ns = max(tp->tcp_wstamp_ns, now_ns);
+	interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC);
+	interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE;
+	inflight_at_edt = inflight_now;
+	if (bbr->pacing_gain > BBR_UNIT)              /* increasing inflight */
+		inflight_at_edt += bbr_tso_segs_goal(sk);  /* include EDT skb */
+	if (interval_delivered >= inflight_at_edt)
+		return 0;
+	return inflight_at_edt - interval_delivered;
+}
+
 /* An optimization in BBR to reduce losses: On the first round of recovery, we
  * follow the packet conservation principle: send P packets per P packets acked.
  * After that, we slow-start and send at most 2*P packets per P packets acked.
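
A rough worked example of the in_network_at_edt formula above, with invented numbers: 100 packets in flight, estimated bandwidth of 0.05 packets/us (50 packets per millisecond), the next skb's pre-scheduled EDT 400 us in the future, and pacing_gain > BBR_UNIT so the departing skb's bbr_tso_segs_goal() of, say, 2 segments is counted. Then inflight_at_edt = 102, about 0.05 * 400 = 20 packets should have left the network by the EDT, and the estimate is 102 - 20 = 82 packets in the network at that departure time. Below is a minimal user-space sketch of the same arithmetic, using a plain packets-per-microsecond bandwidth instead of the kernel's BW_SCALE fixed-point encoding; all values are hypothetical.

/* User-space sketch of the in_network_at_edt estimate. */
#include <stdint.h>
#include <stdio.h>

static uint32_t packets_in_net_at_edt(uint32_t inflight_now, uint64_t now_ns,
                                      uint64_t edt_ns, double bw_pkts_per_us,
                                      int increasing_inflight,
                                      uint32_t tso_segs)
{
        uint64_t interval_us = (edt_ns > now_ns ? edt_ns - now_ns : 0) / 1000;
        uint32_t interval_delivered = (uint32_t)(bw_pkts_per_us * interval_us);
        uint32_t inflight_at_edt = inflight_now;

        if (increasing_inflight)        /* count the skb departing at EDT */
                inflight_at_edt += tso_segs;
        if (interval_delivered >= inflight_at_edt)
                return 0;
        return inflight_at_edt - interval_delivered;
}

int main(void)
{
        /* 100 packets in flight, EDT 400 us ahead, bw = 0.05 packets/us. */
        uint32_t est = packets_in_net_at_edt(100, 0, 400000, 0.05, 1, 2);

        printf("in_network_at_edt = %u packets\n", est);   /* prints 82 */
        return 0;
}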
@@ -460,7 +493,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
 	if (bbr->pacing_gain == BBR_UNIT)
 		return is_full_length;		/* just use wall clock time */
 
-	inflight = rs->prior_in_flight;  /* what was in-flight before ACK? */
+	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
 	bw = bbr_max_bw(sk);
 
 	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
@@ -488,8 +521,6 @@ static void bbr_advance_cycle_phase(struct sock *sk)
 
 	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
 	bbr->cycle_mstamp = tp->delivered_mstamp;
-	bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
-					    bbr_pacing_gain[bbr->cycle_idx];
 }
 
 /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
@@ -507,17 +538,13 @@ static void bbr_reset_startup_mode(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_STARTUP;
-	bbr->pacing_gain = bbr_high_gain;
-	bbr->cwnd_gain = bbr_high_gain;
 }
 
 static void bbr_reset_probe_bw_mode(struct sock *sk)
 {
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_PROBE_BW;
-	bbr->pacing_gain = BBR_UNIT;
-	bbr->cwnd_gain = bbr_cwnd_gain;
 	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
 	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
 }
@@ -735,13 +762,11 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
 
 	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
 		bbr->mode = BBR_DRAIN;	/* drain queue we created */
-		bbr->pacing_gain = bbr_drain_gain;	/* pace slow to drain */
-		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
 		tcp_sk(sk)->snd_ssthresh =
 				bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
 	}	/* fall through to check if in-flight is already small: */
 	if (bbr->mode == BBR_DRAIN &&
-	    tcp_packets_in_flight(tcp_sk(sk)) <=
+	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
 	    bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
 		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
 }
@@ -798,8 +823,6 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
 	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
 		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
-		bbr->pacing_gain = BBR_UNIT;
-		bbr->cwnd_gain = BBR_UNIT;
 		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
 		bbr->probe_rtt_done_stamp = 0;
 	}
@@ -827,13 +850,43 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 	bbr->idle_restart = 0;
 }
 
+static void bbr_update_gains(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	switch (bbr->mode) {
+	case BBR_STARTUP:
+		bbr->pacing_gain = bbr_high_gain;
+		bbr->cwnd_gain = bbr_high_gain;
+		break;
+	case BBR_DRAIN:
+		bbr->pacing_gain = bbr_drain_gain;	/* slow, to drain */
+		bbr->cwnd_gain = bbr_high_gain;		/* keep cwnd */
+		break;
+	case BBR_PROBE_BW:
+		bbr->pacing_gain = (bbr->lt_use_bw ?
+				    BBR_UNIT :
+				    bbr_pacing_gain[bbr->cycle_idx]);
+		bbr->cwnd_gain = bbr_cwnd_gain;
+		break;
+	case BBR_PROBE_RTT:
+		bbr->pacing_gain = BBR_UNIT;
+		bbr->cwnd_gain = BBR_UNIT;
+		break;
+	default:
+		WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode);
+		break;
+	}
+}
+
 static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
 {
 	bbr_update_bw(sk, rs);
 	bbr_update_cycle_phase(sk, rs);
 	bbr_check_full_bw_reached(sk, rs);
 	bbr_check_drain(sk, rs);
 	bbr_update_min_rtt(sk, rs);
+	bbr_update_gains(sk);
 }
 
 static void bbr_main(struct sock *sk, const struct rate_sample *rs)
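
With this hunk the gains are no longer set at each mode transition; bbr_update_gains() re-derives them from bbr->mode (and, in PROBE_BW, from lt_use_bw and cycle_idx) at the end of bbr_update_model() on every ACK. A note on the values it selects, quoting the constants near the top of tcp_bbr.c from memory, so treat the exact expressions as an assumption: gains are Q8 fixed point, with BBR_UNIT = 1 << BBR_SCALE = 256 meaning 1.0, bbr_high_gain roughly 2.89 (2/ln 2), bbr_drain_gain roughly its inverse, and bbr_cwnd_gain 2.0. A minimal sketch of applying such a gain, with a made-up bandwidth figure:

/* Sketch of BBR's fixed-point gain arithmetic; the bw value is hypothetical. */
#include <stdint.h>
#include <stdio.h>

#define BBR_SCALE 8                     /* gains are Q8 fixed point */
#define BBR_UNIT (1 << BBR_SCALE)       /* 256 == gain of 1.0 */

int main(void)
{
        const int bbr_high_gain  = BBR_UNIT * 2885 / 1000 + 1; /* ~2.89x */
        const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;     /* ~0.35x */
        uint64_t bw = 1000;     /* hypothetical bw, packets per unit time */

        /* Applying a gain: multiply, then shift the Q8 scale back out. */
        printf("STARTUP pacing: %llu\n",
               (unsigned long long)(bw * bbr_high_gain >> BBR_SCALE));  /* 2886 */
        printf("DRAIN pacing:   %llu\n",
               (unsigned long long)(bw * bbr_drain_gain >> BBR_SCALE)); /* 343 */
        return 0;
}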
