@@ -369,6 +369,39 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
 	return cwnd;
 }
 
+/* With pacing at lower layers, there's often less data "in the network" than
+ * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq),
+ * we often have several skbs queued in the pacing layer with a pre-scheduled
+ * earliest departure time (EDT). BBR adapts its pacing rate based on the
+ * inflight level that it estimates has already been "baked in" by previous
+ * departure time decisions. We calculate a rough estimate of the number of our
+ * packets that might be in the network at the earliest departure time for the
+ * next skb scheduled:
+ *   in_network_at_edt = inflight_at_edt - (EDT - now) * bw
+ * If we're increasing inflight, then we want to know if the transmit of the
+ * EDT skb will push inflight above the target, so inflight_at_edt includes
+ * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight,
+ * then estimate if inflight will sink too low just before the EDT transmit.
+ */
+static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 now_ns, edt_ns, interval_us;
+	u32 interval_delivered, inflight_at_edt;
+
+	now_ns = tp->tcp_clock_cache;
+	edt_ns = max(tp->tcp_wstamp_ns, now_ns);
+	interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC);
+	interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE;
+	inflight_at_edt = inflight_now;
+	if (bbr->pacing_gain > BBR_UNIT)              /* increasing inflight */
+		inflight_at_edt += bbr_tso_segs_goal(sk);  /* include EDT skb */
+	if (interval_delivered >= inflight_at_edt)
+		return 0;
+	return inflight_at_edt - interval_delivered;
+}
+
 /* An optimization in BBR to reduce losses: On the first round of recovery, we
  * follow the packet conservation principle: send P packets per P packets acked.
  * After that, we slow-start and send at most 2*P packets per P packets acked.
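
To make the arithmetic in the new comment above concrete, the following is a small standalone C sketch of the same estimate, using made-up numbers. The function name, the SKETCH_BW_SCALE constant, and the sample values are assumptions for illustration only; the patch itself operates on the kernel's fixed-point bandwidth (packets per usec, scaled by BW_SCALE) exactly as shown in the hunk above.

#include <stdint.h>
#include <stdio.h>

#define SKETCH_BW_SCALE 24	/* assumed: bw carried as (packets/usec) << 24, like BBR's BW_SCALE */

/* Userspace restatement of the estimate:
 *   in_network_at_edt = inflight_at_edt - (EDT - now) * bw
 */
static uint32_t sketch_in_net_at_edt(uint32_t inflight_now, uint64_t bw,
				     uint64_t edt_minus_now_us,
				     int probing_up, uint32_t tso_segs_goal)
{
	uint64_t interval_delivered = (bw * edt_minus_now_us) >> SKETCH_BW_SCALE;
	uint64_t inflight_at_edt = inflight_now;

	if (probing_up)				/* pacing_gain > BBR_UNIT */
		inflight_at_edt += tso_segs_goal;	/* count the skb departing at EDT */
	if (interval_delivered >= inflight_at_edt)
		return 0;
	return inflight_at_edt - interval_delivered;
}

int main(void)
{
	/* Assumed example: 100 packets in flight, ~12.5 packets/ms of
	 * bandwidth (0.0125 packets/usec), next skb scheduled 2000 us in
	 * the future, probing upward with a 4-segment TSO goal.
	 */
	uint64_t bw = (125ULL << SKETCH_BW_SCALE) / 10000;

	printf("%u\n", sketch_in_net_at_edt(100, bw, 2000, 1, 4));	/* prints 80 */
	return 0;
}

In the hunks below, this estimate replaces the raw in-flight count in the gain-cycle check and in the drain-exit check, so those comparisons look at data estimated to still be in the network at the next scheduled departure time rather than the raw in-flight count, which also includes skbs still sitting in the pacing layer.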
@@ -460,7 +493,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
 	if (bbr->pacing_gain == BBR_UNIT)
 		return is_full_length;		/* just use wall clock time */
 
-	inflight = rs->prior_in_flight;  /* what was in-flight before ACK? */
+	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
 	bw = bbr_max_bw(sk);
 
 	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
@@ -488,8 +521,6 @@ static void bbr_advance_cycle_phase(struct sock *sk)
 
 	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
 	bbr->cycle_mstamp = tp->delivered_mstamp;
-	bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
-					    bbr_pacing_gain[bbr->cycle_idx];
 }
 
 /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
@@ -507,17 +538,13 @@ static void bbr_reset_startup_mode(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_STARTUP;
-	bbr->pacing_gain = bbr_high_gain;
-	bbr->cwnd_gain = bbr_high_gain;
 }
 
 static void bbr_reset_probe_bw_mode(struct sock *sk)
 {
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_PROBE_BW;
-	bbr->pacing_gain = BBR_UNIT;
-	bbr->cwnd_gain = bbr_cwnd_gain;
 	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
 	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
 }
@@ -735,13 +762,11 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
 
 	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
 		bbr->mode = BBR_DRAIN;	/* drain queue we created */
-		bbr->pacing_gain = bbr_drain_gain;	/* pace slow to drain */
-		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
 		tcp_sk(sk)->snd_ssthresh =
 				bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
 	}	/* fall through to check if in-flight is already small: */
 	if (bbr->mode == BBR_DRAIN &&
-	    tcp_packets_in_flight(tcp_sk(sk)) <=
+	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
 	    bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
 		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
 }
@@ -798,8 +823,6 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
 	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
 		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
-		bbr->pacing_gain = BBR_UNIT;
-		bbr->cwnd_gain = BBR_UNIT;
 		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
 		bbr->probe_rtt_done_stamp = 0;
 	}
@@ -827,13 +850,43 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 		bbr->idle_restart = 0;
 }
 
+static void bbr_update_gains(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	switch (bbr->mode) {
+	case BBR_STARTUP:
+		bbr->pacing_gain = bbr_high_gain;
+		bbr->cwnd_gain = bbr_high_gain;
+		break;
+	case BBR_DRAIN:
+		bbr->pacing_gain = bbr_drain_gain;	/* slow, to drain */
+		bbr->cwnd_gain = bbr_high_gain;	/* keep cwnd */
+		break;
+	case BBR_PROBE_BW:
+		bbr->pacing_gain = (bbr->lt_use_bw ?
+				    BBR_UNIT :
+				    bbr_pacing_gain[bbr->cycle_idx]);
+		bbr->cwnd_gain = bbr_cwnd_gain;
+		break;
+	case BBR_PROBE_RTT:
+		bbr->pacing_gain = BBR_UNIT;
+		bbr->cwnd_gain = BBR_UNIT;
+		break;
+	default:
+		WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode);
+		break;
+	}
+}
+
 static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
 {
 	bbr_update_bw(sk, rs);
 	bbr_update_cycle_phase(sk, rs);
 	bbr_check_full_bw_reached(sk, rs);
 	bbr_check_drain(sk, rs);
 	bbr_update_min_rtt(sk, rs);
+	bbr_update_gains(sk);
 }
 
 static void bbr_main(struct sock *sk, const struct rate_sample *rs)