Skip to content

Commit 7787914

Browse files
edumazet authored and davem330 committed
net_sched: sch_fq: add low_rate_threshold parameter
This commit adds to the fq module a low_rate_threshold parameter to insert a delay after all packets if the socket requests a pacing rate below the threshold. This helps achieve more precise control of the sending rate with low-rate paths, especially policers. The basic issue is that if a congestion control module detects a policer at a certain rate, it may want fq to be able to shape to that policed rate. That way the sender can avoid policer drops by having the packets arrive at the policer at or just under the policed rate. The default threshold of 550Kbps was chosen analytically so that for policers or links at 500Kbps or 512Kbps fq would very likely invoke this mechanism, even if the pacing rate was briefly slightly above the available bandwidth. This value was then empirically validated with two years of production testing on YouTube video servers.

Signed-off-by: Van Jacobson <[email protected]>
Signed-off-by: Neal Cardwell <[email protected]>
Signed-off-by: Yuchung Cheng <[email protected]>
Signed-off-by: Nandita Dukkipati <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: Soheil Hassas Yeganeh <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 6403389 commit 7787914

File tree

2 files changed

+21
-3
lines changed

2 files changed

+21
-3
lines changed

include/uapi/linux/pkt_sched.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -792,6 +792,8 @@ enum {
792792

793793
TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */
794794

795+
TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
796+
795797
__TCA_FQ_MAX
796798
};
797799

net/sched/sch_fq.c

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,7 @@ struct fq_sched_data {
9494
u32 flow_max_rate; /* optional max rate per flow */
9595
u32 flow_plimit; /* max packets per flow */
9696
u32 orphan_mask; /* mask for orphaned skb */
97+
u32 low_rate_threshold;
9798
struct rb_root *fq_root;
9899
u8 rate_enable;
99100
u8 fq_trees_log;
@@ -433,7 +434,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
433434
struct fq_flow_head *head;
434435
struct sk_buff *skb;
435436
struct fq_flow *f;
436-
u32 rate;
437+
u32 rate, plen;
437438

438439
skb = fq_dequeue_head(sch, &q->internal);
439440
if (skb)
@@ -482,7 +483,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
482483
prefetch(&skb->end);
483484
f->credit -= qdisc_pkt_len(skb);
484485

485-
if (f->credit > 0 || !q->rate_enable)
486+
if (!q->rate_enable)
486487
goto out;
487488

488489
/* Do not pace locally generated ack packets */
@@ -493,8 +494,15 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
493494
if (skb->sk)
494495
rate = min(skb->sk->sk_pacing_rate, rate);
495496

497+
if (rate <= q->low_rate_threshold) {
498+
f->credit = 0;
499+
plen = qdisc_pkt_len(skb);
500+
} else {
501+
plen = max(qdisc_pkt_len(skb), q->quantum);
502+
if (f->credit > 0)
503+
goto out;
504+
}
496505
if (rate != ~0U) {
497-
u32 plen = max(qdisc_pkt_len(skb), q->quantum);
498506
u64 len = (u64)plen * NSEC_PER_SEC;
499507

500508
if (likely(rate))
@@ -662,6 +670,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
662670
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
663671
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
664672
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
673+
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
665674
};
666675

667676
static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -716,6 +725,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
716725
if (tb[TCA_FQ_FLOW_MAX_RATE])
717726
q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
718727

728+
if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
729+
q->low_rate_threshold =
730+
nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
731+
719732
if (tb[TCA_FQ_RATE_ENABLE]) {
720733
u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
721734

@@ -781,6 +794,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
781794
q->fq_root = NULL;
782795
q->fq_trees_log = ilog2(1024);
783796
q->orphan_mask = 1024 - 1;
797+
q->low_rate_threshold = 550000 / 8;
784798
qdisc_watchdog_init(&q->watchdog, sch);
785799

786800
if (opt)
@@ -811,6 +825,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
811825
nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
812826
jiffies_to_usecs(q->flow_refill_delay)) ||
813827
nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
828+
nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
829+
q->low_rate_threshold) ||
814830
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
815831
goto nla_put_failure;
816832

0 commit comments

Comments
 (0)