
Commit fefa569

Eric Dumazet authored and davem330 committed
net_sched: sch_fq: account for schedule/timers drifts
It looks like the following patch can make FQ very precise, even in VM
or on stressed hosts. It matters at high pacing rates.

We take into account the difference between the time that was programmed
when the last packet was sent, and the current time (a drift of tens of
usecs is often observed).

Also add an EWMA of the unthrottle latency to help diagnostics. This
latency is the difference between the current time and the oldest packet
in the delayed RB-tree. It accounts for the high resolution timer
latency, but can differ under stress, as fq_check_throttled() can
opportunistically be called from a dequeue() that follows an enqueue()
for a different flow.

Tested:

// Start a 10Gbit flow
$ netperf --google-pacing-rate 1250000000 -H lpaa24 -l 10000 -- -K bbr &

Before patch :
$ sar -n DEV 10 5 | grep eth0 | grep Average
Average:    eth0  17106.04  756876.84  1102.75  1119049.02  0.00  0.00  0.52

After patch :
$ sar -n DEV 10 5 | grep eth0 | grep Average
Average:    eth0  17867.00  800245.90  1151.77  1183172.12  0.00  0.00  0.52

A new iproute2 tc can output the 'unthrottle latency' :

$ tc -s qd sh dev eth0 | grep latency
  0 gc, 0 highprio, 32490767 throttled, 2382 ns latency

Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
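For readers decoding the EWMA: the '>> 3' shifts in the patch implement a
weight of 1/8, i.e. avg = avg - avg/8 + sample/8, so the latency reported
by tc is dominated by roughly the last eight unthrottle events. A minimal
user-space sketch of the same smoothing, with made-up latency samples
(this is an illustration, not kernel code):

#include <stdio.h>

/* EWMA with weight 1/8, mirroring the '>> 3' shifts added to
 * fq_check_throttled(): avg = avg - avg/8 + sample/8.
 */
static unsigned long ewma_update(unsigned long avg, unsigned long sample)
{
	avg -= avg >> 3;
	avg += sample >> 3;
	return avg;
}

int main(void)
{
	/* Hypothetical unthrottle latency samples, in nanoseconds. */
	unsigned long samples[] = { 2000, 2500, 1800, 4000, 2200 };
	unsigned long avg = 0;
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		avg = ewma_update(avg, samples[i]);
		printf("sample=%lu ns  avg=%lu ns\n", samples[i], avg);
	}
	return 0;
}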
1 parent: 429baa6

2 files changed, 19 insertions(+), 4 deletions(-)

include/uapi/linux/pkt_sched.h (1 addition, 1 deletion)

@@ -811,7 +811,7 @@ struct tc_fq_qd_stats {
 	__u32	flows;
 	__u32	inactive_flows;
 	__u32	throttled_flows;
-	__u32	pad;
+	__u32	unthrottle_latency_ns;
 };
 
 /* Heavy-Hitter Filter */
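The new counter reuses an existing pad, so struct tc_fq_qd_stats keeps its
size and field offsets and the stats netlink ABI is unchanged. A
stand-alone illustration of that invariant, using abbreviated copies of
the struct tail (the *_old/*_new names are invented for this sketch):

#include <stddef.h>

/* Abbreviated, illustrative copies of the struct tail before and
 * after the patch; only the fields shown in the hunk above.
 */
struct fq_stats_tail_old {
	unsigned int flows;
	unsigned int inactive_flows;
	unsigned int throttled_flows;
	unsigned int pad;
};

struct fq_stats_tail_new {
	unsigned int flows;
	unsigned int inactive_flows;
	unsigned int throttled_flows;
	unsigned int unthrottle_latency_ns;
};

/* Same size, and the new field lands exactly where the pad was. */
_Static_assert(sizeof(struct fq_stats_tail_old) ==
	       sizeof(struct fq_stats_tail_new), "size must not change");
_Static_assert(offsetof(struct fq_stats_tail_old, pad) ==
	       offsetof(struct fq_stats_tail_new, unthrottle_latency_ns),
	       "offset must not change");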

net/sched/sch_fq.c (18 additions, 3 deletions)

@@ -86,6 +86,7 @@ struct fq_sched_data {
 
 	struct rb_root	delayed;	/* for rate limited flows */
 	u64		time_next_delayed_flow;
+	unsigned long	unthrottle_latency_ns;
 
 	struct fq_flow	internal;	/* for non classified or high prio packets */
 	u32		quantum;
@@ -408,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 static void fq_check_throttled(struct fq_sched_data *q, u64 now)
 {
+	unsigned long sample;
 	struct rb_node *p;
 
 	if (q->time_next_delayed_flow > now)
 		return;
 
+	/* Update unthrottle latency EWMA.
+	 * This is cheap and can help diagnosing timer/latency problems.
+	 */
+	sample = (unsigned long)(now - q->time_next_delayed_flow);
+	q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
+	q->unthrottle_latency_ns += sample >> 3;
+
 	q->time_next_delayed_flow = ~0ULL;
 	while ((p = rb_first(&q->delayed)) != NULL) {
 		struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
@@ -515,7 +524,12 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 			len = NSEC_PER_SEC;
 			q->stat_pkts_too_long++;
 		}
-
+		/* Account for schedule/timers drifts.
+		 * f->time_next_packet was set when prior packet was sent,
+		 * and current time (@now) can be too late by tens of us.
+		 */
+		if (f->time_next_packet)
+			len -= min(len/2, now - f->time_next_packet);
 		f->time_next_packet = now + len;
 	}
 out:
@@ -787,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
 	q->flow_refill_delay	= msecs_to_jiffies(40);
 	q->flow_max_rate	= ~0U;
+	q->time_next_delayed_flow = ~0ULL;
 	q->rate_enable		= 1;
 	q->new_flows.first	= NULL;
 	q->old_flows.first	= NULL;
@@ -854,8 +869,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	st.flows		  = q->flows;
 	st.inactive_flows	  = q->inactive_flows;
 	st.throttled_flows	  = q->throttled_flows;
-	st.pad			  = 0;
-
+	st.unthrottle_latency_ns  = min_t(unsigned long,
+					  q->unthrottle_latency_ns, ~0U);
 	sch_tree_unlock(sch);
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
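To make the fq_dequeue() change concrete with numbers: if the pacing gap
(len) is 10000 ns and the timer fired 300 ns after the programmed
f->time_next_packet, the next deadline is pulled in by those 300 ns, and
min(len/2, ...) caps the correction so even a long stall can shorten the
gap by at most half. A user-space sketch of that arithmetic (this mimics,
but is not, the kernel code):

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Mimics the drift compensation in fq_dequeue(): shorten the next
 * inter-packet gap by the observed lateness, clamped to len/2 so a
 * large stall cannot collapse the pacing entirely.
 */
static unsigned long long next_deadline(unsigned long long now,
					unsigned long long time_next_packet,
					unsigned long long len)
{
	if (time_next_packet)
		len -= MIN(len / 2, now - time_next_packet);
	return now + len;
}

int main(void)
{
	/* Packet was scheduled for t=1000 ns, the timer fired 300 ns
	 * late (now=1300), and the pacing gap is 10000 ns.
	 */
	unsigned long long deadline = next_deadline(1300, 1000, 10000);

	printf("next packet at t=%llu (gap shortened to %llu ns)\n",
	       deadline, deadline - 1300);
	return 0;
}

The matching fq_dump_stats() hunk clamps the unsigned long average with
min_t(unsigned long, ..., ~0U) because the exported UAPI field is a
32-bit __u32.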
