Skip to content

Commit 39d0105

Browse files
edumazet authored and davem330 committed
net_sched: sch_fq: add horizon attribute
QUIC servers would like to use SO_TXTIME, without having CAP_NET_ADMIN, to efficiently pace UDP packets. As far as sch_fq is concerned, we need to add safety checks, so that a buggy application does not fill the qdisc with packets having delivery time far in the future. This patch adds a configurable horizon (default: 10 seconds), and a configurable policy when a packet is beyond the horizon at enqueue() time: - either drop the packet (default policy) - or cap its delivery time to the horizon. $ tc -s -d qd sh dev eth0 qdisc fq 8022: root refcnt 257 limit 10000p flow_limit 100p buckets 1024 orphan_mask 1023 quantum 10Kb initial_quantum 51160b low_rate_threshold 550Kbit refill_delay 40.0ms timer_slack 10.000us horizon 10.000s Sent 1234215879 bytes 837099 pkt (dropped 21, overlimits 0 requeues 6) backlog 0b 0p requeues 6 flows 1191 (inactive 1177 throttled 0) gc 0 highprio 0 throttled 692 latency 11.480us pkts_too_long 0 alloc_errors 0 horizon_drops 21 horizon_caps 0 v2: fixed an overflow on 32bit kernels in fq_init(), reported by kbuild test robot <[email protected]> Signed-off-by: Eric Dumazet <[email protected]> Cc: Willem de Bruijn <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent bf6dba7 commit 39d0105

File tree

2 files changed

+60
-5
lines changed

2 files changed

+60
-5
lines changed

include/uapi/linux/pkt_sched.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -913,6 +913,10 @@ enum {
913913

914914
TCA_FQ_TIMER_SLACK, /* timer slack */
915915

916+
TCA_FQ_HORIZON, /* time horizon in us */
917+
918+
TCA_FQ_HORIZON_DROP, /* drop packets beyond horizon, or cap their EDT */
919+
916920
__TCA_FQ_MAX
917921
};
918922

@@ -932,6 +936,8 @@ struct tc_fq_qd_stats {
932936
__u32 throttled_flows;
933937
__u32 unthrottle_latency_ns;
934938
__u64 ce_mark; /* packets above ce_threshold */
939+
__u64 horizon_drops;
940+
__u64 horizon_caps;
935941
};
936942

937943
/* Heavy-Hitter Filter */

net/sched/sch_fq.c

Lines changed: 54 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@ struct fq_sched_data {
100100

101101
struct rb_root delayed; /* for rate limited flows */
102102
u64 time_next_delayed_flow;
103+
u64 ktime_cache; /* copy of last ktime_get_ns() */
103104
unsigned long unthrottle_latency_ns;
104105

105106
struct fq_flow internal; /* for non classified or high prio packets */
@@ -109,12 +110,13 @@ struct fq_sched_data {
109110
u32 flow_plimit; /* max packets per flow */
110111
unsigned long flow_max_rate; /* optional max rate per flow */
111112
u64 ce_threshold;
113+
u64 horizon; /* horizon in ns */
112114
u32 orphan_mask; /* mask for orphaned skb */
113115
u32 low_rate_threshold;
114116
struct rb_root *fq_root;
115117
u8 rate_enable;
116118
u8 fq_trees_log;
117-
119+
u8 horizon_drop;
118120
u32 flows;
119121
u32 inactive_flows;
120122
u32 throttled_flows;
@@ -123,6 +125,8 @@ struct fq_sched_data {
123125
u64 stat_internal_packets;
124126
u64 stat_throttled;
125127
u64 stat_ce_mark;
128+
u64 stat_horizon_drops;
129+
u64 stat_horizon_caps;
126130
u64 stat_flows_plimit;
127131
u64 stat_pkts_too_long;
128132
u64 stat_allocation_errors;
@@ -402,8 +406,6 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
402406
struct rb_node **p, *parent;
403407
struct sk_buff *head, *aux;
404408

405-
fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns();
406-
407409
head = flow->head;
408410
if (!head ||
409411
fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) {
@@ -431,6 +433,12 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
431433
rb_insert_color(&skb->rbnode, &flow->t_root);
432434
}
433435

436+
static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
437+
const struct fq_sched_data *q)
438+
{
439+
return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
440+
}
441+
434442
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
435443
struct sk_buff **to_free)
436444
{
@@ -440,6 +448,28 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
440448
if (unlikely(sch->q.qlen >= sch->limit))
441449
return qdisc_drop(skb, sch, to_free);
442450

451+
if (!skb->tstamp) {
452+
fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
453+
} else {
454+
/* Check if packet timestamp is too far in the future.
455+
* Try first if our cached value, to avoid ktime_get_ns()
456+
* cost in most cases.
457+
*/
458+
if (fq_packet_beyond_horizon(skb, q)) {
459+
/* Refresh our cache and check another time */
460+
q->ktime_cache = ktime_get_ns();
461+
if (fq_packet_beyond_horizon(skb, q)) {
462+
if (q->horizon_drop) {
463+
q->stat_horizon_drops++;
464+
return qdisc_drop(skb, sch, to_free);
465+
}
466+
q->stat_horizon_caps++;
467+
skb->tstamp = q->ktime_cache + q->horizon;
468+
}
469+
}
470+
fq_skb_cb(skb)->time_to_send = skb->tstamp;
471+
}
472+
443473
f = fq_classify(skb, q);
444474
if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
445475
q->stat_flows_plimit++;
@@ -512,7 +542,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
512542
goto out;
513543
}
514544

515-
now = ktime_get_ns();
545+
q->ktime_cache = now = ktime_get_ns();
516546
fq_check_throttled(q, now);
517547
begin:
518548
head = &q->new_flows;
@@ -765,6 +795,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
765795
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
766796
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
767797
[TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 },
798+
[TCA_FQ_HORIZON] = { .type = NLA_U32 },
799+
[TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 },
768800
};
769801

770802
static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -854,7 +886,15 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
854886
if (tb[TCA_FQ_TIMER_SLACK])
855887
q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
856888

889+
if (tb[TCA_FQ_HORIZON])
890+
q->horizon = (u64)NSEC_PER_USEC *
891+
nla_get_u32(tb[TCA_FQ_HORIZON]);
892+
893+
if (tb[TCA_FQ_HORIZON_DROP])
894+
q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
895+
857896
if (!err) {
897+
858898
sch_tree_unlock(sch);
859899
err = fq_resize(sch, fq_log);
860900
sch_tree_lock(sch);
@@ -907,6 +947,9 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
907947

908948
q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
909949

950+
q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */
951+
q->horizon_drop = 1; /* by default, drop packets beyond horizon */
952+
910953
/* Default ce_threshold of 4294 seconds */
911954
q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
912955

@@ -924,6 +967,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
924967
{
925968
struct fq_sched_data *q = qdisc_priv(sch);
926969
u64 ce_threshold = q->ce_threshold;
970+
u64 horizon = q->horizon;
927971
struct nlattr *opts;
928972

929973
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -933,6 +977,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
933977
/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
934978

935979
do_div(ce_threshold, NSEC_PER_USEC);
980+
do_div(horizon, NSEC_PER_USEC);
936981

937982
if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
938983
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
@@ -948,7 +993,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
948993
q->low_rate_threshold) ||
949994
nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
950995
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
951-
nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack))
996+
nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
997+
nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
998+
nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
952999
goto nla_put_failure;
9531000

9541001
return nla_nest_end(skb, opts);
@@ -979,6 +1026,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
9791026
st.unthrottle_latency_ns = min_t(unsigned long,
9801027
q->unthrottle_latency_ns, ~0U);
9811028
st.ce_mark = q->stat_ce_mark;
1029+
st.horizon_drops = q->stat_horizon_drops;
1030+
st.horizon_caps = q->stat_horizon_caps;
9821031
sch_tree_unlock(sch);
9831032

9841033
return gnet_stats_copy_app(d, &st, sizeof(st));

0 commit comments

Comments
 (0)