Skip to content

Commit cec2975

Browse files
gautamramk authored and davem330 committed
net: sched: pie: enable timestamp based delay calculation
RFC 8033 suggests an alternative approach to calculate the queue delay in PIE by using a timestamp on every enqueued packet. This patch adds an implementation of that approach and sets it as the default method to calculate queue delay. The previous method (based on Little's law) to calculate queue delay is set as optional.

Signed-off-by: Gautam Ramakrishnan <[email protected]>
Signed-off-by: Leslie Monis <[email protected]>
Signed-off-by: Mohit P. Tahiliani <[email protected]>
Acked-by: Dave Taht <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent f01b437 commit cec2975

File tree

2 files changed

+113
-29
lines changed

2 files changed

+113
-29
lines changed

include/uapi/linux/pkt_sched.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -950,19 +950,25 @@ enum {
950950
TCA_PIE_BETA,
951951
TCA_PIE_ECN,
952952
TCA_PIE_BYTEMODE,
953+
TCA_PIE_DQ_RATE_ESTIMATOR,
953954
__TCA_PIE_MAX
954955
};
955956
#define TCA_PIE_MAX (__TCA_PIE_MAX - 1)
956957

957958
struct tc_pie_xstats {
958-
__u64 prob; /* current probability */
959-
__u32 delay; /* current delay in ms */
960-
__u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */
961-
__u32 packets_in; /* total number of packets enqueued */
962-
__u32 dropped; /* packets dropped due to pie_action */
963-
__u32 overlimit; /* dropped due to lack of space in queue */
964-
__u32 maxq; /* maximum queue size */
965-
__u32 ecn_mark; /* packets marked with ecn*/
959+
__u64 prob; /* current probability */
960+
__u32 delay; /* current delay in us */
961+
__u32 avg_dq_rate; /* current average dq_rate in
962+
* bits/pie_time
963+
*/
964+
__u32 dq_rate_estimating; /* is avg_dq_rate being calculated? */
965+
__u32 packets_in; /* total number of packets enqueued */
966+
__u32 dropped; /* packets dropped due to pie_action */
967+
__u32 overlimit; /* dropped due to lack of space
968+
* in queue
969+
*/
970+
__u32 maxq; /* maximum queue size */
971+
__u32 ecn_mark; /* packets marked with ecn */
966972
};
967973

968974
/* CBS */

net/sched/sch_pie.c

Lines changed: 99 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#define QUEUE_THRESHOLD 16384
2424
#define DQCOUNT_INVALID -1
25+
#define DTIME_INVALID 0xffffffffffffffff
2526
#define MAX_PROB 0xffffffffffffffff
2627
#define PIE_SCALE 8
2728

@@ -34,6 +35,7 @@ struct pie_params {
3435
u32 beta; /* and are used for shift relative to 1 */
3536
bool ecn; /* true if ecn is enabled */
3637
bool bytemode; /* to scale drop early prob based on pkt size */
38+
u8 dq_rate_estimator; /* to calculate delay using Little's law */
3739
};
3840

3941
/* variables used */
@@ -77,11 +79,34 @@ static void pie_params_init(struct pie_params *params)
7779
params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */
7880
params->ecn = false;
7981
params->bytemode = false;
82+
params->dq_rate_estimator = false;
83+
}
84+
85+
/* private skb vars */
86+
struct pie_skb_cb {
87+
psched_time_t enqueue_time;
88+
};
89+
90+
static struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
91+
{
92+
qdisc_cb_private_validate(skb, sizeof(struct pie_skb_cb));
93+
return (struct pie_skb_cb *)qdisc_skb_cb(skb)->data;
94+
}
95+
96+
static psched_time_t pie_get_enqueue_time(const struct sk_buff *skb)
97+
{
98+
return get_pie_cb(skb)->enqueue_time;
99+
}
100+
101+
static void pie_set_enqueue_time(struct sk_buff *skb)
102+
{
103+
get_pie_cb(skb)->enqueue_time = psched_get_time();
80104
}
81105

82106
static void pie_vars_init(struct pie_vars *vars)
83107
{
84108
vars->dq_count = DQCOUNT_INVALID;
109+
vars->dq_tstamp = DTIME_INVALID;
85110
vars->accu_prob = 0;
86111
vars->avg_dq_rate = 0;
87112
/* default of 150 ms in pschedtime */
@@ -172,6 +197,10 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
172197

173198
/* we can enqueue the packet */
174199
if (enqueue) {
200+
/* Set enqueue time only when dq_rate_estimator is disabled. */
201+
if (!q->params.dq_rate_estimator)
202+
pie_set_enqueue_time(skb);
203+
175204
q->stats.packets_in++;
176205
if (qdisc_qlen(sch) > q->stats.maxq)
177206
q->stats.maxq = qdisc_qlen(sch);
@@ -194,6 +223,7 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
194223
[TCA_PIE_BETA] = {.type = NLA_U32},
195224
[TCA_PIE_ECN] = {.type = NLA_U32},
196225
[TCA_PIE_BYTEMODE] = {.type = NLA_U32},
226+
[TCA_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32},
197227
};
198228

199229
static int pie_change(struct Qdisc *sch, struct nlattr *opt,
@@ -247,6 +277,10 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
247277
if (tb[TCA_PIE_BYTEMODE])
248278
q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
249279

280+
if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
281+
q->params.dq_rate_estimator =
282+
nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);
283+
250284
/* Drop excess packets if new limit is lower */
251285
qlen = sch->q.qlen;
252286
while (sch->q.qlen > sch->limit) {
@@ -266,6 +300,28 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
266300
{
267301
struct pie_sched_data *q = qdisc_priv(sch);
268302
int qlen = sch->qstats.backlog; /* current queue size in bytes */
303+
psched_time_t now = psched_get_time();
304+
u32 dtime = 0;
305+
306+
/* If dq_rate_estimator is disabled, calculate qdelay using the
307+
* packet timestamp.
308+
*/
309+
if (!q->params.dq_rate_estimator) {
310+
q->vars.qdelay = now - pie_get_enqueue_time(skb);
311+
312+
if (q->vars.dq_tstamp != DTIME_INVALID)
313+
dtime = now - q->vars.dq_tstamp;
314+
315+
q->vars.dq_tstamp = now;
316+
317+
if (qlen == 0)
318+
q->vars.qdelay = 0;
319+
320+
if (dtime == 0)
321+
return;
322+
323+
goto burst_allowance_reduction;
324+
}
269325

270326
/* If current queue is about 10 packets or more and dq_count is unset
271327
* we have enough packets to calculate the drain rate. Save
@@ -289,10 +345,10 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
289345
q->vars.dq_count += skb->len;
290346

291347
if (q->vars.dq_count >= QUEUE_THRESHOLD) {
292-
psched_time_t now = psched_get_time();
293-
u32 dtime = now - q->vars.dq_tstamp;
294348
u32 count = q->vars.dq_count << PIE_SCALE;
295349

350+
dtime = now - q->vars.dq_tstamp;
351+
296352
if (dtime == 0)
297353
return;
298354

@@ -317,34 +373,45 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
317373
q->vars.dq_tstamp = psched_get_time();
318374
}
319375

320-
if (q->vars.burst_time > 0) {
321-
if (q->vars.burst_time > dtime)
322-
q->vars.burst_time -= dtime;
323-
else
324-
q->vars.burst_time = 0;
325-
}
376+
goto burst_allowance_reduction;
326377
}
327378
}
379+
380+
return;
381+
382+
burst_allowance_reduction:
383+
if (q->vars.burst_time > 0) {
384+
if (q->vars.burst_time > dtime)
385+
q->vars.burst_time -= dtime;
386+
else
387+
q->vars.burst_time = 0;
388+
}
328389
}
329390

330391
static void calculate_probability(struct Qdisc *sch)
331392
{
332393
struct pie_sched_data *q = qdisc_priv(sch);
333394
u32 qlen = sch->qstats.backlog; /* queue size in bytes */
334395
psched_time_t qdelay = 0; /* in pschedtime */
335-
psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */
396+
psched_time_t qdelay_old = 0; /* in pschedtime */
336397
s64 delta = 0; /* determines the change in probability */
337398
u64 oldprob;
338399
u64 alpha, beta;
339400
u32 power;
340401
bool update_prob = true;
341402

342-
q->vars.qdelay_old = q->vars.qdelay;
403+
if (q->params.dq_rate_estimator) {
404+
qdelay_old = q->vars.qdelay;
405+
q->vars.qdelay_old = q->vars.qdelay;
343406

344-
if (q->vars.avg_dq_rate > 0)
345-
qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
346-
else
347-
qdelay = 0;
407+
if (q->vars.avg_dq_rate > 0)
408+
qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
409+
else
410+
qdelay = 0;
411+
} else {
412+
qdelay = q->vars.qdelay;
413+
qdelay_old = q->vars.qdelay_old;
414+
}
348415

349416
/* If qdelay is zero and qlen is not, it means qlen is very small, less
350417
* than dequeue_rate, so we do not update probability in this round
@@ -430,14 +497,18 @@ static void calculate_probability(struct Qdisc *sch)
430497
/* We restart the measurement cycle if the following conditions are met
431498
* 1. If the delay has been low for 2 consecutive Tupdate periods
432499
* 2. Calculated drop probability is zero
433-
* 3. We have atleast one estimate for the avg_dq_rate ie.,
434-
* is a non-zero value
500+
* 3. If average dq_rate_estimator is enabled, we have at least one
501+
* estimate for the avg_dq_rate ie., is a non-zero value
435502
*/
436503
if ((q->vars.qdelay < q->params.target / 2) &&
437504
(q->vars.qdelay_old < q->params.target / 2) &&
438505
q->vars.prob == 0 &&
439-
q->vars.avg_dq_rate > 0)
506+
(!q->params.dq_rate_estimator || q->vars.avg_dq_rate > 0)) {
440507
pie_vars_init(&q->vars);
508+
}
509+
510+
if (!q->params.dq_rate_estimator)
511+
q->vars.qdelay_old = qdelay;
441512
}
442513

443514
static void pie_timer(struct timer_list *t)
@@ -497,7 +568,9 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
497568
nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
498569
nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
499570
nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
500-
nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode))
571+
nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
572+
nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
573+
q->params.dq_rate_estimator))
501574
goto nla_put_failure;
502575

503576
return nla_nest_end(skb, opts);
@@ -514,16 +587,21 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
514587
.prob = q->vars.prob,
515588
.delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
516589
NSEC_PER_USEC,
517-
/* unscale and return dq_rate in bytes per sec */
518-
.avg_dq_rate = q->vars.avg_dq_rate *
519-
(PSCHED_TICKS_PER_SEC) >> PIE_SCALE,
520590
.packets_in = q->stats.packets_in,
521591
.overlimit = q->stats.overlimit,
522592
.maxq = q->stats.maxq,
523593
.dropped = q->stats.dropped,
524594
.ecn_mark = q->stats.ecn_mark,
525595
};
526596

597+
/* avg_dq_rate is only valid if dq_rate_estimator is enabled */
598+
st.dq_rate_estimating = q->params.dq_rate_estimator;
599+
600+
/* unscale and return dq_rate in bytes per sec */
601+
if (q->params.dq_rate_estimator)
602+
st.avg_dq_rate = q->vars.avg_dq_rate *
603+
(PSCHED_TICKS_PER_SEC) >> PIE_SCALE;
604+
527605
return gnet_stats_copy_app(d, &st, sizeof(st));
528606
}
529607

0 commit comments

Comments
 (0)