Skip to content

Commit 585d763

Browse files
vcgomesJeff Kirsher
authored andcommitted
net/sched: Introduce Credit Based Shaper (CBS) qdisc
This queueing discipline implements the shaper algorithm defined by the 802.1Q-2014 Section 8.6.8.2 and detailed in Annex L. Its primary usage is to apply some bandwidth reservation to user-defined traffic classes, which are mapped to different queues via the mqprio qdisc. Only a simple software implementation is added for now. Signed-off-by: Vinicius Costa Gomes <[email protected]> Signed-off-by: Jesus Sanchez-Palencia <[email protected]> Tested-by: Henrik Austad <[email protected]> Signed-off-by: Jeff Kirsher <[email protected]>
1 parent 0f7787b commit 585d763

File tree

4 files changed

+324
-0
lines changed

4 files changed

+324
-0
lines changed

include/uapi/linux/pkt_sched.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -904,4 +904,23 @@ struct tc_pie_xstats {
904904
__u32 maxq; /* maximum queue size */
905905
__u32 ecn_mark; /* packets marked with ecn*/
906906
};
907+
908+
/* CBS */
909+
struct tc_cbs_qopt {
910+
__u8 offload; /* non-zero requests offload; sch_cbs rejects it with -EOPNOTSUPP */
911+
__u8 _pad[3]; /* explicit padding bytes */
912+
__s32 hicredit; /* maximum credits that can be accumulated, in bytes */
913+
__s32 locredit; /* minimum credits that can be reached, in bytes */
914+
__s32 idleslope; /* credit accumulation rate, in kilobits per second */
915+
__s32 sendslope; /* credit depletion rate, in kilobits per second (expected to be negative) */
916+
};
917+
918+
enum {
919+
TCA_CBS_UNSPEC,
920+
TCA_CBS_PARMS, /* payload is a struct tc_cbs_qopt */
921+
__TCA_CBS_MAX, /* sentinel; TCA_CBS_MAX is derived from it */
922+
};
923+
924+
#define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
925+
907926
#endif

net/sched/Kconfig

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,17 @@ config NET_SCH_TBF
172172
To compile this code as a module, choose M here: the
173173
module will be called sch_tbf.
174174

175+
config NET_SCH_CBS
176+
tristate "Credit Based Shaper (CBS)"
177+
---help---
178+
Say Y here if you want to use the Credit Based Shaper (CBS) packet
179+
scheduling algorithm.
180+
181+
See the top of <file:net/sched/sch_cbs.c> for more details.
182+
183+
To compile this code as a module, choose M here: the
184+
module will be called sch_cbs.
185+
175186
config NET_SCH_GRED
176187
tristate "Generic Random Early Detection (GRED)"
177188
---help---

net/sched/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
5252
obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
5353
obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o
5454
obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o
55+
obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
5556

5657
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
5758
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o

net/sched/sch_cbs.c

Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
/*
2+
* net/sched/sch_cbs.c Credit Based Shaper
3+
*
4+
* This program is free software; you can redistribute it and/or
5+
* modify it under the terms of the GNU General Public License
6+
* as published by the Free Software Foundation; either version
7+
* 2 of the License, or (at your option) any later version.
8+
*
9+
* Authors: Vinicius Costa Gomes <[email protected]>
10+
*
11+
*/
12+
13+
/* Credit Based Shaper (CBS)
14+
* =========================
15+
*
16+
* This is a simple rate-limiting shaper aimed at TSN applications on
17+
* systems with known traffic workloads.
18+
*
19+
* Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
20+
* Section 8.6.8.2, and explained in more detail in the Annex L of the
21+
* same specification.
22+
*
23+
* There are four tunables to be considered:
24+
*
25+
* 'idleslope': Idleslope is the rate of credits that is
26+
* accumulated (in kilobits per second) when there is at least
27+
* one packet waiting for transmission. Packets are transmitted
28+
* when the current value of credits is equal to or greater than
29+
* zero. When there is no packet to be transmitted the amount of
30+
* credits is set to zero. This is the main tunable of the CBS
31+
* algorithm.
32+
*
33+
* 'sendslope':
34+
* Sendslope is the rate of credits that is depleted (it should be a
35+
* negative number of kilobits per second) when a transmission is
36+
* occurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section
37+
* 8.6.8.2 item g):
38+
*
39+
* sendslope = idleslope - port_transmit_rate
40+
*
41+
* 'hicredit': Hicredit defines the maximum amount of credits (in
42+
* bytes) that can be accumulated. Hicredit depends on the
43+
* characteristics of interfering traffic,
44+
* 'max_interference_size' is the maximum size of any burst of
45+
* traffic that can delay the transmission of a frame that is
46+
* available for transmission for this traffic class, (IEEE
47+
* 802.1Q-2014 Annex L, Equation L-3):
48+
*
49+
* hicredit = max_interference_size * (idleslope / port_transmit_rate)
50+
*
51+
* 'locredit': Locredit is the minimum amount of credits that can
52+
* be reached. It is a function of the traffic flowing through
53+
* this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
54+
*
55+
* locredit = max_frame_size * (sendslope / port_transmit_rate)
56+
*/
57+
58+
#include <linux/module.h>
59+
#include <linux/types.h>
60+
#include <linux/kernel.h>
61+
#include <linux/string.h>
62+
#include <linux/errno.h>
63+
#include <linux/skbuff.h>
64+
#include <net/netlink.h>
65+
#include <net/sch_generic.h>
66+
#include <net/pkt_sched.h>
67+
68+
#define BYTES_PER_KBIT (1000LL / 8)
69+
70+
/* Private state of one CBS qdisc instance, obtained through qdisc_priv(). */
struct cbs_sched_data {
71+
s64 port_rate; /* in bytes/s; derived from the link speed in cbs_change() */
72+
s64 last; /* timestamp in ns of the last credits update */
73+
s64 credits; /* in bytes */
74+
s32 locredit; /* in bytes; lower bound applied to credits after a dequeue */
75+
s32 hicredit; /* in bytes; upper bound applied when credits are replenished */
76+
s64 sendslope; /* in bytes/s */
77+
s64 idleslope; /* in bytes/s */
78+
struct qdisc_watchdog watchdog; /* armed by the dequeue path while credits are negative */
79+
int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch); /* installed by cbs_change(), called by cbs_enqueue() */
80+
struct sk_buff *(*dequeue)(struct Qdisc *sch); /* installed by cbs_change(), called by cbs_dequeue() */
81+
};
82+
83+
static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
84+
{
85+
struct cbs_sched_data *q = qdisc_priv(sch);
86+
87+
if (sch->q.qlen == 0 && q->credits > 0) {
88+
/* We need to stop accumulating credits when there's
89+
* no enqueued packets and q->credits is positive.
90+
*/
91+
q->credits = 0;
92+
q->last = ktime_get_ns();
93+
}
94+
95+
return qdisc_enqueue_tail(skb, sch);
96+
}
97+
98+
static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
99+
struct sk_buff **to_free)
100+
{
101+
struct cbs_sched_data *q = qdisc_priv(sch);
102+
103+
return q->enqueue(skb, sch);
104+
}
105+
106+
/* timediff is in ns, slope is in bytes/s */
107+
static s64 timediff_to_credits(s64 timediff, s64 slope)
108+
{
109+
return div64_s64(timediff * slope, NSEC_PER_SEC);
110+
}
111+
112+
/* Time in ns needed to make up for @credits (bytes) at @slope (bytes/s),
 * computed as -credits * NSEC_PER_SEC / slope.
 * A zero @slope yields S64_MAX instead of dividing by zero.
 */
static s64 delay_from_credits(s64 credits, s64 slope)
{
	if (likely(slope != 0))
		return div64_s64(-credits * NSEC_PER_SEC, slope);

	return S64_MAX;
}
119+
120+
/* Credits (bytes) consumed or gained by sending @len bytes: the slope
 * (bytes/s) is scaled by the packet length relative to @port_rate (bytes/s).
 * A zero @port_rate yields S64_MAX instead of dividing by zero.
 */
static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
{
	if (likely(port_rate != 0))
		return div64_s64(len * slope, port_rate);

	return S64_MAX;
}
127+
128+
static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
129+
{
130+
struct cbs_sched_data *q = qdisc_priv(sch);
131+
s64 now = ktime_get_ns();
132+
struct sk_buff *skb;
133+
s64 credits;
134+
int len;
135+
136+
if (q->credits < 0) {
137+
credits = timediff_to_credits(now - q->last, q->idleslope);
138+
139+
credits = q->credits + credits;
140+
q->credits = min_t(s64, credits, q->hicredit);
141+
142+
if (q->credits < 0) {
143+
s64 delay;
144+
145+
delay = delay_from_credits(q->credits, q->idleslope);
146+
qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);
147+
148+
q->last = now;
149+
150+
return NULL;
151+
}
152+
}
153+
154+
skb = qdisc_dequeue_head(sch);
155+
if (!skb)
156+
return NULL;
157+
158+
len = qdisc_pkt_len(skb);
159+
160+
/* As sendslope is a negative number, this will decrease the
161+
* amount of q->credits.
162+
*/
163+
credits = credits_from_len(len, q->sendslope, q->port_rate);
164+
credits += q->credits;
165+
166+
q->credits = max_t(s64, credits, q->locredit);
167+
q->last = now;
168+
169+
return skb;
170+
}
171+
172+
static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
173+
{
174+
struct cbs_sched_data *q = qdisc_priv(sch);
175+
176+
return q->dequeue(sch);
177+
}
178+
179+
/* Netlink attribute policy used by cbs_change() when parsing the nested
 * options: TCA_CBS_PARMS is declared with the size of struct tc_cbs_qopt.
 */
static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
180+
[TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
181+
};
182+
183+
static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
184+
{
185+
struct cbs_sched_data *q = qdisc_priv(sch);
186+
struct net_device *dev = qdisc_dev(sch);
187+
struct nlattr *tb[TCA_CBS_MAX + 1];
188+
struct ethtool_link_ksettings ecmd;
189+
struct tc_cbs_qopt *qopt;
190+
s64 link_speed;
191+
int err;
192+
193+
err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
194+
if (err < 0)
195+
return err;
196+
197+
if (!tb[TCA_CBS_PARMS])
198+
return -EINVAL;
199+
200+
qopt = nla_data(tb[TCA_CBS_PARMS]);
201+
202+
if (qopt->offload)
203+
return -EOPNOTSUPP;
204+
205+
if (!__ethtool_get_link_ksettings(dev, &ecmd))
206+
link_speed = ecmd.base.speed;
207+
else
208+
link_speed = SPEED_1000;
209+
210+
q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
211+
212+
q->enqueue = cbs_enqueue_soft;
213+
q->dequeue = cbs_dequeue_soft;
214+
215+
q->hicredit = qopt->hicredit;
216+
q->locredit = qopt->locredit;
217+
q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
218+
q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
219+
220+
return 0;
221+
}
222+
223+
static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
224+
{
225+
struct cbs_sched_data *q = qdisc_priv(sch);
226+
227+
if (!opt)
228+
return -EINVAL;
229+
230+
qdisc_watchdog_init(&q->watchdog, sch);
231+
232+
return cbs_change(sch, opt);
233+
}
234+
235+
static void cbs_destroy(struct Qdisc *sch)
236+
{
237+
struct cbs_sched_data *q = qdisc_priv(sch);
238+
239+
qdisc_watchdog_cancel(&q->watchdog);
240+
}
241+
242+
static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
243+
{
244+
struct cbs_sched_data *q = qdisc_priv(sch);
245+
struct tc_cbs_qopt opt = { };
246+
struct nlattr *nest;
247+
248+
nest = nla_nest_start(skb, TCA_OPTIONS);
249+
if (!nest)
250+
goto nla_put_failure;
251+
252+
opt.hicredit = q->hicredit;
253+
opt.locredit = q->locredit;
254+
opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
255+
opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
256+
opt.offload = 0;
257+
258+
if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
259+
goto nla_put_failure;
260+
261+
return nla_nest_end(skb, nest);
262+
263+
nla_put_failure:
264+
nla_nest_cancel(skb, nest);
265+
return -1;
266+
}
267+
268+
/* Operations table for the "cbs" qdisc; registered in cbs_module_init()
 * and unregistered in cbs_module_exit().
 */
static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
269+
.id = "cbs",
270+
.priv_size = sizeof(struct cbs_sched_data), /* size of the qdisc_priv() area */
271+
.enqueue = cbs_enqueue,
272+
.dequeue = cbs_dequeue,
273+
.peek = qdisc_peek_dequeued,
274+
.init = cbs_init,
275+
.reset = qdisc_reset_queue,
276+
.destroy = cbs_destroy,
277+
.change = cbs_change,
278+
.dump = cbs_dump,
279+
.owner = THIS_MODULE,
280+
};
281+
282+
/* Module entry point: register the CBS qdisc operations.
 * Returns the result of register_qdisc().
 */
static int __init cbs_module_init(void)
283+
{
284+
return register_qdisc(&cbs_qdisc_ops);
285+
}
286+
287+
/* Module exit point: unregister the CBS qdisc operations. */
static void __exit cbs_module_exit(void)
288+
{
289+
unregister_qdisc(&cbs_qdisc_ops);
290+
}
291+
module_init(cbs_module_init)
292+
module_exit(cbs_module_exit)
293+
MODULE_LICENSE("GPL");

0 commit comments

Comments
 (0)