Skip to content

Commit 22e0f8b

Browse files
jrfastabdavem330
authored andcommitted
net: sched: make bstats per cpu and estimator RCU safe
In order to run qdisc's without locking statistics and estimators need to be handled correctly. To resolve bstats make the statistics per cpu. And because this is only needed for qdiscs that are running without locks which is not the case for most qdiscs in the near future only create percpu stats when qdiscs set the TCQ_F_CPUSTATS flag. Next because estimators use the bstats to calculate packets per second and bytes per second the estimator code paths are updated to use the per cpu statistics. Signed-off-by: John Fastabend <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 79cf79a commit 22e0f8b

19 files changed

+164
-51
lines changed

include/net/gen_stats.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
#include <linux/rtnetlink.h>
77
#include <linux/pkt_sched.h>
88

9+
struct gnet_stats_basic_cpu {
10+
struct gnet_stats_basic_packed bstats;
11+
struct u64_stats_sync syncp;
12+
};
13+
914
struct gnet_dump {
1015
spinlock_t * lock;
1116
struct sk_buff * skb;
@@ -27,7 +32,11 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
2732
spinlock_t *lock, struct gnet_dump *d);
2833

2934
int gnet_stats_copy_basic(struct gnet_dump *d,
35+
struct gnet_stats_basic_cpu __percpu *cpu,
3036
struct gnet_stats_basic_packed *b);
37+
void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
38+
struct gnet_stats_basic_cpu __percpu *cpu,
39+
struct gnet_stats_basic_packed *b);
3140
int gnet_stats_copy_rate_est(struct gnet_dump *d,
3241
const struct gnet_stats_basic_packed *b,
3342
struct gnet_stats_rate_est64 *r);
@@ -37,11 +46,13 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
3746
int gnet_stats_finish_copy(struct gnet_dump *d);
3847

3948
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
49+
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
4050
struct gnet_stats_rate_est64 *rate_est,
4151
spinlock_t *stats_lock, struct nlattr *opt);
4252
void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
4353
struct gnet_stats_rate_est64 *rate_est);
4454
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
55+
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
4556
struct gnet_stats_rate_est64 *rate_est,
4657
spinlock_t *stats_lock, struct nlattr *opt);
4758
bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,

include/net/sch_generic.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <linux/rcupdate.h>
77
#include <linux/pkt_sched.h>
88
#include <linux/pkt_cls.h>
9+
#include <linux/percpu.h>
910
#include <net/gen_stats.h>
1011
#include <net/rtnetlink.h>
1112

@@ -58,6 +59,7 @@ struct Qdisc {
5859
* multiqueue device.
5960
*/
6061
#define TCQ_F_WARN_NONWC (1 << 16)
62+
#define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */
6163
u32 limit;
6264
const struct Qdisc_ops *ops;
6365
struct qdisc_size_table __rcu *stab;
@@ -83,7 +85,10 @@ struct Qdisc {
8385
*/
8486
unsigned long state;
8587
struct sk_buff_head q;
86-
struct gnet_stats_basic_packed bstats;
88+
union {
89+
struct gnet_stats_basic_packed bstats;
90+
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
91+
} __packed;
8792
unsigned int __state;
8893
struct gnet_stats_queue qstats;
8994
struct rcu_head rcu_head;
@@ -487,6 +492,10 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
487492
return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
488493
}
489494

495+
static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
496+
{
497+
return q->flags & TCQ_F_CPUSTATS;
498+
}
490499

491500
static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
492501
const struct sk_buff *skb)
@@ -495,6 +504,17 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
495504
bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
496505
}
497506

507+
static inline void qdisc_bstats_update_cpu(struct Qdisc *sch,
508+
const struct sk_buff *skb)
509+
{
510+
struct gnet_stats_basic_cpu *bstats =
511+
this_cpu_ptr(sch->cpu_bstats);
512+
513+
u64_stats_update_begin(&bstats->syncp);
514+
bstats_update(&bstats->bstats, skb);
515+
u64_stats_update_end(&bstats->syncp);
516+
}
517+
498518
static inline void qdisc_bstats_update(struct Qdisc *sch,
499519
const struct sk_buff *skb)
500520
{

net/core/gen_estimator.c

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ struct gen_estimator
9191
u32 avpps;
9292
struct rcu_head e_rcu;
9393
struct rb_node node;
94+
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
95+
struct rcu_head head;
9496
};
9597

9698
struct gen_estimator_head
@@ -115,25 +117,24 @@ static void est_timer(unsigned long arg)
115117

116118
rcu_read_lock();
117119
list_for_each_entry_rcu(e, &elist[idx].list, list) {
118-
u64 nbytes;
120+
struct gnet_stats_basic_packed b = {0};
119121
u64 brate;
120-
u32 npackets;
121122
u32 rate;
122123

123124
spin_lock(e->stats_lock);
124125
read_lock(&est_lock);
125126
if (e->bstats == NULL)
126127
goto skip;
127128

128-
nbytes = e->bstats->bytes;
129-
npackets = e->bstats->packets;
130-
brate = (nbytes - e->last_bytes)<<(7 - idx);
131-
e->last_bytes = nbytes;
129+
__gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats);
130+
131+
brate = (b.bytes - e->last_bytes)<<(7 - idx);
132+
e->last_bytes = b.bytes;
132133
e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
133134
e->rate_est->bps = (e->avbps+0xF)>>5;
134135

135-
rate = (npackets - e->last_packets)<<(12 - idx);
136-
e->last_packets = npackets;
136+
rate = (b.packets - e->last_packets)<<(12 - idx);
137+
e->last_packets = b.packets;
137138
e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
138139
e->rate_est->pps = (e->avpps+0x1FF)>>10;
139140
skip:
@@ -203,12 +204,14 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
203204
*
204205
*/
205206
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
207+
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
206208
struct gnet_stats_rate_est64 *rate_est,
207209
spinlock_t *stats_lock,
208210
struct nlattr *opt)
209211
{
210212
struct gen_estimator *est;
211213
struct gnet_estimator *parm = nla_data(opt);
214+
struct gnet_stats_basic_packed b = {0};
212215
int idx;
213216

214217
if (nla_len(opt) < sizeof(*parm))
@@ -221,15 +224,18 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
221224
if (est == NULL)
222225
return -ENOBUFS;
223226

227+
__gnet_stats_copy_basic(&b, cpu_bstats, bstats);
228+
224229
idx = parm->interval + 2;
225230
est->bstats = bstats;
226231
est->rate_est = rate_est;
227232
est->stats_lock = stats_lock;
228233
est->ewma_log = parm->ewma_log;
229-
est->last_bytes = bstats->bytes;
234+
est->last_bytes = b.bytes;
230235
est->avbps = rate_est->bps<<5;
231-
est->last_packets = bstats->packets;
236+
est->last_packets = b.packets;
232237
est->avpps = rate_est->pps<<10;
238+
est->cpu_bstats = cpu_bstats;
233239

234240
spin_lock_bh(&est_tree_lock);
235241
if (!elist[idx].timer.function) {
@@ -290,11 +296,12 @@ EXPORT_SYMBOL(gen_kill_estimator);
290296
* Returns 0 on success or a negative error code.
291297
*/
292298
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
299+
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
293300
struct gnet_stats_rate_est64 *rate_est,
294301
spinlock_t *stats_lock, struct nlattr *opt)
295302
{
296303
gen_kill_estimator(bstats, rate_est);
297-
return gen_new_estimator(bstats, rate_est, stats_lock, opt);
304+
return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt);
298305
}
299306
EXPORT_SYMBOL(gen_replace_estimator);
300307

net/core/gen_stats.c

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,43 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
9797
}
9898
EXPORT_SYMBOL(gnet_stats_start_copy);
9999

100+
static void
101+
__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
102+
struct gnet_stats_basic_cpu __percpu *cpu)
103+
{
104+
int i;
105+
106+
for_each_possible_cpu(i) {
107+
struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
108+
unsigned int start;
109+
__u64 bytes;
110+
__u32 packets;
111+
112+
do {
113+
start = u64_stats_fetch_begin_irq(&bcpu->syncp);
114+
bytes = bcpu->bstats.bytes;
115+
packets = bcpu->bstats.packets;
116+
} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
117+
118+
bstats->bytes += bcpu->bstats.bytes;
119+
bstats->packets += bcpu->bstats.packets;
120+
}
121+
}
122+
123+
void
124+
__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
125+
struct gnet_stats_basic_cpu __percpu *cpu,
126+
struct gnet_stats_basic_packed *b)
127+
{
128+
if (cpu) {
129+
__gnet_stats_copy_basic_cpu(bstats, cpu);
130+
} else {
131+
bstats->bytes = b->bytes;
132+
bstats->packets = b->packets;
133+
}
134+
}
135+
EXPORT_SYMBOL(__gnet_stats_copy_basic);
136+
100137
/**
101138
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
102139
* @d: dumping handle
@@ -109,19 +146,25 @@ EXPORT_SYMBOL(gnet_stats_start_copy);
109146
* if the room in the socket buffer was not sufficient.
110147
*/
111148
int
112-
gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
149+
gnet_stats_copy_basic(struct gnet_dump *d,
150+
struct gnet_stats_basic_cpu __percpu *cpu,
151+
struct gnet_stats_basic_packed *b)
113152
{
153+
struct gnet_stats_basic_packed bstats = {0};
154+
155+
__gnet_stats_copy_basic(&bstats, cpu, b);
156+
114157
if (d->compat_tc_stats) {
115-
d->tc_stats.bytes = b->bytes;
116-
d->tc_stats.packets = b->packets;
158+
d->tc_stats.bytes = bstats.bytes;
159+
d->tc_stats.packets = bstats.packets;
117160
}
118161

119162
if (d->tail) {
120163
struct gnet_stats_basic sb;
121164

122165
memset(&sb, 0, sizeof(sb));
123-
sb.bytes = b->bytes;
124-
sb.packets = b->packets;
166+
sb.bytes = bstats.bytes;
167+
sb.packets = bstats.packets;
125168
return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb));
126169
}
127170
return 0;

net/netfilter/xt_RATEEST.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
136136
cfg.est.interval = info->interval;
137137
cfg.est.ewma_log = info->ewma_log;
138138

139-
ret = gen_new_estimator(&est->bstats, &est->rstats,
139+
ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
140140
&est->lock, &cfg.opt);
141141
if (ret < 0)
142142
goto err2;

net/sched/act_api.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,8 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
252252
p->tcfc_tm.install = jiffies;
253253
p->tcfc_tm.lastuse = jiffies;
254254
if (est) {
255-
int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
255+
int err = gen_new_estimator(&p->tcfc_bstats, NULL,
256+
&p->tcfc_rate_est,
256257
&p->tcfc_lock, est);
257258
if (err) {
258259
kfree(p);
@@ -619,7 +620,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
619620
if (err < 0)
620621
goto errout;
621622

622-
if (gnet_stats_copy_basic(&d, &p->tcfc_bstats) < 0 ||
623+
if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 ||
623624
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
624625
&p->tcfc_rate_est) < 0 ||
625626
gnet_stats_copy_queue(&d, &p->tcfc_qstats) < 0)

net/sched/act_police.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
178178

179179
spin_lock_bh(&police->tcf_lock);
180180
if (est) {
181-
err = gen_replace_estimator(&police->tcf_bstats,
181+
err = gen_replace_estimator(&police->tcf_bstats, NULL,
182182
&police->tcf_rate_est,
183183
&police->tcf_lock, est);
184184
if (err)

net/sched/sch_api.c

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,13 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
942942
sch->handle = handle;
943943

944944
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
945+
if (qdisc_is_percpu_stats(sch)) {
946+
sch->cpu_bstats =
947+
alloc_percpu(struct gnet_stats_basic_cpu);
948+
if (!sch->cpu_bstats)
949+
goto err_out4;
950+
}
951+
945952
if (tca[TCA_STAB]) {
946953
stab = qdisc_get_stab(tca[TCA_STAB]);
947954
if (IS_ERR(stab)) {
@@ -964,8 +971,11 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
964971
else
965972
root_lock = qdisc_lock(sch);
966973

967-
err = gen_new_estimator(&sch->bstats, &sch->rate_est,
968-
root_lock, tca[TCA_RATE]);
974+
err = gen_new_estimator(&sch->bstats,
975+
sch->cpu_bstats,
976+
&sch->rate_est,
977+
root_lock,
978+
tca[TCA_RATE]);
969979
if (err)
970980
goto err_out4;
971981
}
@@ -984,6 +994,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
984994
return NULL;
985995

986996
err_out4:
997+
free_percpu(sch->cpu_bstats);
987998
/*
988999
* Any broken qdiscs that would require a ops->reset() here?
9891000
* The qdisc was never in action so it shouldn't be necessary.
@@ -1022,9 +1033,11 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
10221033
because change can't be undone. */
10231034
if (sch->flags & TCQ_F_MQROOT)
10241035
goto out;
1025-
gen_replace_estimator(&sch->bstats, &sch->rate_est,
1026-
qdisc_root_sleeping_lock(sch),
1027-
tca[TCA_RATE]);
1036+
gen_replace_estimator(&sch->bstats,
1037+
sch->cpu_bstats,
1038+
&sch->rate_est,
1039+
qdisc_root_sleeping_lock(sch),
1040+
tca[TCA_RATE]);
10281041
}
10291042
out:
10301043
return 0;
@@ -1299,6 +1312,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
12991312
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
13001313
u32 portid, u32 seq, u16 flags, int event)
13011314
{
1315+
struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
13021316
struct tcmsg *tcm;
13031317
struct nlmsghdr *nlh;
13041318
unsigned char *b = skb_tail_pointer(skb);
@@ -1334,7 +1348,10 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
13341348
if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
13351349
goto nla_put_failure;
13361350

1337-
if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
1351+
if (qdisc_is_percpu_stats(q))
1352+
cpu_bstats = q->cpu_bstats;
1353+
1354+
if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 ||
13381355
gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
13391356
gnet_stats_copy_queue(&d, &q->qstats) < 0)
13401357
goto nla_put_failure;

net/sched/sch_atm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
639639

640640
flow->qstats.qlen = flow->q->q.qlen;
641641

642-
if (gnet_stats_copy_basic(d, &flow->bstats) < 0 ||
642+
if (gnet_stats_copy_basic(d, NULL, &flow->bstats) < 0 ||
643643
gnet_stats_copy_queue(d, &flow->qstats) < 0)
644644
return -1;
645645

0 commit comments

Comments
 (0)