Skip to content

Commit 34fe76a

Browse files
committed
Merge branch 'net-sched-fast-stats'
Eric Dumazet says:

====================
net: sched: faster stats gathering

A while back, I sent one RFC patch using lockless stats gathering
on 64bit arches.

This patch series does it more cleanly, using a seqcount.

Since qdisc/class stats are written at dequeue() time, we can ask
the dequeue to change the seqcount, so that stats readers can avoid
taking the root qdisc lock, and instead use the typical
read_seqcount_{begin|retry} guarded loop.

This does not change fast path costs, as the seqcount increments
are not more expensive than the bit manipulation, and allows readers
to not freeze the fast path anymore.
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents 64151ae + edb09eb commit 34fe76a

File tree

29 files changed

+158
-85
lines changed

29 files changed

+158
-85
lines changed

Documentation/networking/gen_stats.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ struct mystruct {
 	...
 };

-Update statistics:
+Update statistics, in dequeue() methods only, (while owning qdisc->running)
 	mystruct->tstats.packet++;
 	mystruct->qstats.backlog += skb->pkt_len;

drivers/net/bonding/bond_main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4610,6 +4610,7 @@ static int bond_check_params(struct bond_params *params)
 static struct lock_class_key bonding_netdev_xmit_lock_key;
 static struct lock_class_key bonding_netdev_addr_lock_key;
 static struct lock_class_key bonding_tx_busylock_key;
+static struct lock_class_key bonding_qdisc_running_key;

 static void bond_set_lockdep_class_one(struct net_device *dev,
 				       struct netdev_queue *txq,
@@ -4625,6 +4626,7 @@ static void bond_set_lockdep_class(struct net_device *dev)
 			  &bonding_netdev_addr_lock_key);
 	netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL);
 	dev->qdisc_tx_busylock = &bonding_tx_busylock_key;
+	dev->qdisc_running_key = &bonding_qdisc_running_key;
 }

 /* Called from registration process */

drivers/net/ppp/ppp_generic.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,9 +1313,12 @@ ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64)
 }

 static struct lock_class_key ppp_tx_busylock;
+static struct lock_class_key ppp_qdisc_running_key;
+
 static int ppp_dev_init(struct net_device *dev)
 {
 	dev->qdisc_tx_busylock = &ppp_tx_busylock;
+	dev->qdisc_running_key = &ppp_qdisc_running_key;
 	return 0;
 }

drivers/net/team/team.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,6 +1577,7 @@ static const struct team_option team_options[] = {
 static struct lock_class_key team_netdev_xmit_lock_key;
 static struct lock_class_key team_netdev_addr_lock_key;
 static struct lock_class_key team_tx_busylock_key;
+static struct lock_class_key team_qdisc_running_key;

 static void team_set_lockdep_class_one(struct net_device *dev,
 				       struct netdev_queue *txq,
@@ -1590,6 +1591,7 @@ static void team_set_lockdep_class(struct net_device *dev)
 	lockdep_set_class(&dev->addr_list_lock, &team_netdev_addr_lock_key);
 	netdev_for_each_tx_queue(dev, team_set_lockdep_class_one, NULL);
 	dev->qdisc_tx_busylock = &team_tx_busylock_key;
+	dev->qdisc_running_key = &team_qdisc_running_key;
 }

 static int team_init(struct net_device *dev)

include/linux/netdevice.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1862,6 +1862,7 @@ struct net_device {
 #endif
 	struct phy_device	*phydev;
 	struct lock_class_key	*qdisc_tx_busylock;
+	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)

include/net/gen_stats.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,12 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
 				 spinlock_t *lock, struct gnet_dump *d,
 				 int padattr);

-int gnet_stats_copy_basic(struct gnet_dump *d,
+int gnet_stats_copy_basic(const seqcount_t *running,
+			  struct gnet_dump *d,
 			  struct gnet_stats_basic_cpu __percpu *cpu,
 			  struct gnet_stats_basic_packed *b);
-void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+void __gnet_stats_copy_basic(const seqcount_t *running,
+			     struct gnet_stats_basic_packed *bstats,
 			     struct gnet_stats_basic_cpu __percpu *cpu,
 			     struct gnet_stats_basic_packed *b);
 int gnet_stats_copy_rate_est(struct gnet_dump *d,
@@ -52,13 +54,15 @@ int gnet_stats_finish_copy(struct gnet_dump *d);
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct gnet_stats_rate_est64 *rate_est,
-		      spinlock_t *stats_lock, struct nlattr *opt);
+		      spinlock_t *stats_lock,
+		      seqcount_t *running, struct nlattr *opt);
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est64 *rate_est);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
 			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct gnet_stats_rate_est64 *rate_est,
-			  spinlock_t *stats_lock, struct nlattr *opt);
+			  spinlock_t *stats_lock,
+			  seqcount_t *running, struct nlattr *opt);
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
 			  const struct gnet_stats_rate_est64 *rate_est);
 #endif

include/net/sch_generic.h

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,6 @@ enum qdisc_state_t {
 	__QDISC_STATE_THROTTLED,
 };

-/*
- * following bits are only changed while qdisc lock is held
- */
-enum qdisc___state_t {
-	__QDISC___STATE_RUNNING = 1,
-};
-
 struct qdisc_size_table {
 	struct rcu_head rcu;
 	struct list_head list;
@@ -93,7 +86,7 @@ struct Qdisc {
 	unsigned long state;
 	struct sk_buff_head q;
 	struct gnet_stats_basic_packed bstats;
-	unsigned int __state;
+	seqcount_t running;
 	struct gnet_stats_queue qstats;
 	struct rcu_head rcu_head;
 	int padded;
@@ -104,20 +97,20 @@ struct Qdisc {

 static inline bool qdisc_is_running(const struct Qdisc *qdisc)
 {
-	return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false;
+	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
 }

 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
 	if (qdisc_is_running(qdisc))
 		return false;
-	qdisc->__state |= __QDISC___STATE_RUNNING;
+	write_seqcount_begin(&qdisc->running);
 	return true;
 }

 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
-	qdisc->__state &= ~__QDISC___STATE_RUNNING;
+	write_seqcount_end(&qdisc->running);
 }

 static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -321,6 +314,14 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
 	return qdisc_lock(root);
 }

+static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
+{
+	struct Qdisc *root = qdisc_root_sleeping(qdisc);
+
+	ASSERT_RTNL();
+	return &root->running;
+}
+
 static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
 {
 	return qdisc->dev_queue->dev;

net/bluetooth/6lowpan.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)

 static struct lock_class_key bt_tx_busylock;
 static struct lock_class_key bt_netdev_xmit_lock_key;
+static struct lock_class_key bt_qdisc_running_key;

 static void bt_set_lockdep_class_one(struct net_device *dev,
 				     struct netdev_queue *txq,
@@ -641,6 +642,7 @@ static int bt_dev_init(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, bt_set_lockdep_class_one, NULL);
 	dev->qdisc_tx_busylock = &bt_tx_busylock;
+	dev->qdisc_running_key = &bt_qdisc_running_key;

 	return 0;
 }

net/core/dev.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3075,7 +3075,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	/*
 	 * Heuristic to force contended enqueues to serialize on a
 	 * separate lock before trying to get qdisc main lock.
-	 * This permits __QDISC___STATE_RUNNING owner to get the lock more
+	 * This permits qdisc->running owner to get the lock more
 	 * often and dequeue packets faster.
 	 */
 	contended = qdisc_is_running(q);

net/core/gen_estimator.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ struct gen_estimator
 	struct gnet_stats_basic_packed	*bstats;
 	struct gnet_stats_rate_est64	*rate_est;
 	spinlock_t		*stats_lock;
+	seqcount_t		*running;
 	int			ewma_log;
 	u32			last_packets;
 	unsigned long		avpps;
@@ -121,26 +122,28 @@ static void est_timer(unsigned long arg)
 		unsigned long rate;
 		u64 brate;

-		spin_lock(e->stats_lock);
+		if (e->stats_lock)
+			spin_lock(e->stats_lock);
 		read_lock(&est_lock);
 		if (e->bstats == NULL)
 			goto skip;

-		__gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats);
+		__gnet_stats_copy_basic(e->running, &b, e->cpu_bstats, e->bstats);

 		brate = (b.bytes - e->last_bytes)<<(7 - idx);
 		e->last_bytes = b.bytes;
 		e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
-		e->rate_est->bps = (e->avbps+0xF)>>5;
+		WRITE_ONCE(e->rate_est->bps, (e->avbps + 0xF) >> 5);

 		rate = b.packets - e->last_packets;
 		rate <<= (7 - idx);
 		e->last_packets = b.packets;
 		e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
-		e->rate_est->pps = (e->avpps + 0xF) >> 5;
+		WRITE_ONCE(e->rate_est->pps, (e->avpps + 0xF) >> 5);
 skip:
 		read_unlock(&est_lock);
-		spin_unlock(e->stats_lock);
+		if (e->stats_lock)
+			spin_unlock(e->stats_lock);
 	}

 	if (!list_empty(&elist[idx].list))
@@ -194,6 +197,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
  * @stats_lock: statistics lock
+ * @running: qdisc running seqcount
  * @opt: rate estimator configuration TLV
  *
  * Creates a new rate estimator with &bstats as source and &rate_est
@@ -209,6 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct gnet_stats_rate_est64 *rate_est,
 		      spinlock_t *stats_lock,
+		      seqcount_t *running,
 		      struct nlattr *opt)
 {
 	struct gen_estimator *est;
@@ -226,12 +231,13 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 	if (est == NULL)
 		return -ENOBUFS;

-	__gnet_stats_copy_basic(&b, cpu_bstats, bstats);
+	__gnet_stats_copy_basic(running, &b, cpu_bstats, bstats);

 	idx = parm->interval + 2;
 	est->bstats = bstats;
 	est->rate_est = rate_est;
 	est->stats_lock = stats_lock;
+	est->running = running;
 	est->ewma_log = parm->ewma_log;
 	est->last_bytes = b.bytes;
 	est->avbps = rate_est->bps<<5;
@@ -291,6 +297,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
  * @stats_lock: statistics lock
+ * @running: qdisc running seqcount (might be NULL)
  * @opt: rate estimator configuration TLV
  *
  * Replaces the configuration of a rate estimator by calling
@@ -301,10 +308,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
 			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct gnet_stats_rate_est64 *rate_est,
-			  spinlock_t *stats_lock, struct nlattr *opt)
+			  spinlock_t *stats_lock,
+			  seqcount_t *running, struct nlattr *opt)
 {
 	gen_kill_estimator(bstats, rate_est);
-	return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt);
+	return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt);
 }
 EXPORT_SYMBOL(gen_replace_estimator);

net/core/gen_stats.c

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,11 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
 	return 0;

 nla_put_failure:
+	if (d->lock)
+		spin_unlock_bh(d->lock);
 	kfree(d->xstats);
 	d->xstats = NULL;
 	d->xstats_len = 0;
-	spin_unlock_bh(d->lock);
 	return -1;
 }

@@ -65,15 +66,16 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 {
 	memset(d, 0, sizeof(*d));

-	spin_lock_bh(lock);
-	d->lock = lock;
 	if (type)
 		d->tail = (struct nlattr *)skb_tail_pointer(skb);
 	d->skb = skb;
 	d->compat_tc_stats = tc_stats_type;
 	d->compat_xstats = xstats_type;
 	d->padattr = padattr;
-
+	if (lock) {
+		d->lock = lock;
+		spin_lock_bh(lock);
+	}
 	if (d->tail)
 		return gnet_stats_copy(d, type, NULL, 0, padattr);

@@ -126,16 +128,23 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
 }

 void
-__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+__gnet_stats_copy_basic(const seqcount_t *running,
+			struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_basic_cpu __percpu *cpu,
 			struct gnet_stats_basic_packed *b)
 {
+	unsigned int seq;
+
 	if (cpu) {
 		__gnet_stats_copy_basic_cpu(bstats, cpu);
-	} else {
+		return;
+	}
+	do {
+		if (running)
+			seq = read_seqcount_begin(running);
 		bstats->bytes = b->bytes;
 		bstats->packets = b->packets;
-	}
+	} while (running && read_seqcount_retry(running, seq));
 }
 EXPORT_SYMBOL(__gnet_stats_copy_basic);

@@ -152,13 +161,14 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_basic(struct gnet_dump *d,
+gnet_stats_copy_basic(const seqcount_t *running,
+		      struct gnet_dump *d,
 		      struct gnet_stats_basic_cpu __percpu *cpu,
 		      struct gnet_stats_basic_packed *b)
 {
 	struct gnet_stats_basic_packed bstats = {0};

-	__gnet_stats_copy_basic(&bstats, cpu, b);
+	__gnet_stats_copy_basic(running, &bstats, cpu, b);

 	if (d->compat_tc_stats) {
 		d->tc_stats.bytes = bstats.bytes;
@@ -328,8 +338,9 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
 	return 0;

 err_out:
+	if (d->lock)
+		spin_unlock_bh(d->lock);
 	d->xstats_len = 0;
-	spin_unlock_bh(d->lock);
 	return -1;
 }
 EXPORT_SYMBOL(gnet_stats_copy_app);
@@ -363,10 +374,11 @@ gnet_stats_finish_copy(struct gnet_dump *d)
 		return -1;
 	}

+	if (d->lock)
+		spin_unlock_bh(d->lock);
 	kfree(d->xstats);
 	d->xstats = NULL;
 	d->xstats_len = 0;
-	spin_unlock_bh(d->lock);
 	return 0;
 }
 EXPORT_SYMBOL(gnet_stats_finish_copy);

net/ieee802154/6lowpan/core.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ static struct header_ops lowpan_header_ops = {

 static struct lock_class_key lowpan_tx_busylock;
 static struct lock_class_key lowpan_netdev_xmit_lock_key;
+static struct lock_class_key lowpan_qdisc_running_key;

 static void lowpan_set_lockdep_class_one(struct net_device *ldev,
 					 struct netdev_queue *txq,
@@ -73,6 +74,8 @@ static int lowpan_dev_init(struct net_device *ldev)
 {
 	netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL);
 	ldev->qdisc_tx_busylock = &lowpan_tx_busylock;
+	ldev->qdisc_running_key = &lowpan_qdisc_running_key;
+
 	return 0;
 }

0 commit comments

Comments
 (0)