
Commit 44b9b6c

Merge branch 'net-sched-move-back-qlen-to-per-CPU-accounting'
Paolo Abeni says:

====================
net: sched: move back qlen to per CPU accounting

The commit 46b1c18 ("net: sched: put back q.qlen into a single location") introduced a measurable regression in contended scenarios for lockless qdiscs.

As Eric suggested, we can replace the q.qlen accesses with calls to qdisc_is_empty() in the datapath and revert the above commit. The TC subsystem updates qdisc->empty (the flag behind qdisc_is_empty()) in a somewhat loose way: notably, 'empty' is set only when a qdisc dequeue() call returns a NULL pointer, that is, on the invocation after the last packet has been dequeued.

The above is good enough for the BYPASS implementation - the only downside is that we miss the optimization for a very small time-frame - but it would break things badly if the internal structure consistency of a classful qdisc ever relied on the child's qdisc_is_empty().

A stricter 'empty' update would add considerable complexity to its life-cycle, so this series takes a different approach: we allow a lockless qdisc to switch from per-CPU accounting to global stats accounting when the NOLOCK bit is cleared. Since most pieces of the infrastructure are already in place, this requires very few changes to the pfifo_fast qdisc, and any later NOLOCK qdisc can hook in with little effort - there is no need to maintain two different implementations.

The first two patches remove direct qlen access from non-core TC code, the 3rd and 4th patches add and use the infrastructure that allows switching the stats accounting, and the 5th patch is the actual revert.

v1 -> v2:
 - fixed build issues
 - more descriptive commit message for patch 5/5
====================

Signed-off-by: David S. Miller <[email protected]>
Parents: 4c75be0 + 73eb628
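
To make the two accounting modes concrete, here is a minimal userspace model (not kernel code; the struct, the function names and the four-CPU count are illustrative only) of what the series ends up with: per-CPU qlen counters plus a loosely updated 'empty' flag for NOLOCK qdiscs, and a single qlen for lock-protected ones. It shows why the fast path checks emptiness instead of summing the per-CPU counters.

#include <stdbool.h>
#include <stdio.h>

#define NCPU 4                          /* illustrative CPU count */

struct model_qdisc {
        bool percpu_stats;              /* models TCQ_F_CPUSTATS */
        unsigned int cpu_qlen[NCPU];    /* per-CPU qlen, lockless updates */
        unsigned int qlen;              /* single counter, locked qdiscs */
        bool empty;                     /* cleared on enqueue, set only
                                         * when a dequeue finds nothing */
};

/* slow path: sum over every CPU, the moral equivalent of qdisc_qlen_sum() */
static unsigned int model_qlen_sum(const struct model_qdisc *q)
{
        unsigned int sum = 0;

        if (!q->percpu_stats)
                return q->qlen;
        for (int i = 0; i < NCPU; i++)
                sum += q->cpu_qlen[i];
        return sum;
}

/* fast path: like qdisc_is_empty(), no summation, may lag dequeue slightly */
static bool model_is_empty(const struct model_qdisc *q)
{
        return q->percpu_stats ? q->empty : q->qlen == 0;
}

int main(void)
{
        struct model_qdisc q = { .percpu_stats = true, .empty = true };

        q.cpu_qlen[1]++;                /* enqueue on CPU 1 */
        q.empty = false;
        printf("qlen=%u empty=%d\n", model_qlen_sum(&q), model_is_empty(&q));
        return 0;
}

Switching a qdisc between the two modes then amounts to flipping percpu_stats (TCQ_F_CPUSTATS in the kernel) and letting shared helpers pick the right branch, which is what the patches below add.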

5 files changed: +105, -71 lines

include/net/sch_generic.h

Lines changed: 58 additions & 22 deletions
@@ -52,10 +52,7 @@ struct qdisc_size_table {
 struct qdisc_skb_head {
 	struct sk_buff	*head;
 	struct sk_buff	*tail;
-	union {
-		u32		qlen;
-		atomic_t	atomic_qlen;
-	};
+	__u32		qlen;
 	spinlock_t	lock;
 };
 
@@ -146,9 +143,14 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
 	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
 }
 
+static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
+{
+	return q->flags & TCQ_F_CPUSTATS;
+}
+
 static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
 {
-	if (qdisc->flags & TCQ_F_NOLOCK)
+	if (qdisc_is_percpu_stats(qdisc))
 		return qdisc->empty;
 	return !qdisc->q.qlen;
 }
@@ -481,19 +483,27 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 	BUILD_BUG_ON(sizeof(qcb->data) < sz);
 }
 
+static inline int qdisc_qlen_cpu(const struct Qdisc *q)
+{
+	return this_cpu_ptr(q->cpu_qstats)->qlen;
+}
+
 static inline int qdisc_qlen(const struct Qdisc *q)
 {
	return q->q.qlen;
 }
 
-static inline u32 qdisc_qlen_sum(const struct Qdisc *q)
+static inline int qdisc_qlen_sum(const struct Qdisc *q)
 {
-	u32 qlen = q->qstats.qlen;
+	__u32 qlen = q->qstats.qlen;
+	int i;
 
-	if (q->flags & TCQ_F_NOLOCK)
-		qlen += atomic_read(&q->q.atomic_qlen);
-	else
+	if (qdisc_is_percpu_stats(q)) {
+		for_each_possible_cpu(i)
+			qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
+	} else {
 		qlen += q->q.qlen;
+	}
 
 	return qlen;
 }
@@ -747,7 +757,7 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 		const struct Qdisc *q = rcu_dereference(txq->qdisc);
 
-		if (q->q.qlen) {
+		if (!qdisc_is_empty(q)) {
 			rcu_read_unlock();
 			return false;
 		}
@@ -817,11 +827,6 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	return sch->enqueue(skb, sch, to_free);
 }
 
-static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
-{
-	return q->flags & TCQ_F_CPUSTATS;
-}
-
 static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
 				  __u64 bytes, __u32 packets)
 {
@@ -889,14 +894,14 @@ static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
 	this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
 }
 
-static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
 {
-	atomic_inc(&sch->q.atomic_qlen);
+	this_cpu_inc(sch->cpu_qstats->qlen);
 }
 
-static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch)
+static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
 {
-	atomic_dec(&sch->q.atomic_qlen);
+	this_cpu_dec(sch->cpu_qstats->qlen);
 }
 
 static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
@@ -1106,15 +1111,46 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
 	return skb;
 }
 
+static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
+						 struct sk_buff *skb)
+{
+	if (qdisc_is_percpu_stats(sch)) {
+		qdisc_qstats_cpu_backlog_dec(sch, skb);
+		qdisc_bstats_cpu_update(sch, skb);
+		qdisc_qstats_cpu_qlen_dec(sch);
+	} else {
+		qdisc_qstats_backlog_dec(sch, skb);
+		qdisc_bstats_update(sch, skb);
+		sch->q.qlen--;
+	}
+}
+
+static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
+						 unsigned int pkt_len)
+{
+	if (qdisc_is_percpu_stats(sch)) {
+		qdisc_qstats_cpu_qlen_inc(sch);
+		this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
+	} else {
+		sch->qstats.backlog += pkt_len;
+		sch->q.qlen++;
+	}
+}
+
 /* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
 static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
 {
 	struct sk_buff *skb = skb_peek(&sch->gso_skb);
 
 	if (skb) {
 		skb = __skb_dequeue(&sch->gso_skb);
-		qdisc_qstats_backlog_dec(sch, skb);
-		sch->q.qlen--;
+		if (qdisc_is_percpu_stats(sch)) {
+			qdisc_qstats_cpu_backlog_dec(sch, skb);
+			qdisc_qstats_cpu_qlen_dec(sch);
+		} else {
+			qdisc_qstats_backlog_dec(sch, skb);
+			sch->q.qlen--;
+		}
 	} else {
 		skb = sch->dequeue(sch);
 	}
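
The two qdisc_update_stats_at_*() helpers above are the single switch point between per-CPU and global accounting. As a rough sketch of how a NOLOCK qdisc's enqueue/dequeue paths are expected to use them - mirroring the pfifo_fast changes in net/sched/sch_generic.c further down; backend_produce() and backend_consume() are hypothetical placeholders for the real queue backend:

static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			   struct sk_buff **to_free)
{
	unsigned int pkt_len = qdisc_pkt_len(skb);
	int err;

	err = backend_produce(sch, skb);	/* hypothetical queue backend */
	if (unlikely(err))
		return qdisc_drop_cpu(skb, sch, to_free);

	/* correct for both per-CPU and global stats accounting */
	qdisc_update_stats_at_enqueue(sch, pkt_len);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb = backend_consume(sch);	/* hypothetical backend */

	if (likely(skb))
		qdisc_update_stats_at_dequeue(sch, skb);
	else
		sch->empty = true;	/* the loose 'empty' update */
	return skb;
}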

net/caif/caif_dev.c

Lines changed: 8 additions & 4 deletions
@@ -186,15 +186,19 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt)
 		goto noxoff;
 
 	if (likely(!netif_queue_stopped(caifd->netdev))) {
+		struct Qdisc *sch;
+
 		/* If we run with a TX queue, check if the queue is too long*/
 		txq = netdev_get_tx_queue(skb->dev, 0);
-		qlen = qdisc_qlen(rcu_dereference_bh(txq->qdisc));
-
-		if (likely(qlen == 0))
+		sch = rcu_dereference_bh(txq->qdisc);
+		if (likely(qdisc_is_empty(sch)))
 			goto noxoff;
 
+		/* can check for explicit qdisc len value only !NOLOCK,
+		 * always set flow off otherwise
+		 */
 		high = (caifd->netdev->tx_queue_len * q_high) / 100;
-		if (likely(qlen < high))
+		if (!(sch->flags & TCQ_F_NOLOCK) && likely(sch->q.qlen < high))
 			goto noxoff;
 	}
 
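
The asymmetry above follows from the header changes: a per-CPU-stats qdisc no longer has a cheap exact length - only qdisc_qlen_sum(), which walks every possible CPU - while sch->q.qlen is only meaningful for lock-protected qdiscs. A small illustration of the two options the driver is choosing between (sketch, not part of the patch):

	/* exact length for a NOLOCK qdisc: touches one counter per possible
	 * CPU (for_each_possible_cpu() inside qdisc_qlen_sum())
	 */
	int qlen_slow = qdisc_qlen_sum(sch);

	/* exact length for a lock-protected qdisc: one field, stable while
	 * the qdisc lock is held
	 */
	__u32 qlen_fast = sch->q.qlen;

Since transmit() runs per packet, the patch simply turns flow control off for any non-empty NOLOCK qdisc instead of paying for the per-CPU sum.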

net/core/gen_stats.c

Lines changed: 2 additions & 0 deletions
@@ -291,6 +291,7 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
 	for_each_possible_cpu(i) {
 		const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
 
+		qstats->qlen = 0;
 		qstats->backlog += qcpu->backlog;
 		qstats->drops += qcpu->drops;
 		qstats->requeues += qcpu->requeues;
@@ -306,6 +307,7 @@ void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
 	if (cpu) {
 		__gnet_stats_copy_queue_cpu(qstats, cpu);
 	} else {
+		qstats->qlen = q->qlen;
 		qstats->backlog = q->backlog;
 		qstats->drops = q->drops;
 		qstats->requeues = q->requeues;

net/sched/sch_api.c

Lines changed: 14 additions & 1 deletion
@@ -998,6 +998,19 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb,
 		qdisc_put(old);
 }
 
+static void qdisc_clear_nolock(struct Qdisc *sch)
+{
+	sch->flags &= ~TCQ_F_NOLOCK;
+	if (!(sch->flags & TCQ_F_CPUSTATS))
+		return;
+
+	free_percpu(sch->cpu_bstats);
+	free_percpu(sch->cpu_qstats);
+	sch->cpu_bstats = NULL;
+	sch->cpu_qstats = NULL;
+	sch->flags &= ~TCQ_F_CPUSTATS;
+}
+
 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
  * to device "dev".
  *
@@ -1076,7 +1089,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		/* Only support running class lockless if parent is lockless */
 		if (new && (new->flags & TCQ_F_NOLOCK) &&
 		    parent && !(parent->flags & TCQ_F_NOLOCK))
-			new->flags &= ~TCQ_F_NOLOCK;
+			qdisc_clear_nolock(new);
 
 		if (!cops || !cops->graft)
 			return -EOPNOTSUPP;
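
qdisc_clear_nolock() is where the accounting switch described in the cover letter actually happens: when a lockless child is grafted under a lock-protected parent, its per-CPU stats arrays are freed and TCQ_F_CPUSTATS is cleared together with TCQ_F_NOLOCK, so all the helpers added in sch_generic.h fall back to the global branch. A minimal sketch of the resulting state (illustrative only, not part of the patch):

	/* qdisc_graft() demotes a lockless child under a locked parent: */
	qdisc_clear_nolock(new);
	/* ...and from then on:
	 *   qdisc_is_percpu_stats(new)  -> false (TCQ_F_CPUSTATS cleared)
	 *   qdisc_is_empty(new)         -> tests !new->q.qlen, not new->empty
	 *   qdisc_qlen_sum(new)         -> new->qstats.qlen + new->q.qlen,
	 *                                  no for_each_possible_cpu() walk
	 */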

net/sched/sch_generic.c

Lines changed: 23 additions & 44 deletions
@@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
 		skb = __skb_dequeue(&q->skb_bad_txq);
 		if (qdisc_is_percpu_stats(q)) {
 			qdisc_qstats_cpu_backlog_dec(q, skb);
-			qdisc_qstats_atomic_qlen_dec(q);
+			qdisc_qstats_cpu_qlen_dec(q);
 		} else {
 			qdisc_qstats_backlog_dec(q, skb);
 			q->q.qlen--;
@@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
 
 	if (qdisc_is_percpu_stats(q)) {
 		qdisc_qstats_cpu_backlog_inc(q, skb);
-		qdisc_qstats_atomic_qlen_inc(q);
+		qdisc_qstats_cpu_qlen_inc(q);
 	} else {
 		qdisc_qstats_backlog_inc(q, skb);
 		q->q.qlen++;
@@ -118,52 +118,36 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
 		spin_unlock(lock);
 }
 
-static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	while (skb) {
-		struct sk_buff *next = skb->next;
-
-		__skb_queue_tail(&q->gso_skb, skb);
-		q->qstats.requeues++;
-		qdisc_qstats_backlog_inc(q, skb);
-		q->q.qlen++;	/* it's still part of the queue */
+	spinlock_t *lock = NULL;
 
-		skb = next;
+	if (q->flags & TCQ_F_NOLOCK) {
+		lock = qdisc_lock(q);
+		spin_lock(lock);
 	}
-	__netif_schedule(q);
-
-	return 0;
-}
 
-static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
-{
-	spinlock_t *lock = qdisc_lock(q);
-
-	spin_lock(lock);
 	while (skb) {
 		struct sk_buff *next = skb->next;
 
 		__skb_queue_tail(&q->gso_skb, skb);
 
-		qdisc_qstats_cpu_requeues_inc(q);
-		qdisc_qstats_cpu_backlog_inc(q, skb);
-		qdisc_qstats_atomic_qlen_inc(q);
+		/* it's still part of the queue */
+		if (qdisc_is_percpu_stats(q)) {
+			qdisc_qstats_cpu_requeues_inc(q);
+			qdisc_qstats_cpu_backlog_inc(q, skb);
+			qdisc_qstats_cpu_qlen_inc(q);
+		} else {
+			q->qstats.requeues++;
+			qdisc_qstats_backlog_inc(q, skb);
+			q->q.qlen++;
+		}
 
 		skb = next;
 	}
-	spin_unlock(lock);
-
+	if (lock)
+		spin_unlock(lock);
 	__netif_schedule(q);
-
-	return 0;
-}
-
-static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
-{
-	if (q->flags & TCQ_F_NOLOCK)
-		return dev_requeue_skb_locked(skb, q);
-	else
-		return __dev_requeue_skb(skb, q);
 }
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -252,7 +236,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 			skb = __skb_dequeue(&q->gso_skb);
 			if (qdisc_is_percpu_stats(q)) {
 				qdisc_qstats_cpu_backlog_dec(q, skb);
-				qdisc_qstats_atomic_qlen_dec(q);
+				qdisc_qstats_cpu_qlen_dec(q);
 			} else {
 				qdisc_qstats_backlog_dec(q, skb);
 				q->q.qlen--;
@@ -645,11 +629,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
 	if (unlikely(err))
 		return qdisc_drop_cpu(skb, qdisc, to_free);
 
-	qdisc_qstats_atomic_qlen_inc(qdisc);
-	/* Note: skb can not be used after skb_array_produce(),
-	 * so we better not use qdisc_qstats_cpu_backlog_inc()
-	 */
-	this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
+	qdisc_update_stats_at_enqueue(qdisc, pkt_len);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -668,9 +648,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 		skb = __skb_array_consume(q);
 	}
 	if (likely(skb)) {
-		qdisc_qstats_cpu_backlog_dec(qdisc, skb);
-		qdisc_bstats_cpu_update(qdisc, skb);
-		qdisc_qstats_atomic_qlen_dec(qdisc);
+		qdisc_update_stats_at_dequeue(qdisc, skb);
 	} else {
 		qdisc->empty = true;
 	}
@@ -716,6 +694,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
 		struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
 
 		q->backlog = 0;
+		q->qlen = 0;
 	}
 }
 