Commit 70e57d5

jrfastab authored and davem330 committed
net: sched: use skb list for skb_bad_tx
Similar to how gso is handled use skb list for skb_bad_tx. This is required with lockless qdiscs because we may have multiple cores attempting to push skbs into skb_bad_tx concurrently.

Signed-off-by: John Fastabend <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 7bbde83 commit 70e57d5
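
In short, the single struct sk_buff *skb_bad_txq pointer in struct Qdisc becomes a struct sk_buff_head, and all access goes through new helpers that take the per-qdisc lock only when the qdisc runs lockless (TCQ_F_NOLOCK); non-NOLOCK qdiscs already execute these paths with the qdisc lock held. As a rough illustration of how the helpers are meant to be used from a dequeue path (a simplified sketch with hypothetical function names; the real call sites are dequeue_skb() and try_bulk_dequeue_skb_slow() in the diff below, which also update the per-cpu or legacy stats):

/* Sketch only: give previously "bad" packets another chance first, and
 * park packets whose queue mapping no longer matches the txq. The
 * helpers are safe when several CPUs race on a TCQ_F_NOLOCK qdisc
 * because they take the per-qdisc lock around the list operation.
 */
static struct sk_buff *sketch_dequeue(struct Qdisc *q)
{
        struct sk_buff *skb;

        /* Re-checks that the tx queue is usable before handing the
         * packet back out; returns NULL if nothing is pending.
         */
        skb = qdisc_dequeue_skb_bad_txq(q);
        if (skb)
                return skb;

        return q->dequeue(q);
}

static void sketch_park_bad_skb(struct Qdisc *q, struct sk_buff *skb)
{
        qdisc_enqueue_skb_bad_txq(q, skb);
}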

File tree

2 files changed (+87, -21 lines)


include/net/sch_generic.h

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ struct Qdisc {
         struct gnet_stats_queue qstats;
         unsigned long state;
         struct Qdisc *next_sched;
-        struct sk_buff *skb_bad_txq;
+        struct sk_buff_head skb_bad_txq;
         int padded;
         refcount_t refcnt;
 
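
Because skb_bad_txq is now an embedded list head rather than a pointer, the zeroed memory a freshly allocated qdisc starts from is no longer sufficient: a zeroed sk_buff_head is not a valid empty list (next/prev must point back at the head), so every init path has to set it up explicitly. That is why the qdisc_alloc() and dev_init_scheduler_queue() hunks below add __skb_queue_head_init() calls; the lockless __ variant is enough here since the new helpers serialize on the qdisc lock rather than on the list's own spinlock. A minimal sketch of that initialization step (hypothetical function name):

static void sketch_init_bad_txq(struct Qdisc *sch)
{
        /* Point next/prev back at the head and zero qlen; the list's
         * built-in lock is left untouched because it is never taken
         * for this queue.
         */
        __skb_queue_head_init(&sch->skb_bad_txq);
}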

net/sched/sch_generic.c

Lines changed: 86 additions & 20 deletions
@@ -45,6 +45,68 @@ EXPORT_SYMBOL(default_qdisc_ops);
  * - ingress filtering is also serialized via qdisc root lock
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
+
+static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
+{
+        const struct netdev_queue *txq = q->dev_queue;
+        spinlock_t *lock = NULL;
+        struct sk_buff *skb;
+
+        if (q->flags & TCQ_F_NOLOCK) {
+                lock = qdisc_lock(q);
+                spin_lock(lock);
+        }
+
+        skb = skb_peek(&q->skb_bad_txq);
+        if (skb) {
+                /* check the reason of requeuing without tx lock first */
+                txq = skb_get_tx_queue(txq->dev, skb);
+                if (!netif_xmit_frozen_or_stopped(txq)) {
+                        skb = __skb_dequeue(&q->skb_bad_txq);
+                        if (qdisc_is_percpu_stats(q)) {
+                                qdisc_qstats_cpu_backlog_dec(q, skb);
+                                qdisc_qstats_cpu_qlen_dec(q);
+                        } else {
+                                qdisc_qstats_backlog_dec(q, skb);
+                                q->q.qlen--;
+                        }
+                } else {
+                        skb = NULL;
+                }
+        }
+
+        if (lock)
+                spin_unlock(lock);
+
+        return skb;
+}
+
+static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
+{
+        struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
+
+        if (unlikely(skb))
+                skb = __skb_dequeue_bad_txq(q);
+
+        return skb;
+}
+
+static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
+                                             struct sk_buff *skb)
+{
+        spinlock_t *lock = NULL;
+
+        if (q->flags & TCQ_F_NOLOCK) {
+                lock = qdisc_lock(q);
+                spin_lock(lock);
+        }
+
+        __skb_queue_tail(&q->skb_bad_txq, skb);
+
+        if (lock)
+                spin_unlock(lock);
+}
+
 static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
         __skb_queue_head(&q->gso_skb, skb);
@@ -117,9 +179,15 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
                 if (!nskb)
                         break;
                 if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
-                        q->skb_bad_txq = nskb;
-                        qdisc_qstats_backlog_inc(q, nskb);
-                        q->q.qlen++;
+                        qdisc_enqueue_skb_bad_txq(q, nskb);
+
+                        if (qdisc_is_percpu_stats(q)) {
+                                qdisc_qstats_cpu_backlog_inc(q, nskb);
+                                qdisc_qstats_cpu_qlen_inc(q);
+                        } else {
+                                qdisc_qstats_backlog_inc(q, nskb);
+                                q->q.qlen++;
+                        }
                         break;
                 }
                 skb->next = nskb;
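
Note that the enqueue side now mirrors the dequeue side's accounting: the parked skb is charged either to the per-cpu counters or to the legacy qlen/backlog fields. A hypothetical wrapper for that branch, not part of this patch, just to show the shape of the accounting:

static void sketch_account_enqueue(struct Qdisc *q, struct sk_buff *skb)
{
        /* Qdiscs with per-cpu stats (e.g. lockless ones) bump the
         * per-cpu counters; everything else still uses the single
         * shared qlen/backlog fields.
         */
        if (qdisc_is_percpu_stats(q)) {
                qdisc_qstats_cpu_backlog_inc(q, skb);
                qdisc_qstats_cpu_qlen_inc(q);
        } else {
                qdisc_qstats_backlog_inc(q, skb);
                q->q.qlen++;
        }
}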
@@ -180,19 +248,9 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
         }
 validate:
         *validate = true;
-        skb = q->skb_bad_txq;
-        if (unlikely(skb)) {
-                /* check the reason of requeuing without tx lock first */
-                txq = skb_get_tx_queue(txq->dev, skb);
-                if (!netif_xmit_frozen_or_stopped(txq)) {
-                        q->skb_bad_txq = NULL;
-                        qdisc_qstats_backlog_dec(q, skb);
-                        q->q.qlen--;
-                        goto bulk;
-                }
-                skb = NULL;
-                goto trace;
-        }
+        skb = qdisc_dequeue_skb_bad_txq(q);
+        if (unlikely(skb))
+                goto bulk;
         if (!(q->flags & TCQ_F_ONETXQUEUE) ||
             !netif_xmit_frozen_or_stopped(txq))
                 skb = q->dequeue(q);
@@ -680,6 +738,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                 sch->padded = (char *) sch - (char *) p;
         }
         __skb_queue_head_init(&sch->gso_skb);
+        __skb_queue_head_init(&sch->skb_bad_txq);
         qdisc_skb_head_init(&sch->q);
         spin_lock_init(&sch->q.lock);
 

@@ -753,14 +812,16 @@ void qdisc_reset(struct Qdisc *qdisc)
         if (ops->reset)
                 ops->reset(qdisc);
 
-        kfree_skb(qdisc->skb_bad_txq);
-        qdisc->skb_bad_txq = NULL;
-
         skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
                 __skb_unlink(skb, &qdisc->gso_skb);
                 kfree_skb_list(skb);
         }
 
+        skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
+                __skb_unlink(skb, &qdisc->skb_bad_txq);
+                kfree_skb_list(skb);
+        }
+
         qdisc->q.qlen = 0;
         qdisc->qstats.backlog = 0;
 }
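
qdisc_reset() above and qdisc_destroy() in the next hunk now drain skb_bad_txq with the same walk-and-unlink loop already used for gso_skb, so nothing parked there is leaked. A hypothetical shared helper, not part of this patch, could factor that loop out:

static void sketch_purge_skb_list(struct sk_buff_head *list)
{
        struct sk_buff *skb, *tmp;

        /* Unlink and free every packet still sitting on the list. */
        skb_queue_walk_safe(list, skb, tmp) {
                __skb_unlink(skb, list);
                kfree_skb_list(skb);
        }
}

Both call sites would then reduce to sketch_purge_skb_list(&qdisc->skb_bad_txq), and the existing gso_skb loops to the same helper.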
@@ -804,7 +865,11 @@ void qdisc_destroy(struct Qdisc *qdisc)
                 kfree_skb_list(skb);
         }
 
-        kfree_skb(qdisc->skb_bad_txq);
+        skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
+                __skb_unlink(skb, &qdisc->skb_bad_txq);
+                kfree_skb_list(skb);
+        }
+
         qdisc_free(qdisc);
 }
 EXPORT_SYMBOL(qdisc_destroy);
@@ -1042,6 +1107,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
         rcu_assign_pointer(dev_queue->qdisc, qdisc);
         dev_queue->qdisc_sleeping = qdisc;
         __skb_queue_head_init(&qdisc->gso_skb);
+        __skb_queue_head_init(&qdisc->skb_bad_txq);
 }
 
 void dev_init_scheduler(struct net_device *dev)
