Skip to content

Commit 6b3ba91

Browse files
jrfastab authored and davem330 committed
net: sched: allow qdiscs to handle locking
This patch adds a flag for queueing disciplines to indicate that the stack does not need to use the qdisc lock to protect operations. This can be used to build lockless scheduling algorithms and to improve performance.

The flag is checked in the tx path, and the qdisc lock is only taken if it is not set. For now, use a conditional if statement. Later we could be more aggressive if it proves worthwhile and use a static key or wrap this in a likely().

Also, the lockless case drops the TCQ_F_CAN_BYPASS logic. The reason for this is that synchronizing a qlen counter across threads proves to cost more than doing the enqueue/dequeue operations when tested with pktgen.

Signed-off-by: John Fastabend <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 6c14818 commit 6b3ba91

File tree

3 files changed

+43
-14
lines changed

3 files changed

+43
-14
lines changed

include/net/sch_generic.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ struct Qdisc {
7171
* qdisc_tree_decrease_qlen() should stop.
7272
*/
7373
#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */
74+
#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */
7475
u32 limit;
7576
const struct Qdisc_ops *ops;
7677
struct qdisc_size_table __rcu *stab;

net/core/dev.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3162,6 +3162,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
31623162
int rc;
31633163

31643164
qdisc_calculate_pkt_len(skb, q);
3165+
3166+
if (q->flags & TCQ_F_NOLOCK) {
3167+
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
3168+
__qdisc_drop(skb, &to_free);
3169+
rc = NET_XMIT_DROP;
3170+
} else {
3171+
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
3172+
__qdisc_run(q);
3173+
}
3174+
3175+
if (unlikely(to_free))
3176+
kfree_skb_list(to_free);
3177+
return rc;
3178+
}
3179+
31653180
/*
31663181
* Heuristic to force contended enqueues to serialize on a
31673182
* separate lock before trying to get qdisc main lock.
@@ -4144,19 +4159,22 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
41444159

41454160
while (head) {
41464161
struct Qdisc *q = head;
4147-
spinlock_t *root_lock;
4162+
spinlock_t *root_lock = NULL;
41484163

41494164
head = head->next_sched;
41504165

4151-
root_lock = qdisc_lock(q);
4152-
spin_lock(root_lock);
4166+
if (!(q->flags & TCQ_F_NOLOCK)) {
4167+
root_lock = qdisc_lock(q);
4168+
spin_lock(root_lock);
4169+
}
41534170
/* We need to make sure head->next_sched is read
41544171
* before clearing __QDISC_STATE_SCHED
41554172
*/
41564173
smp_mb__before_atomic();
41574174
clear_bit(__QDISC_STATE_SCHED, &q->state);
41584175
qdisc_run(q);
4159-
spin_unlock(root_lock);
4176+
if (root_lock)
4177+
spin_unlock(root_lock);
41604178
}
41614179
}
41624180
}

net/sched/sch_generic.c

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
174174
int ret = NETDEV_TX_BUSY;
175175

176176
/* And release qdisc */
177-
spin_unlock(root_lock);
177+
if (root_lock)
178+
spin_unlock(root_lock);
178179

179180
/* Note that we validate skb (GSO, checksum, ...) outside of locks */
180181
if (validate)
@@ -187,10 +188,13 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
187188

188189
HARD_TX_UNLOCK(dev, txq);
189190
} else {
190-
spin_lock(root_lock);
191+
if (root_lock)
192+
spin_lock(root_lock);
191193
return qdisc_qlen(q);
192194
}
193-
spin_lock(root_lock);
195+
196+
if (root_lock)
197+
spin_lock(root_lock);
194198

195199
if (dev_xmit_complete(ret)) {
196200
/* Driver sent out skb successfully or skb was consumed */
@@ -231,9 +235,9 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
231235
*/
232236
static inline int qdisc_restart(struct Qdisc *q, int *packets)
233237
{
238+
spinlock_t *root_lock = NULL;
234239
struct netdev_queue *txq;
235240
struct net_device *dev;
236-
spinlock_t *root_lock;
237241
struct sk_buff *skb;
238242
bool validate;
239243

@@ -242,7 +246,9 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets)
242246
if (unlikely(!skb))
243247
return 0;
244248

245-
root_lock = qdisc_lock(q);
249+
if (!(q->flags & TCQ_F_NOLOCK))
250+
root_lock = qdisc_lock(q);
251+
246252
dev = qdisc_dev(q);
247253
txq = skb_get_tx_queue(dev, skb);
248254

@@ -880,14 +886,18 @@ static bool some_qdisc_is_busy(struct net_device *dev)
880886

881887
dev_queue = netdev_get_tx_queue(dev, i);
882888
q = dev_queue->qdisc_sleeping;
883-
root_lock = qdisc_lock(q);
884889

885-
spin_lock_bh(root_lock);
890+
if (q->flags & TCQ_F_NOLOCK) {
891+
val = test_bit(__QDISC_STATE_SCHED, &q->state);
892+
} else {
893+
root_lock = qdisc_lock(q);
894+
spin_lock_bh(root_lock);
886895

887-
val = (qdisc_is_running(q) ||
888-
test_bit(__QDISC_STATE_SCHED, &q->state));
896+
val = (qdisc_is_running(q) ||
897+
test_bit(__QDISC_STATE_SCHED, &q->state));
889898

890-
spin_unlock_bh(root_lock);
899+
spin_unlock_bh(root_lock);
900+
}
891901

892902
if (val)
893903
return true;

0 commit comments

Comments
 (0)