
Commit e722db8

Sebastian Andrzej Siewior authored and davem330 committed
net: dev: Make rps_lock() disable interrupts.
Disabling interrupts and, in the RPS case, locking input_pkt_queue is split into local_irq_disable() and an optional spin_lock().

This breaks on PREEMPT_RT because the spinlock_t typed lock cannot be acquired with disabled interrupts. The sections in which the lock is acquired are usually short, in the sense that they do not cause long and unbounded latencies. One exception is the skb_flow_limit() invocation, which may invoke a BPF program (and may require sleeping locks).

By moving local_irq_disable() + spin_lock() into rps_lock(), we can keep interrupts disabled on !PREEMPT_RT and enabled on PREEMPT_RT kernels. Without RPS on a PREEMPT_RT kernel, the needed synchronisation happens as part of local_bh_disable() on the local CPU.

____napi_schedule() is only invoked if sd is from the local CPU. Replace it with __napi_schedule_irqoff(), which already disables interrupts on PREEMPT_RT as needed. Move this call to rps_ipi_queued() and rename the function to napi_schedule_rps(), as suggested by Jakub.

Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Reviewed-by: Jakub Kicinski <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
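For reference, the caller-side change made by the diff below (e.g. in enqueue_to_backlog()) condenses to the following sketch; it is abridged from the patch, with the queueing work between lock and unlock elided:

/* Before: two separate steps, which cannot work on PREEMPT_RT where
 * the spinlock_t typed queue lock must not be taken with interrupts
 * disabled. */
local_irq_save(flags);
rps_lock(sd);
/* ... enqueue skb onto sd->input_pkt_queue ... */
rps_unlock(sd);
local_irq_restore(flags);

/* After: one helper chooses the protection. With RPS it takes the
 * queue lock via spin_lock_irqsave(), which on PREEMPT_RT leaves
 * interrupts enabled; without RPS it only disables interrupts on
 * !PREEMPT_RT, while on PREEMPT_RT the needed synchronisation comes
 * from local_bh_disable(). */
rps_lock_irqsave(sd, &flags);
/* ... enqueue skb onto sd->input_pkt_queue ... */
rps_unlock_irq_restore(sd, &flags);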
1 parent baebdf4 commit e722db8

1 file changed: +42 −34 lines

net/core/dev.c

Lines changed: 42 additions & 34 deletions
@@ -216,18 +216,38 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
-static inline void rps_lock(struct softnet_data *sd)
+static inline void rps_lock_irqsave(struct softnet_data *sd,
+				    unsigned long *flags)
 {
-#ifdef CONFIG_RPS
-	spin_lock(&sd->input_pkt_queue.lock);
-#endif
+	if (IS_ENABLED(CONFIG_RPS))
+		spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
+	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_irq_save(*flags);
 }
 
-static inline void rps_unlock(struct softnet_data *sd)
+static inline void rps_lock_irq_disable(struct softnet_data *sd)
 {
-#ifdef CONFIG_RPS
-	spin_unlock(&sd->input_pkt_queue.lock);
-#endif
+	if (IS_ENABLED(CONFIG_RPS))
+		spin_lock_irq(&sd->input_pkt_queue.lock);
+	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_irq_disable();
+}
+
+static inline void rps_unlock_irq_restore(struct softnet_data *sd,
+					  unsigned long *flags)
+{
+	if (IS_ENABLED(CONFIG_RPS))
+		spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
+	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_irq_restore(*flags);
+}
+
+static inline void rps_unlock_irq_enable(struct softnet_data *sd)
+{
+	if (IS_ENABLED(CONFIG_RPS))
+		spin_unlock_irq(&sd->input_pkt_queue.lock);
+	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_irq_enable();
 }
 
 static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
@@ -4456,11 +4476,11 @@ static void rps_trigger_softirq(void *data)
  * If yes, queue it to our IPI list and return 1
  * If no, return 0
  */
-static int rps_ipi_queued(struct softnet_data *sd)
+static int napi_schedule_rps(struct softnet_data *sd)
 {
-#ifdef CONFIG_RPS
 	struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
 
+#ifdef CONFIG_RPS
 	if (sd != mysd) {
 		sd->rps_ipi_next = mysd->rps_ipi_list;
 		mysd->rps_ipi_list = sd;
@@ -4469,6 +4489,7 @@ static int rps_ipi_queued(struct softnet_data *sd)
 		return 1;
 	}
 #endif /* CONFIG_RPS */
+	__napi_schedule_irqoff(&mysd->backlog);
 	return 0;
 }
 
@@ -4525,9 +4546,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 
 	sd = &per_cpu(softnet_data, cpu);
 
-	local_irq_save(flags);
-
-	rps_lock(sd);
+	rps_lock_irqsave(sd, &flags);
 	if (!netif_running(skb->dev))
 		goto drop;
 	qlen = skb_queue_len(&sd->input_pkt_queue);
@@ -4536,26 +4555,21 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 enqueue:
 			__skb_queue_tail(&sd->input_pkt_queue, skb);
 			input_queue_tail_incr_save(sd, qtail);
-			rps_unlock(sd);
-			local_irq_restore(flags);
+			rps_unlock_irq_restore(sd, &flags);
 			return NET_RX_SUCCESS;
 		}
 
 		/* Schedule NAPI for backlog device
 		 * We can use non atomic operation since we own the queue lock
 		 */
-		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
-			if (!rps_ipi_queued(sd))
-				____napi_schedule(sd, &sd->backlog);
-		}
+		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
+			napi_schedule_rps(sd);
 		goto enqueue;
 	}
 
 drop:
 	sd->dropped++;
-	rps_unlock(sd);
-
-	local_irq_restore(flags);
+	rps_unlock_irq_restore(sd, &flags);
 
 	atomic_long_inc(&skb->dev->rx_dropped);
 	kfree_skb(skb);
@@ -5638,17 +5652,15 @@ static void flush_backlog(struct work_struct *work)
 	local_bh_disable();
 	sd = this_cpu_ptr(&softnet_data);
 
-	local_irq_disable();
-	rps_lock(sd);
+	rps_lock_irq_disable(sd);
 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
 		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->input_pkt_queue);
 			dev_kfree_skb_irq(skb);
 			input_queue_head_incr(sd);
 		}
 	}
-	rps_unlock(sd);
-	local_irq_enable();
+	rps_unlock_irq_enable(sd);
 
 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
 		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
@@ -5666,16 +5678,14 @@ static bool flush_required(int cpu)
 	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
 	bool do_flush;
 
-	local_irq_disable();
-	rps_lock(sd);
+	rps_lock_irq_disable(sd);
 
 	/* as insertion into process_queue happens with the rps lock held,
 	 * process_queue access may race only with dequeue
 	 */
	do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
 		   !skb_queue_empty_lockless(&sd->process_queue);
-	rps_unlock(sd);
-	local_irq_enable();
+	rps_unlock_irq_enable(sd);
 
 	return do_flush;
 #endif
@@ -5790,8 +5800,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 
 		}
 
-		local_irq_disable();
-		rps_lock(sd);
+		rps_lock_irq_disable(sd);
 		if (skb_queue_empty(&sd->input_pkt_queue)) {
 			/*
 			 * Inline a custom version of __napi_complete().
@@ -5807,8 +5816,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
 						   &sd->process_queue);
 		}
-		rps_unlock(sd);
-		local_irq_enable();
+		rps_unlock_irq_enable(sd);
 	}
 
 	return work;
