Commit 5bebd3e

Author: Alexei Starovoitov (committed)
Merge branch 'remove-unnecessary-synchronizations-in-cpumap'
Hou Tao says:

====================
Remove unnecessary synchronizations in cpumap

From: Hou Tao <[email protected]>

Hi,

This is the formal patchset to remove unnecessary synchronizations in
cpu-map after addressing comments and collecting Rvb tags from
Toke Høiland-Jørgensen (big thanks to Toke).

Patch #1 removes the unnecessary rcu_barrier() when freeing
bpf_cpu_map_entry and replaces it by queue_rcu_work(). Patch #2 removes
the unnecessary call_rcu() and queue_work() when destroying cpu-map and
does the freeing directly.

The patchset was tested by using xdp_redirect_cpu and virtio-net. Both
xdp-mode and skb-mode have been exercised and no issues were reported.
As usual, comments and suggestions are always welcome.

Change Log:
v1:
  * address comments from Toke Høiland-Jørgensen
  * add Rvb tags from Toke Høiland-Jørgensen
  * update outdated comment in cpu_map_delete_elem()

RFC: https://lore.kernel.org/bpf/[email protected]
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 0a55264 + c2e42dd commit 5bebd3e
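
At its core, the patchset moves entry teardown onto the queue_rcu_work() pattern: embed a struct rcu_work in the object, and the handler runs in workqueue (process) context only after an RCU grace period has elapsed, so it may sleep. Below is a minimal sketch of that pattern, not code from this commit; the names my_obj, my_obj_free and my_obj_release are illustrative, while INIT_RCU_WORK(), queue_rcu_work() and to_rcu_work() are the real kernel APIs the diff uses.

#include <linux/workqueue.h>
#include <linux/slab.h>

struct my_obj {
	/* ... payload ... */
	struct rcu_work free_work;	/* replaces separate rcu_head + work_struct */
};

/* Runs in workqueue context after an RCU grace period has already
 * elapsed, so it can do sleepable teardown (e.g. kthread_stop()) and
 * then free the object.
 */
static void my_obj_free(struct work_struct *work)
{
	struct my_obj *obj = container_of(to_rcu_work(work),
					  struct my_obj, free_work);

	/* sleepable teardown goes here */
	kfree(obj);
}

static void my_obj_release(struct my_obj *obj)
{
	INIT_RCU_WORK(&obj->free_work, my_obj_free);
	/* wait for an RCU grace period, then queue my_obj_free() on system_wq */
	queue_rcu_work(system_wq, &obj->free_work);
}

This replaces both the old call_rcu() callback and the separate kthread-stop work item with a single deferred free path.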

File tree

1 file changed: +35 −78 lines changed

kernel/bpf/cpumap.c

Lines changed: 35 additions & 78 deletions
@@ -68,11 +68,8 @@ struct bpf_cpu_map_entry {
 	struct bpf_cpumap_val value;
 	struct bpf_prog *prog;
 
-	atomic_t refcnt; /* Control when this struct can be free'ed */
-	struct rcu_head rcu;
-
-	struct work_struct kthread_stop_wq;
 	struct completion kthread_running;
+	struct rcu_work free_work;
 };
 
 struct bpf_cpu_map {
@@ -117,11 +114,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 	return &cmap->map;
 }
 
-static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
-{
-	atomic_inc(&rcpu->refcnt);
-}
-
 static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
 {
 	/* The tear-down procedure should have made sure that queue is
@@ -142,35 +134,6 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
 	}
 }
 
-static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
-{
-	if (atomic_dec_and_test(&rcpu->refcnt)) {
-		if (rcpu->prog)
-			bpf_prog_put(rcpu->prog);
-		/* The queue should be empty at this point */
-		__cpu_map_ring_cleanup(rcpu->queue);
-		ptr_ring_cleanup(rcpu->queue, NULL);
-		kfree(rcpu->queue);
-		kfree(rcpu);
-	}
-}
-
-/* called from workqueue, to workaround syscall using preempt_disable */
-static void cpu_map_kthread_stop(struct work_struct *work)
-{
-	struct bpf_cpu_map_entry *rcpu;
-
-	rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
-
-	/* Wait for flush in __cpu_map_entry_free(), via full RCU barrier,
-	 * as it waits until all in-flight call_rcu() callbacks complete.
-	 */
-	rcu_barrier();
-
-	/* kthread_stop will wake_up_process and wait for it to complete */
-	kthread_stop(rcpu->kthread);
-}
-
 static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
 				     struct list_head *listp,
 				     struct xdp_cpumap_stats *stats)
@@ -395,7 +358,6 @@ static int cpu_map_kthread_run(void *data)
 	}
 	__set_current_state(TASK_RUNNING);
 
-	put_cpu_map_entry(rcpu);
 	return 0;
 }
 
@@ -472,9 +434,6 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
 	if (IS_ERR(rcpu->kthread))
 		goto free_prog;
 
-	get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */
-	get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */
-
 	/* Make sure kthread runs on a single CPU */
 	kthread_bind(rcpu->kthread, cpu);
 	wake_up_process(rcpu->kthread);
@@ -501,40 +460,40 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
 	return NULL;
 }
 
-static void __cpu_map_entry_free(struct rcu_head *rcu)
+static void __cpu_map_entry_free(struct work_struct *work)
 {
 	struct bpf_cpu_map_entry *rcpu;
 
 	/* This cpu_map_entry have been disconnected from map and one
-	 * RCU grace-period have elapsed.  Thus, XDP cannot queue any
+	 * RCU grace-period have elapsed. Thus, XDP cannot queue any
 	 * new packets and cannot change/set flush_needed that can
 	 * find this entry.
 	 */
-	rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);
+	rcpu = container_of(to_rcu_work(work), struct bpf_cpu_map_entry, free_work);
 
+	/* kthread_stop will wake_up_process and wait for it to complete.
+	 * cpu_map_kthread_run() makes sure the pointer ring is empty
+	 * before exiting.
+	 */
+	kthread_stop(rcpu->kthread);
+
+	if (rcpu->prog)
+		bpf_prog_put(rcpu->prog);
+	/* The queue should be empty at this point */
+	__cpu_map_ring_cleanup(rcpu->queue);
+	ptr_ring_cleanup(rcpu->queue, NULL);
+	kfree(rcpu->queue);
 	free_percpu(rcpu->bulkq);
-	/* Cannot kthread_stop() here, last put free rcpu resources */
-	put_cpu_map_entry(rcpu);
+	kfree(rcpu);
 }
 
-/* After xchg pointer to bpf_cpu_map_entry, use the call_rcu() to
- * ensure any driver rcu critical sections have completed, but this
- * does not guarantee a flush has happened yet. Because driver side
- * rcu_read_lock/unlock only protects the running XDP program. The
- * atomic xchg and NULL-ptr check in __cpu_map_flush() makes sure a
- * pending flush op doesn't fail.
- *
- * The bpf_cpu_map_entry is still used by the kthread, and there can
- * still be pending packets (in queue and percpu bulkq). A refcnt
- * makes sure to last user (kthread_stop vs. call_rcu) free memory
- * resources.
- *
- * The rcu callback __cpu_map_entry_free flush remaining packets in
- * percpu bulkq to queue. Due to caller map_delete_elem() disable
- * preemption, cannot call kthread_stop() to make sure queue is empty.
- * Instead a work_queue is started for stopping kthread,
- * cpu_map_kthread_stop, which waits for an RCU grace period before
- * stopping kthread, emptying the queue.
+/* After the xchg of the bpf_cpu_map_entry pointer, we need to make sure the old
+ * entry is no longer in use before freeing. We use queue_rcu_work() to call
+ * __cpu_map_entry_free() in a separate workqueue after waiting for an RCU grace
+ * period. This means that (a) all pending enqueue and flush operations have
+ * completed (because of the RCU callback), and (b) we are in a workqueue
+ * context where we can stop the kthread and wait for it to exit before freeing
+ * everything.
  */
 static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
 				    u32 key_cpu, struct bpf_cpu_map_entry *rcpu)
@@ -543,9 +502,8 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
 
 	old_rcpu = unrcu_pointer(xchg(&cmap->cpu_map[key_cpu], RCU_INITIALIZER(rcpu)));
 	if (old_rcpu) {
-		call_rcu(&old_rcpu->rcu, __cpu_map_entry_free);
-		INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop);
-		schedule_work(&old_rcpu->kthread_stop_wq);
+		INIT_RCU_WORK(&old_rcpu->free_work, __cpu_map_entry_free);
+		queue_rcu_work(system_wq, &old_rcpu->free_work);
 	}
 }
 
@@ -557,7 +515,7 @@ static long cpu_map_delete_elem(struct bpf_map *map, void *key)
 	if (key_cpu >= map->max_entries)
 		return -EINVAL;
 
-	/* notice caller map_delete_elem() use preempt_disable() */
+	/* notice caller map_delete_elem() uses rcu_read_lock() */
 	__cpu_map_entry_replace(cmap, key_cpu, NULL);
 	return 0;
 }
@@ -608,16 +566,15 @@ static void cpu_map_free(struct bpf_map *map)
 	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
 	 * so the bpf programs (can be more than one that used this map) were
 	 * disconnected from events. Wait for outstanding critical sections in
-	 * these programs to complete. The rcu critical section only guarantees
-	 * no further "XDP/bpf-side" reads against bpf_cpu_map->cpu_map.
-	 * It does __not__ ensure pending flush operations (if any) are
-	 * complete.
+	 * these programs to complete. synchronize_rcu() below not only
+	 * guarantees no further "XDP/bpf-side" reads against
+	 * bpf_cpu_map->cpu_map, but also ensure pending flush operations
+	 * (if any) are completed.
 	 */
-
 	synchronize_rcu();
 
-	/* For cpu_map the remote CPUs can still be using the entries
-	 * (struct bpf_cpu_map_entry).
+	/* The only possible user of bpf_cpu_map_entry is
+	 * cpu_map_kthread_run().
 	 */
 	for (i = 0; i < cmap->map.max_entries; i++) {
 		struct bpf_cpu_map_entry *rcpu;
@@ -626,8 +583,8 @@ static void cpu_map_free(struct bpf_map *map)
 		if (!rcpu)
 			continue;
 
-		/* bq flush and cleanup happens after RCU grace-period */
-		__cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */
+		/* Stop kthread and cleanup entry directly */
+		__cpu_map_entry_free(&rcpu->free_work.work);
 	}
 	bpf_map_area_free(cmap->cpu_map);
 	bpf_map_area_free(cmap);
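
For context, the bpf_cpu_map_entry objects torn down above back BPF_MAP_TYPE_CPUMAP slots that XDP programs redirect packets into; the commit message mentions exercising this with the xdp_redirect_cpu sample. The following is a rough, hedged illustration of that usage, not code from this commit; the map name cpu_map, program name redirect_to_cpu, and max_entries value are hypothetical.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* hypothetical example map: one slot per possible target CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, struct bpf_cpumap_val);
} cpu_map SEC(".maps");

SEC("xdp")
int redirect_to_cpu(struct xdp_md *ctx)
{
	__u32 cpu = 0;	/* pick a target CPU; real samples hash the packet */

	/* Each populated cpu_map slot is a bpf_cpu_map_entry in the kernel,
	 * served by a per-entry kthread that the teardown path above stops.
	 * XDP_PASS in the low flag bits is the fallback if redirect fails.
	 */
	return bpf_redirect_map(&cpu_map, cpu, XDP_PASS);
}

char _license[] SEC("license") = "GPL";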
