Skip to content

Commit a650d38

Browse files
kkdwivediAlexei Starovoitov
authored andcommitted
bpf: Convert ringbuf map to rqspinlock
Convert the raw spinlock used by BPF ringbuf to rqspinlock. Currently, we have an open syzbot report of a potential deadlock. In addition, the ringbuf can fail to reserve spuriously under contention from NMI context. It is potentially attractive to enable unconstrained usage (incl. NMIs) while ensuring no deadlocks manifest at runtime, perform the conversion to rqspinlock to achieve this. This change was benchmarked for BPF ringbuf's multi-producer contention case on an Intel Sapphire Rapids server, with hyperthreading disabled and performance governor turned on. 5 warm up runs were done for each case before obtaining the results. Before (raw_spinlock_t): Ringbuf, multi-producer contention ================================== rb-libbpf nr_prod 1 11.440 ± 0.019M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 2 2.706 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 3 3.130 ± 0.004M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 4 2.472 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 8 2.352 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 12 2.813 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 16 1.988 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 20 2.245 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 24 2.148 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 28 2.190 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 32 2.490 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 36 2.180 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 40 2.201 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 44 2.226 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 48 2.164 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 52 1.874 ± 0.001M/s (drops 0.000 ± 0.000M/s) After (rqspinlock_t): Ringbuf, multi-producer contention ================================== rb-libbpf nr_prod 1 11.078 ± 0.019M/s (drops 0.000 ± 0.000M/s) (-3.16%) rb-libbpf nr_prod 2 2.801 ± 0.014M/s (drops 0.000 ± 0.000M/s) (3.51%) rb-libbpf nr_prod 3 3.454 ± 0.005M/s (drops 0.000 ± 0.000M/s) (10.35%) rb-libbpf nr_prod 4 2.567 ± 0.002M/s (drops 0.000 ± 0.000M/s) (3.84%) rb-libbpf nr_prod 8 2.468 ± 0.001M/s (drops 0.000 ± 0.000M/s) (4.93%) rb-libbpf nr_prod 12 2.510 ± 0.001M/s (drops 0.000 ± 0.000M/s) (-10.77%) rb-libbpf nr_prod 16 2.075 ± 0.001M/s (drops 0.000 ± 0.000M/s) (4.38%) rb-libbpf nr_prod 20 2.640 ± 0.001M/s (drops 0.000 ± 0.000M/s) (17.59%) rb-libbpf nr_prod 24 2.092 ± 0.001M/s (drops 0.000 ± 0.000M/s) (-2.61%) rb-libbpf nr_prod 28 2.426 ± 0.005M/s (drops 0.000 ± 0.000M/s) (10.78%) rb-libbpf nr_prod 32 2.331 ± 0.004M/s (drops 0.000 ± 0.000M/s) (-6.39%) rb-libbpf nr_prod 36 2.306 ± 0.003M/s (drops 0.000 ± 0.000M/s) (5.78%) rb-libbpf nr_prod 40 2.178 ± 0.002M/s (drops 0.000 ± 0.000M/s) (-1.04%) rb-libbpf nr_prod 44 2.293 ± 0.001M/s (drops 0.000 ± 0.000M/s) (3.01%) rb-libbpf nr_prod 48 2.022 ± 0.001M/s (drops 0.000 ± 0.000M/s) (-6.56%) rb-libbpf nr_prod 52 1.809 ± 0.001M/s (drops 0.000 ± 0.000M/s) (-3.47%) There's a fair amount of noise in the benchmark, with numbers on reruns going up and down by 10%, so all changes are in the range of this disturbance, and we see no major regressions. Reported-by: [email protected] Closes: https://lore.kernel.org/all/[email protected] Signed-off-by: Kumar Kartikeya Dwivedi <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 2f41503 commit a650d38

File tree

1 file changed

+7
-10
lines changed

1 file changed

+7
-10
lines changed

kernel/bpf/ringbuf.c

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <linux/kmemleak.h>
1212
#include <uapi/linux/btf.h>
1313
#include <linux/btf_ids.h>
14+
#include <asm/rqspinlock.h>
1415

1516
#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
1617

@@ -29,7 +30,7 @@ struct bpf_ringbuf {
2930
u64 mask;
3031
struct page **pages;
3132
int nr_pages;
32-
raw_spinlock_t spinlock ____cacheline_aligned_in_smp;
33+
rqspinlock_t spinlock ____cacheline_aligned_in_smp;
3334
/* For user-space producer ring buffers, an atomic_t busy bit is used
3435
* to synchronize access to the ring buffers in the kernel, rather than
3536
* the spinlock that is used for kernel-producer ring buffers. This is
@@ -173,7 +174,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
173174
if (!rb)
174175
return NULL;
175176

176-
raw_spin_lock_init(&rb->spinlock);
177+
raw_res_spin_lock_init(&rb->spinlock);
177178
atomic_set(&rb->busy, 0);
178179
init_waitqueue_head(&rb->waitq);
179180
init_irq_work(&rb->work, bpf_ringbuf_notify);
@@ -416,12 +417,8 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
416417

417418
cons_pos = smp_load_acquire(&rb->consumer_pos);
418419

419-
if (in_nmi()) {
420-
if (!raw_spin_trylock_irqsave(&rb->spinlock, flags))
421-
return NULL;
422-
} else {
423-
raw_spin_lock_irqsave(&rb->spinlock, flags);
424-
}
420+
if (raw_res_spin_lock_irqsave(&rb->spinlock, flags))
421+
return NULL;
425422

426423
pend_pos = rb->pending_pos;
427424
prod_pos = rb->producer_pos;
@@ -446,7 +443,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
446443
*/
447444
if (new_prod_pos - cons_pos > rb->mask ||
448445
new_prod_pos - pend_pos > rb->mask) {
449-
raw_spin_unlock_irqrestore(&rb->spinlock, flags);
446+
raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
450447
return NULL;
451448
}
452449

@@ -458,7 +455,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
458455
/* pairs with consumer's smp_load_acquire() */
459456
smp_store_release(&rb->producer_pos, new_prod_pos);
460457

461-
raw_spin_unlock_irqrestore(&rb->spinlock, flags);
458+
raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
462459

463460
return (void *)hdr + BPF_RINGBUF_HDR_SZ;
464461
}

0 commit comments

Comments
 (0)