Commit b676ac4

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

 - Follow-up fixes for the resilient spinlock (Kumar Kartikeya Dwivedi):

     - Make the res_spin_lock test less verbose, since it was spamming
       BPF CI on failure, and make the check for AA deadlock stronger

     - Fix a rebasing mistake and use the architecture-provided
       res_smp_cond_load_acquire

     - Convert BPF maps (queue_stack and ringbuf) to the resilient
       spinlock to address long-standing syzbot reports

 - Make sure that classic BPF load instructions from SKF_[NET|LL]_OFF
   offsets work when the skb is fragmented (Willem de Bruijn)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Convert ringbuf map to rqspinlock
  bpf: Convert queue_stack map to rqspinlock
  bpf: Use architecture provided res_smp_cond_load_acquire
  selftests/bpf: Make res_spin_lock AA test condition stronger
  selftests/net: test sk_filter support for SKF_NET_OFF on frags
  bpf: support SKF_NET_OFF and SKF_LL_OFF on skb frags
  selftests/bpf: Make res_spin_lock test less verbose
2 parents ecd5d67 + a650d38 commit b676ac4

File tree

11 files changed: +354, -76 lines changed


arch/arm64/include/asm/rqspinlock.h

Lines changed: 1 addition & 1 deletion

@@ -86,7 +86,7 @@

 #endif

-#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
+#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)

 #include <asm-generic/rqspinlock.h>

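Why the rename matters: the commit log says the generic rqspinlock code should use the architecture-provided res_smp_cond_load_acquire, so an arm64 definition spelled res_smp_cond_load_acquire_timewait was never picked up as an override. Below is a hedged sketch of the usual #ifndef override idiom this relies on; the exact fallback definition in the generic code is an assumption for illustration, not quoted from this commit.

/* Hypothetical illustration of the override idiom assumed here: generic
 * code supplies a default only when the architecture has not already
 * defined res_smp_cond_load_acquire under exactly that name, so the
 * arm64 macro must match that spelling to take effect.
 */
#ifndef res_smp_cond_load_acquire
#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire(v, c)
#endif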

kernel/bpf/queue_stack_maps.c

Lines changed: 12 additions & 23 deletions

@@ -9,13 +9,14 @@
 #include <linux/slab.h>
 #include <linux/btf_ids.h>
 #include "percpu_freelist.h"
+#include <asm/rqspinlock.h>

 #define QUEUE_STACK_CREATE_FLAG_MASK \
         (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

 struct bpf_queue_stack {
         struct bpf_map map;
-        raw_spinlock_t lock;
+        rqspinlock_t lock;
         u32 head, tail;
         u32 size; /* max_entries + 1 */

@@ -78,7 +79,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)

         qs->size = size;

-        raw_spin_lock_init(&qs->lock);
+        raw_res_spin_lock_init(&qs->lock);

         return &qs->map;
 }
@@ -98,12 +99,8 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
         int err = 0;
         void *ptr;

-        if (in_nmi()) {
-                if (!raw_spin_trylock_irqsave(&qs->lock, flags))
-                        return -EBUSY;
-        } else {
-                raw_spin_lock_irqsave(&qs->lock, flags);
-        }
+        if (raw_res_spin_lock_irqsave(&qs->lock, flags))
+                return -EBUSY;

         if (queue_stack_map_is_empty(qs)) {
                 memset(value, 0, qs->map.value_size);
@@ -120,7 +117,7 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
         }

 out:
-        raw_spin_unlock_irqrestore(&qs->lock, flags);
+        raw_res_spin_unlock_irqrestore(&qs->lock, flags);
         return err;
 }

@@ -133,12 +130,8 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
         void *ptr;
         u32 index;

-        if (in_nmi()) {
-                if (!raw_spin_trylock_irqsave(&qs->lock, flags))
-                        return -EBUSY;
-        } else {
-                raw_spin_lock_irqsave(&qs->lock, flags);
-        }
+        if (raw_res_spin_lock_irqsave(&qs->lock, flags))
+                return -EBUSY;

         if (queue_stack_map_is_empty(qs)) {
                 memset(value, 0, qs->map.value_size);
@@ -157,7 +150,7 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
         qs->head = index;

 out:
-        raw_spin_unlock_irqrestore(&qs->lock, flags);
+        raw_res_spin_unlock_irqrestore(&qs->lock, flags);
         return err;
 }

@@ -203,12 +196,8 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
         if (flags & BPF_NOEXIST || flags > BPF_EXIST)
                 return -EINVAL;

-        if (in_nmi()) {
-                if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
-                        return -EBUSY;
-        } else {
-                raw_spin_lock_irqsave(&qs->lock, irq_flags);
-        }
+        if (raw_res_spin_lock_irqsave(&qs->lock, irq_flags))
+                return -EBUSY;

         if (queue_stack_map_is_full(qs)) {
                 if (!replace) {
@@ -227,7 +216,7 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
         qs->head = 0;

 out:
-        raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+        raw_res_spin_unlock_irqrestore(&qs->lock, irq_flags);
         return err;
 }
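The ringbuf conversion below follows the same shape. A minimal sketch of the pattern both conversions apply, using a hypothetical map structure and operation (only the rqspinlock_t type and the raw_res_spin_lock_irqsave()/raw_res_spin_unlock_irqrestore() calls come from the diff): the resilient lock can fail to acquire (deadlock, timeout, or contention from NMI context), so the caller reports -EBUSY instead of special-casing in_nmi() with a trylock.

#include <linux/errno.h>
#include <linux/types.h>
#include <asm/rqspinlock.h>

/* Hypothetical kernel-side example, not code from this tree. */
struct example_map {
        rqspinlock_t lock;
        u32 head, tail;
};

static int example_map_op(struct example_map *map)
{
        unsigned long flags;

        /* Acquisition may fail instead of spinning forever or deadlocking. */
        if (raw_res_spin_lock_irqsave(&map->lock, flags))
                return -EBUSY;

        /* ... critical section updating map->head / map->tail ... */

        raw_res_spin_unlock_irqrestore(&map->lock, flags);
        return 0;
}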

kernel/bpf/ringbuf.c

Lines changed: 7 additions & 10 deletions

@@ -11,6 +11,7 @@
 #include <linux/kmemleak.h>
 #include <uapi/linux/btf.h>
 #include <linux/btf_ids.h>
+#include <asm/rqspinlock.h>

 #define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)

@@ -29,7 +30,7 @@ struct bpf_ringbuf {
         u64 mask;
         struct page **pages;
         int nr_pages;
-        raw_spinlock_t spinlock ____cacheline_aligned_in_smp;
+        rqspinlock_t spinlock ____cacheline_aligned_in_smp;
         /* For user-space producer ring buffers, an atomic_t busy bit is used
          * to synchronize access to the ring buffers in the kernel, rather than
          * the spinlock that is used for kernel-producer ring buffers. This is
@@ -173,7 +174,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
         if (!rb)
                 return NULL;

-        raw_spin_lock_init(&rb->spinlock);
+        raw_res_spin_lock_init(&rb->spinlock);
         atomic_set(&rb->busy, 0);
         init_waitqueue_head(&rb->waitq);
         init_irq_work(&rb->work, bpf_ringbuf_notify);
@@ -416,12 +417,8 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)

         cons_pos = smp_load_acquire(&rb->consumer_pos);

-        if (in_nmi()) {
-                if (!raw_spin_trylock_irqsave(&rb->spinlock, flags))
-                        return NULL;
-        } else {
-                raw_spin_lock_irqsave(&rb->spinlock, flags);
-        }
+        if (raw_res_spin_lock_irqsave(&rb->spinlock, flags))
+                return NULL;

         pend_pos = rb->pending_pos;
         prod_pos = rb->producer_pos;
@@ -446,7 +443,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
          */
         if (new_prod_pos - cons_pos > rb->mask ||
             new_prod_pos - pend_pos > rb->mask) {
-                raw_spin_unlock_irqrestore(&rb->spinlock, flags);
+                raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
                 return NULL;
         }

@@ -458,7 +455,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
         /* pairs with consumer's smp_load_acquire() */
         smp_store_release(&rb->producer_pos, new_prod_pos);

-        raw_spin_unlock_irqrestore(&rb->spinlock, flags);
+        raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);

         return (void *)hdr + BPF_RINGBUF_HDR_SZ;
 }

kernel/bpf/rqspinlock.c

Lines changed: 1 addition & 1 deletion

@@ -253,7 +253,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
         })
 #else
 #define RES_CHECK_TIMEOUT(ts, ret, mask) \
-        ({ (ret) = check_timeout(&(ts)); })
+        ({ (ret) = check_timeout((lock), (mask), &(ts)); })
 #endif

 /*

net/core/filter.c

Lines changed: 44 additions & 36 deletions

@@ -218,24 +218,36 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
         return 0;
 }

+static int bpf_skb_load_helper_convert_offset(const struct sk_buff *skb, int offset)
+{
+        if (likely(offset >= 0))
+                return offset;
+
+        if (offset >= SKF_NET_OFF)
+                return offset - SKF_NET_OFF + skb_network_offset(skb);
+
+        if (offset >= SKF_LL_OFF && skb_mac_header_was_set(skb))
+                return offset - SKF_LL_OFF + skb_mac_offset(skb);
+
+        return INT_MIN;
+}
+
 BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
            data, int, headlen, int, offset)
 {
-        u8 tmp, *ptr;
+        u8 tmp;
         const int len = sizeof(tmp);

-        if (offset >= 0) {
-                if (headlen - offset >= len)
-                        return *(u8 *)(data + offset);
-                if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
-                        return tmp;
-        } else {
-                ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
-                if (likely(ptr))
-                        return *(u8 *)ptr;
-        }
+        offset = bpf_skb_load_helper_convert_offset(skb, offset);
+        if (offset == INT_MIN)
+                return -EFAULT;

-        return -EFAULT;
+        if (headlen - offset >= len)
+                return *(u8 *)(data + offset);
+        if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+                return tmp;
+        else
+                return -EFAULT;
 }

 BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
@@ -248,21 +260,19 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
 BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
            data, int, headlen, int, offset)
 {
-        __be16 tmp, *ptr;
+        __be16 tmp;
         const int len = sizeof(tmp);

-        if (offset >= 0) {
-                if (headlen - offset >= len)
-                        return get_unaligned_be16(data + offset);
-                if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
-                        return be16_to_cpu(tmp);
-        } else {
-                ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
-                if (likely(ptr))
-                        return get_unaligned_be16(ptr);
-        }
+        offset = bpf_skb_load_helper_convert_offset(skb, offset);
+        if (offset == INT_MIN)
+                return -EFAULT;

-        return -EFAULT;
+        if (headlen - offset >= len)
+                return get_unaligned_be16(data + offset);
+        if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+                return be16_to_cpu(tmp);
+        else
+                return -EFAULT;
 }

 BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
@@ -275,21 +285,19 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
 BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
            data, int, headlen, int, offset)
 {
-        __be32 tmp, *ptr;
+        __be32 tmp;
         const int len = sizeof(tmp);

-        if (likely(offset >= 0)) {
-                if (headlen - offset >= len)
-                        return get_unaligned_be32(data + offset);
-                if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
-                        return be32_to_cpu(tmp);
-        } else {
-                ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
-                if (likely(ptr))
-                        return get_unaligned_be32(ptr);
-        }
+        offset = bpf_skb_load_helper_convert_offset(skb, offset);
+        if (offset == INT_MIN)
+                return -EFAULT;

-        return -EFAULT;
+        if (headlen - offset >= len)
+                return get_unaligned_be32(data + offset);
+        if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+                return be32_to_cpu(tmp);
+        else
+                return -EFAULT;
 }

 BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
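For context on the path being fixed, here is a hedged userspace sketch (not taken from the new skf_net_off selftest, which is not shown in this excerpt) of a classic BPF socket filter that loads the IPv4 protocol byte through the negative SKF_NET_OFF offset; with this change the load also succeeds when that byte lives in an skb fragment rather than the linear head.

#include <linux/filter.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
        /* Accept a packet only if the byte at network-header offset 9
         * (the IPv4 protocol field) is UDP; drop everything else.  The
         * negative SKF_NET_OFF offset is what sk_filter now resolves via
         * bpf_skb_load_helper_convert_offset() above.
         */
        struct sock_filter insns[] = {
                BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF + 9),
                BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, IPPROTO_UDP, 0, 1),
                BPF_STMT(BPF_RET | BPF_K, 0xffff),      /* accept */
                BPF_STMT(BPF_RET | BPF_K, 0),           /* drop */
        };
        struct sock_fprog prog = {
                .len = sizeof(insns) / sizeof(insns[0]),
                .filter = insns,
        };
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                                 &prog, sizeof(prog)) < 0)
                perror("SO_ATTACH_FILTER");
        return 0;
}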

tools/testing/selftests/bpf/prog_tests/res_spin_lock.c

Lines changed: 5 additions & 2 deletions

@@ -25,8 +25,11 @@ static void *spin_lock_thread(void *arg)

         while (!READ_ONCE(skip)) {
                 err = bpf_prog_test_run_opts(prog_fd, &topts);
-                ASSERT_OK(err, "test_run");
-                ASSERT_OK(topts.retval, "test_run retval");
+                if (err || topts.retval) {
+                        ASSERT_OK(err, "test_run");
+                        ASSERT_OK(topts.retval, "test_run retval");
+                        break;
+                }
         }
         pthread_exit(arg);
 }

tools/testing/selftests/bpf/progs/res_spin_lock.c

Lines changed: 7 additions & 3 deletions

@@ -38,13 +38,14 @@ int res_spin_lock_test(struct __sk_buff *ctx)
         r = bpf_res_spin_lock(&elem1->lock);
         if (r)
                 return r;
-        if (!bpf_res_spin_lock(&elem2->lock)) {
+        r = bpf_res_spin_lock(&elem2->lock);
+        if (!r) {
                 bpf_res_spin_unlock(&elem2->lock);
                 bpf_res_spin_unlock(&elem1->lock);
                 return -1;
         }
         bpf_res_spin_unlock(&elem1->lock);
-        return 0;
+        return r != -EDEADLK;
 }

 SEC("tc")
@@ -124,12 +125,15 @@ int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx)
         /* Trigger AA, after exhausting entries in the held lock table. This
          * time, only the timeout can save us, as AA detection won't succeed.
          */
-        if (!bpf_res_spin_lock(locks[34])) {
+        ret = bpf_res_spin_lock(locks[34]);
+        if (!ret) {
                 bpf_res_spin_unlock(locks[34]);
                 ret = 1;
                 goto end;
         }

+        ret = ret != -ETIMEDOUT ? 2 : 0;
+
 end:
         for (i = i - 1; i >= 0; i--)
                 bpf_res_spin_unlock(locks[i]);

tools/testing/selftests/net/.gitignore

Lines changed: 1 addition & 0 deletions

@@ -39,6 +39,7 @@ scm_rights
 sk_bind_sendto_listen
 sk_connect_zero_addr
 sk_so_peek_off
+skf_net_off
 socket
 so_incoming_cpu
 so_netns_cookie

tools/testing/selftests/net/Makefile

Lines changed: 2 additions & 0 deletions

@@ -106,6 +106,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh
 TEST_PROGS += busy_poll_test.sh
 TEST_GEN_PROGS += proc_net_pktgen
 TEST_PROGS += lwt_dst_cache_ref_loop.sh
+TEST_PROGS += skf_net_off.sh
+TEST_GEN_FILES += skf_net_off

 # YNL files, must be before "include ..lib.mk"
 YNL_GEN_FILES := busy_poller netlink-dumps
