Skip to content

Commit 37ccdf7

Browse files
author
Paolo Abeni
committed
Daniel Borkmann says:

====================
pull-request: bpf 2024-03-25

The following pull-request contains BPF updates for your *net* tree.

We've added 17 non-merge commits during the last 12 day(s) which contain a total of 19 files changed, 184 insertions(+), 61 deletions(-).

The main changes are:

1) Fix an arm64 BPF JIT bug in BPF_LDX_MEMSX implementation's offset handling found via test_bpf module, from Puranjay Mohan.

2) Various fixups to the BPF arena code in particular in the BPF verifier and around BPF selftests to match latest corresponding LLVM implementation, from Puranjay Mohan and Alexei Starovoitov.

3) Fix xsk to not assume that metadata is always requested in TX completion, from Stanislav Fomichev.

4) Fix riscv BPF JIT's kfunc parameter incompatibility between BPF and the riscv ABI which requires sign-extension on int/uint, from Pu Lehui.

5) Fix s390x BPF JIT's bpf_plt pointer arithmetic which triggered a crash when testing struct_ops, from Ilya Leoshkevich.

6) Fix libbpf's arena mmap handling which had incorrect u64-to-pointer cast on 32-bit architectures, from Andrii Nakryiko.

7) Fix libbpf to define MFD_CLOEXEC when not available, from Arnaldo Carvalho de Melo.

8) Fix arm64 BPF JIT implementation for 32bit unconditional bswap which resulted in an incorrect swap as indicated by test_bpf, from Artem Savkov.

9) Fix BPF man page build script to use silent mode, from Hangbin Liu.
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  riscv, bpf: Fix kfunc parameters incompatibility between bpf and riscv abi
  bpf: verifier: reject addr_space_cast insn without arena
  selftests/bpf: verifier_arena: fix mmap address for arm64
  bpf: verifier: fix addr_space_cast from as(1) to as(0)
  libbpf: Define MFD_CLOEXEC if not available
  arm64: bpf: fix 32bit unconditional bswap
  bpf, arm64: fix bug in BPF_LDX_MEMSX
  libbpf: fix u64-to-pointer cast on 32-bit arches
  s390/bpf: Fix bpf_plt pointer arithmetic
  xsk: Don't assume metadata is always requested in TX completion
  selftests/bpf: Add arena test case for 4Gbyte corner case
  selftests/bpf: Remove hard coded PAGE_SIZE macro.
  libbpf, selftests/bpf: Adjust libbpf, bpftool, selftests to match LLVM
  bpf: Clarify bpf_arena comments.
  MAINTAINERS: Update email address for Quentin Monnet
  scripts/bpf_doc: Use silent mode when exec make cmd
  bpf: Temporarily disable atomic operations in BPF arena
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Paolo Abeni <[email protected]>
2 parents f142552 + 443574b commit 37ccdf7

File tree

19 files changed

+184
-61
lines changed

19 files changed

+184
-61
lines changed

.mailmap

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,8 @@ Prasad Sodagudi <[email protected]> <[email protected]>
497497
498498
499499
500-
500+
501+
501502
502503
503504

MAINTAINERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3967,7 +3967,7 @@ F: kernel/bpf/bpf_lru*
39673967
F: kernel/bpf/cgroup.c
39683968

39693969
BPF [TOOLING] (bpftool)
3970-
M: Quentin Monnet <[email protected]>
3970+
M: Quentin Monnet <[email protected]>
39713971
39723972
S: Maintained
39733973
F: kernel/bpf/disasm.*

arch/arm64/net/bpf_jit_comp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
943943
emit(A64_UXTH(is64, dst, dst), ctx);
944944
break;
945945
case 32:
946-
emit(A64_REV32(is64, dst, dst), ctx);
946+
emit(A64_REV32(0, dst, dst), ctx);
947947
/* upper 32 bits already cleared */
948948
break;
949949
case 64:
@@ -1256,7 +1256,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
12561256
} else {
12571257
emit_a64_mov_i(1, tmp, off, ctx);
12581258
if (sign_extend)
1259-
emit(A64_LDRSW(dst, src_adj, off_adj), ctx);
1259+
emit(A64_LDRSW(dst, src, tmp), ctx);
12601260
else
12611261
emit(A64_LDR32(dst, src, tmp), ctx);
12621262
}

arch/riscv/net/bpf_jit_comp64.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
14631463
if (ret < 0)
14641464
return ret;
14651465

1466+
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1467+
const struct btf_func_model *fm;
1468+
int idx;
1469+
1470+
fm = bpf_jit_find_kfunc_model(ctx->prog, insn);
1471+
if (!fm)
1472+
return -EINVAL;
1473+
1474+
for (idx = 0; idx < fm->nr_args; idx++) {
1475+
u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx);
1476+
1477+
if (fm->arg_size[idx] == sizeof(int))
1478+
emit_sextw(reg, reg, ctx);
1479+
}
1480+
}
1481+
14661482
ret = emit_call(addr, fixed_addr, ctx);
14671483
if (ret)
14681484
return ret;

arch/s390/net/bpf_jit_comp.c

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size)
516516
* PLT for hotpatchable calls. The calling convention is the same as for the
517517
* ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
518518
*/
519-
extern const char bpf_plt[];
520-
extern const char bpf_plt_ret[];
521-
extern const char bpf_plt_target[];
522-
extern const char bpf_plt_end[];
523-
#define BPF_PLT_SIZE 32
519+
struct bpf_plt {
520+
char code[16];
521+
void *ret;
522+
void *target;
523+
} __packed;
524+
extern const struct bpf_plt bpf_plt;
524525
asm(
525526
".pushsection .rodata\n"
526527
" .balign 8\n"
@@ -531,15 +532,14 @@ asm(
531532
" .balign 8\n"
532533
"bpf_plt_ret: .quad 0\n"
533534
"bpf_plt_target: .quad 0\n"
534-
"bpf_plt_end:\n"
535535
" .popsection\n"
536536
);
537537

538-
static void bpf_jit_plt(void *plt, void *ret, void *target)
538+
static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
539539
{
540-
memcpy(plt, bpf_plt, BPF_PLT_SIZE);
541-
*(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
542-
*(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
540+
memcpy(plt, &bpf_plt, sizeof(*plt));
541+
plt->ret = ret;
542+
plt->target = target;
543543
}
544544

545545
/*
@@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
662662
jit->prg = ALIGN(jit->prg, 8);
663663
jit->prologue_plt = jit->prg;
664664
if (jit->prg_buf)
665-
bpf_jit_plt(jit->prg_buf + jit->prg,
665+
bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
666666
jit->prg_buf + jit->prologue_plt_ret, NULL);
667-
jit->prg += BPF_PLT_SIZE;
667+
jit->prg += sizeof(struct bpf_plt);
668668
}
669669

670670
static int get_probe_mem_regno(const u8 *insn)
@@ -2040,9 +2040,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
20402040
struct bpf_jit jit;
20412041
int pass;
20422042

2043-
if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
2044-
return orig_fp;
2045-
20462043
if (!fp->jit_requested)
20472044
return orig_fp;
20482045

@@ -2148,14 +2145,11 @@ bool bpf_jit_supports_far_kfunc_call(void)
21482145
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
21492146
void *old_addr, void *new_addr)
21502147
{
2148+
struct bpf_plt expected_plt, current_plt, new_plt, *plt;
21512149
struct {
21522150
u16 opc;
21532151
s32 disp;
21542152
} __packed insn;
2155-
char expected_plt[BPF_PLT_SIZE];
2156-
char current_plt[BPF_PLT_SIZE];
2157-
char new_plt[BPF_PLT_SIZE];
2158-
char *plt;
21592153
char *ret;
21602154
int err;
21612155

@@ -2174,18 +2168,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
21742168
*/
21752169
} else {
21762170
/* Verify the PLT. */
2177-
plt = (char *)ip + (insn.disp << 1);
2178-
err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
2171+
plt = ip + (insn.disp << 1);
2172+
err = copy_from_kernel_nofault(&current_plt, plt,
2173+
sizeof(current_plt));
21792174
if (err < 0)
21802175
return err;
21812176
ret = (char *)ip + 6;
2182-
bpf_jit_plt(expected_plt, ret, old_addr);
2183-
if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
2177+
bpf_jit_plt(&expected_plt, ret, old_addr);
2178+
if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
21842179
return -EINVAL;
21852180
/* Adjust the call address. */
2186-
bpf_jit_plt(new_plt, ret, new_addr);
2187-
s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
2188-
new_plt + (bpf_plt_target - bpf_plt),
2181+
bpf_jit_plt(&new_plt, ret, new_addr);
2182+
s390_kernel_write(&plt->target, &new_plt.target,
21892183
sizeof(void *));
21902184
}
21912185

include/net/xdp_sock.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
188188
{
189189
if (!compl)
190190
return;
191+
if (!compl->tx_timestamp)
192+
return;
191193

192194
*compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
193195
}

kernel/bpf/arena.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
4040
#define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8)
41-
#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ)
41+
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
4242

4343
struct bpf_arena {
4444
struct bpf_map map;
@@ -110,7 +110,7 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
110110
return ERR_PTR(-EINVAL);
111111

112112
vm_range = (u64)attr->max_entries * PAGE_SIZE;
113-
if (vm_range > (1ull << 32))
113+
if (vm_range > SZ_4G)
114114
return ERR_PTR(-E2BIG);
115115

116116
if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32))
@@ -301,7 +301,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad
301301

302302
if (pgoff)
303303
return -EINVAL;
304-
if (len > (1ull << 32))
304+
if (len > SZ_4G)
305305
return -E2BIG;
306306

307307
/* if user_vm_start was specified at arena creation time */
@@ -322,7 +322,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad
322322
if (WARN_ON_ONCE(arena->user_vm_start))
323323
/* checks at map creation time should prevent this */
324324
return -EFAULT;
325-
return round_up(ret, 1ull << 32);
325+
return round_up(ret, SZ_4G);
326326
}
327327

328328
static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
@@ -346,7 +346,7 @@ static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
346346
return -EBUSY;
347347

348348
/* Earlier checks should prevent this */
349-
if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff))
349+
if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > SZ_4G || vma->vm_pgoff))
350350
return -EFAULT;
351351

352352
if (remember_vma(arena, vma))
@@ -420,7 +420,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
420420
if (uaddr & ~PAGE_MASK)
421421
return 0;
422422
pgoff = compute_pgoff(arena, uaddr);
423-
if (pgoff + page_cnt > page_cnt_max)
423+
if (pgoff > page_cnt_max - page_cnt)
424424
/* requested address will be outside of user VMA */
425425
return 0;
426426
}
@@ -447,7 +447,13 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
447447
goto out;
448448

449449
uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
450-
/* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */
450+
/* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
451+
* will not overflow 32-bit. Lower 32-bit need to represent
452+
* contiguous user address range.
453+
* Map these pages at kern_vm_start base.
454+
* kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
455+
* lower 32-bit and it's ok.
456+
*/
451457
ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
452458
kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
453459
if (ret) {
@@ -510,6 +516,11 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
510516
if (!page)
511517
continue;
512518
if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
519+
/* Optimization for the common case of page_cnt==1:
520+
* If page wasn't mapped into some user vma there
521+
* is no need to call zap_pages which is slow. When
522+
* page_cnt is big it's faster to do the batched zap.
523+
*/
513524
zap_pages(arena, full_uaddr, 1);
514525
vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
515526
__free_page(page);

kernel/bpf/verifier.c

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5682,6 +5682,13 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
56825682
return reg->type == PTR_TO_FLOW_KEYS;
56835683
}
56845684

5685+
static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
5686+
{
5687+
const struct bpf_reg_state *reg = reg_state(env, regno);
5688+
5689+
return reg->type == PTR_TO_ARENA;
5690+
}
5691+
56855692
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
56865693
#ifdef CONFIG_NET
56875694
[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
@@ -7019,7 +7026,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
70197026
if (is_ctx_reg(env, insn->dst_reg) ||
70207027
is_pkt_reg(env, insn->dst_reg) ||
70217028
is_flow_key_reg(env, insn->dst_reg) ||
7022-
is_sk_reg(env, insn->dst_reg)) {
7029+
is_sk_reg(env, insn->dst_reg) ||
7030+
is_arena_reg(env, insn->dst_reg)) {
70237031
verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
70247032
insn->dst_reg,
70257033
reg_type_str(env, reg_state(env, insn->dst_reg)->type));
@@ -14014,6 +14022,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
1401414022
verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
1401514023
return -EINVAL;
1401614024
}
14025+
if (!env->prog->aux->arena) {
14026+
verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
14027+
return -EINVAL;
14028+
}
1401714029
} else {
1401814030
if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
1401914031
insn->off != 32) || insn->imm) {
@@ -14046,8 +14058,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
1404614058
if (insn->imm) {
1404714059
/* off == BPF_ADDR_SPACE_CAST */
1404814060
mark_reg_unknown(env, regs, insn->dst_reg);
14049-
if (insn->imm == 1) /* cast from as(1) to as(0) */
14061+
if (insn->imm == 1) { /* cast from as(1) to as(0) */
1405014062
dst_reg->type = PTR_TO_ARENA;
14063+
/* PTR_TO_ARENA is 32-bit */
14064+
dst_reg->subreg_def = env->insn_idx + 1;
14065+
}
1405114066
} else if (insn->off == 0) {
1405214067
/* case: R1 = R2
1405314068
* copy register state to dest reg
@@ -19601,8 +19616,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
1960119616
(((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
1960219617
/* convert to 32-bit mov that clears upper 32-bit */
1960319618
insn->code = BPF_ALU | BPF_MOV | BPF_X;
19604-
/* clear off, so it's a normal 'wX = wY' from JIT pov */
19619+
/* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
1960519620
insn->off = 0;
19621+
insn->imm = 0;
1960619622
} /* cast from as(0) to as(1) should be handled by JIT */
1960719623
goto next_insn;
1960819624
}

scripts/bpf_doc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,8 +414,8 @@ def get_kernel_version(self):
414414
version = version.stdout.decode().rstrip()
415415
except:
416416
try:
417-
version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot,
418-
capture_output=True, check=True)
417+
version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'],
418+
cwd=linuxRoot, capture_output=True, check=True)
419419
version = version.stdout.decode().rstrip()
420420
except:
421421
return 'Linux'

tools/bpf/bpftool/gen.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
121121
int i, n;
122122

123123
/* recognize hard coded LLVM section name */
124-
if (strcmp(sec_name, ".arena.1") == 0) {
124+
if (strcmp(sec_name, ".addr_space.1") == 0) {
125125
/* this is the name to use in skeleton */
126126
snprintf(buf, buf_sz, "arena");
127127
return true;

tools/lib/bpf/libbpf.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ struct bpf_struct_ops {
498498
#define KSYMS_SEC ".ksyms"
499499
#define STRUCT_OPS_SEC ".struct_ops"
500500
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
501-
#define ARENA_SEC ".arena.1"
501+
#define ARENA_SEC ".addr_space.1"
502502

503503
enum libbpf_map_type {
504504
LIBBPF_MAP_UNSPEC,
@@ -1650,6 +1650,10 @@ static int sys_memfd_create(const char *name, unsigned flags)
16501650
return syscall(__NR_memfd_create, name, flags);
16511651
}
16521652

1653+
#ifndef MFD_CLOEXEC
1654+
#define MFD_CLOEXEC 0x0001U
1655+
#endif
1656+
16531657
static int create_placeholder_fd(void)
16541658
{
16551659
int fd;
@@ -5352,8 +5356,8 @@ bpf_object__create_maps(struct bpf_object *obj)
53525356
goto err_out;
53535357
}
53545358
if (map->def.type == BPF_MAP_TYPE_ARENA) {
5355-
map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
5356-
PROT_READ | PROT_WRITE,
5359+
map->mmaped = mmap((void *)(long)map->map_extra,
5360+
bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
53575361
map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
53585362
map->fd, 0);
53595363
if (map->mmaped == MAP_FAILED) {

tools/testing/selftests/bpf/bpf_arena_common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
*/
3333
#endif
3434

35-
#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
35+
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
3636
#define __arena __attribute__((address_space(1)))
3737
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
3838
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */

tools/testing/selftests/bpf/prog_tests/arena_htab.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
#include <test_progs.h>
44
#include <sys/mman.h>
55
#include <network_helpers.h>
6-
6+
#include <sys/user.h>
7+
#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
8+
#include <unistd.h>
9+
#define PAGE_SIZE getpagesize()
10+
#endif
711
#include "arena_htab_asm.skel.h"
812
#include "arena_htab.skel.h"
913

10-
#define PAGE_SIZE 4096
11-
1214
#include "bpf_arena_htab.h"
1315

1416
static void test_arena_htab_common(struct htab *htab)

0 commit comments

Comments
 (0)