Skip to content

Commit 0462eaa

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2019-05-31 The following pull-request contains BPF updates for your *net-next* tree. Lots of exciting new features in the first PR of this development cycle! The main changes are: 1) misc verifier improvements, from Alexei. 2) bpftool can now convert btf to valid C, from Andrii. 3) verifier can insert explicit ZEXT insn when requested by 32-bit JITs. This feature greatly improves BPF speed on 32-bit architectures. From Jiong. 4) cgroups will now auto-detach bpf programs. This fixes the issue of thousands of BPF programs getting stuck in dying cgroups. From Roman. 5) new bpf_send_signal() helper, from Yonghong. 6) cgroup inet skb programs can signal CN to the stack, from Lawrence. 7) miscellaneous cleanups, from many developers. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 33aae28 + cd53850 commit 0462eaa

File tree

122 files changed

+6430
-1013
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

122 files changed

+6430
-1013
lines changed

Documentation/bpf/bpf_design_QA.rst

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -172,11 +172,31 @@ registers which makes BPF inefficient virtual machine for 32-bit
172172
CPU architectures and 32-bit HW accelerators. Can true 32-bit registers
173173
be added to BPF in the future?
174174

175-
A: NO. The first thing to improve performance on 32-bit archs is to teach
176-
LLVM to generate code that uses 32-bit subregisters. Then second step
177-
is to teach verifier to mark operations where zero-ing upper bits
178-
is unnecessary. Then JITs can take advantage of those markings and
179-
drastically reduce size of generated code and improve performance.
175+
A: NO.
176+
177+
But some optimizations on zero-ing the upper 32 bits for BPF registers are
178+
available, and can be leveraged to improve the performance of JITed BPF
179+
programs for 32-bit architectures.
180+
181+
Starting with version 7, LLVM is able to generate instructions that operate
182+
on 32-bit subregisters, provided the option -mattr=+alu32 is passed for
183+
compiling a program. Furthermore, the verifier can now mark the
184+
instructions for which zero-ing the upper bits of the destination register
185+
is required, and insert an explicit zero-extension (zext) instruction
186+
(a mov32 variant). This means that for architectures without zext hardware
187+
support, the JIT back-ends do not need to clear the upper bits for
188+
subregisters written by alu32 instructions or narrow loads. Instead, the
189+
back-ends simply need to support code generation for that mov32 variant,
190+
and to overwrite bpf_jit_needs_zext() to make it return "true" (in order to
191+
enable zext insertion in the verifier).
192+
193+
Note that it is possible for a JIT back-end to have partial hardware
194+
support for zext. In that case, if verifier zext insertion is enabled,
195+
it could lead to the insertion of unnecessary zext instructions. Such
196+
instructions could be removed by creating a simple peephole inside the JIT
197+
back-end: if one instruction has hardware support for zext and if the next
198+
instruction is an explicit zext, then the latter can be skipped when doing
199+
the code generation.
180200

181201
Q: Does BPF have a stable ABI?
182202
------------------------------

arch/arm/net/bpf_jit_32.c

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,8 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
736736

737737
/* ALU operation */
738738
emit_alu_r(rd[1], rs, true, false, op, ctx);
739-
emit_a32_mov_i(rd[0], 0, ctx);
739+
if (!ctx->prog->aux->verifier_zext)
740+
emit_a32_mov_i(rd[0], 0, ctx);
740741
}
741742

742743
arm_bpf_put_reg64(dst, rd, ctx);
@@ -758,8 +759,9 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
758759
struct jit_ctx *ctx) {
759760
if (!is64) {
760761
emit_a32_mov_r(dst_lo, src_lo, ctx);
761-
/* Zero out high 4 bytes */
762-
emit_a32_mov_i(dst_hi, 0, ctx);
762+
if (!ctx->prog->aux->verifier_zext)
763+
/* Zero out high 4 bytes */
764+
emit_a32_mov_i(dst_hi, 0, ctx);
763765
} else if (__LINUX_ARM_ARCH__ < 6 &&
764766
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
765767
/* complete 8 byte move */
@@ -1060,17 +1062,20 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
10601062
case BPF_B:
10611063
/* Load a Byte */
10621064
emit(ARM_LDRB_I(rd[1], rm, off), ctx);
1063-
emit_a32_mov_i(rd[0], 0, ctx);
1065+
if (!ctx->prog->aux->verifier_zext)
1066+
emit_a32_mov_i(rd[0], 0, ctx);
10641067
break;
10651068
case BPF_H:
10661069
/* Load a HalfWord */
10671070
emit(ARM_LDRH_I(rd[1], rm, off), ctx);
1068-
emit_a32_mov_i(rd[0], 0, ctx);
1071+
if (!ctx->prog->aux->verifier_zext)
1072+
emit_a32_mov_i(rd[0], 0, ctx);
10691073
break;
10701074
case BPF_W:
10711075
/* Load a Word */
10721076
emit(ARM_LDR_I(rd[1], rm, off), ctx);
1073-
emit_a32_mov_i(rd[0], 0, ctx);
1077+
if (!ctx->prog->aux->verifier_zext)
1078+
emit_a32_mov_i(rd[0], 0, ctx);
10741079
break;
10751080
case BPF_DW:
10761081
/* Load a Double Word */
@@ -1359,6 +1364,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
13591364
case BPF_ALU64 | BPF_MOV | BPF_X:
13601365
switch (BPF_SRC(code)) {
13611366
case BPF_X:
1367+
if (imm == 1) {
1368+
/* Special mov32 for zext */
1369+
emit_a32_mov_i(dst_hi, 0, ctx);
1370+
break;
1371+
}
13621372
emit_a32_mov_r64(is64, dst, src, ctx);
13631373
break;
13641374
case BPF_K:
@@ -1438,7 +1448,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
14381448
}
14391449
emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
14401450
arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
1441-
emit_a32_mov_i(dst_hi, 0, ctx);
1451+
if (!ctx->prog->aux->verifier_zext)
1452+
emit_a32_mov_i(dst_hi, 0, ctx);
14421453
break;
14431454
case BPF_ALU64 | BPF_DIV | BPF_K:
14441455
case BPF_ALU64 | BPF_DIV | BPF_X:
@@ -1453,7 +1464,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
14531464
return -EINVAL;
14541465
if (imm)
14551466
emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code));
1456-
emit_a32_mov_i(dst_hi, 0, ctx);
1467+
if (!ctx->prog->aux->verifier_zext)
1468+
emit_a32_mov_i(dst_hi, 0, ctx);
14571469
break;
14581470
/* dst = dst << imm */
14591471
case BPF_ALU64 | BPF_LSH | BPF_K:
@@ -1488,7 +1500,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
14881500
/* dst = ~dst */
14891501
case BPF_ALU | BPF_NEG:
14901502
emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code));
1491-
emit_a32_mov_i(dst_hi, 0, ctx);
1503+
if (!ctx->prog->aux->verifier_zext)
1504+
emit_a32_mov_i(dst_hi, 0, ctx);
14921505
break;
14931506
/* dst = ~dst (64 bit) */
14941507
case BPF_ALU64 | BPF_NEG:
@@ -1544,11 +1557,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
15441557
#else /* ARMv6+ */
15451558
emit(ARM_UXTH(rd[1], rd[1]), ctx);
15461559
#endif
1547-
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
1560+
if (!ctx->prog->aux->verifier_zext)
1561+
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
15481562
break;
15491563
case 32:
15501564
/* zero-extend 32 bits into 64 bits */
1551-
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
1565+
if (!ctx->prog->aux->verifier_zext)
1566+
emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx);
15521567
break;
15531568
case 64:
15541569
/* nop */
@@ -1838,6 +1853,11 @@ void bpf_jit_compile(struct bpf_prog *prog)
18381853
/* Nothing to do here. We support Internal BPF. */
18391854
}
18401855

1856+
bool bpf_jit_needs_zext(void)
1857+
{
1858+
return true;
1859+
}
1860+
18411861
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
18421862
{
18431863
struct bpf_prog *tmp, *orig_prog = prog;

arch/powerpc/net/bpf_jit_comp64.c

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -504,26 +504,35 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
504504
case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
505505
/* slw clears top 32 bits */
506506
PPC_SLW(dst_reg, dst_reg, src_reg);
507+
/* skip zero extension move, but set address map. */
508+
if (insn_is_zext(&insn[i + 1]))
509+
addrs[++i] = ctx->idx * 4;
507510
break;
508511
case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
509512
PPC_SLD(dst_reg, dst_reg, src_reg);
510513
break;
511514
case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
512515
/* with imm 0, we still need to clear top 32 bits */
513516
PPC_SLWI(dst_reg, dst_reg, imm);
517+
if (insn_is_zext(&insn[i + 1]))
518+
addrs[++i] = ctx->idx * 4;
514519
break;
515520
case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
516521
if (imm != 0)
517522
PPC_SLDI(dst_reg, dst_reg, imm);
518523
break;
519524
case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
520525
PPC_SRW(dst_reg, dst_reg, src_reg);
526+
if (insn_is_zext(&insn[i + 1]))
527+
addrs[++i] = ctx->idx * 4;
521528
break;
522529
case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
523530
PPC_SRD(dst_reg, dst_reg, src_reg);
524531
break;
525532
case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
526533
PPC_SRWI(dst_reg, dst_reg, imm);
534+
if (insn_is_zext(&insn[i + 1]))
535+
addrs[++i] = ctx->idx * 4;
527536
break;
528537
case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
529538
if (imm != 0)
@@ -548,18 +557,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
548557
*/
549558
case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
550559
case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
560+
if (imm == 1) {
561+
/* special mov32 for zext */
562+
PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
563+
break;
564+
}
551565
PPC_MR(dst_reg, src_reg);
552566
goto bpf_alu32_trunc;
553567
case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
554568
case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
555569
PPC_LI32(dst_reg, imm);
556570
if (imm < 0)
557571
goto bpf_alu32_trunc;
572+
else if (insn_is_zext(&insn[i + 1]))
573+
addrs[++i] = ctx->idx * 4;
558574
break;
559575

560576
bpf_alu32_trunc:
561577
/* Truncate to 32-bits */
562-
if (BPF_CLASS(code) == BPF_ALU)
578+
if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
563579
PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
564580
break;
565581

@@ -618,10 +634,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
618634
case 16:
619635
/* zero-extend 16 bits into 64 bits */
620636
PPC_RLDICL(dst_reg, dst_reg, 0, 48);
637+
if (insn_is_zext(&insn[i + 1]))
638+
addrs[++i] = ctx->idx * 4;
621639
break;
622640
case 32:
623-
/* zero-extend 32 bits into 64 bits */
624-
PPC_RLDICL(dst_reg, dst_reg, 0, 32);
641+
if (!fp->aux->verifier_zext)
642+
/* zero-extend 32 bits into 64 bits */
643+
PPC_RLDICL(dst_reg, dst_reg, 0, 32);
625644
break;
626645
case 64:
627646
/* nop */
@@ -698,14 +717,20 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
698717
/* dst = *(u8 *)(ul) (src + off) */
699718
case BPF_LDX | BPF_MEM | BPF_B:
700719
PPC_LBZ(dst_reg, src_reg, off);
720+
if (insn_is_zext(&insn[i + 1]))
721+
addrs[++i] = ctx->idx * 4;
701722
break;
702723
/* dst = *(u16 *)(ul) (src + off) */
703724
case BPF_LDX | BPF_MEM | BPF_H:
704725
PPC_LHZ(dst_reg, src_reg, off);
726+
if (insn_is_zext(&insn[i + 1]))
727+
addrs[++i] = ctx->idx * 4;
705728
break;
706729
/* dst = *(u32 *)(ul) (src + off) */
707730
case BPF_LDX | BPF_MEM | BPF_W:
708731
PPC_LWZ(dst_reg, src_reg, off);
732+
if (insn_is_zext(&insn[i + 1]))
733+
addrs[++i] = ctx->idx * 4;
709734
break;
710735
/* dst = *(u64 *)(ul) (src + off) */
711736
case BPF_LDX | BPF_MEM | BPF_DW:
@@ -1046,6 +1071,11 @@ struct powerpc64_jit_data {
10461071
struct codegen_context ctx;
10471072
};
10481073

1074+
bool bpf_jit_needs_zext(void)
1075+
{
1076+
return true;
1077+
}
1078+
10491079
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
10501080
{
10511081
u32 proglen;

0 commit comments

Comments
 (0)