
Commit af487c5

Merge branch 'bpf-optimize-neg-sums'
Jakub Kicinski says:

====================
This set adds an optimization run to the NFP jit to turn ADD and SUB
instructions with negative immediate into the opposite operation with
a positive immediate. NFP can fit small immediates into the
instructions but it can't ever fit negative immediates. Addition of
small negative immediates is quite common in BPF programs for stack
address calculations, therefore this optimization gives us
non-negligible savings in instruction count (up to 4%).
====================

Signed-off-by: Daniel Borkmann <[email protected]>
2 parents: a18fda1 + 7bdc97b
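For illustration only (not part of the commit), here is a minimal user-space sketch of the ALU half of the rewrite that nfp_bpf_opt_neg_add_sub() performs in the jit.c diff below: an ADD of a negative constant becomes a SUB of a positive one (and vice versa), so the JIT only ever has to encode a non-negative immediate. The struct insn type and the neg_add_sub() helper are simplifications invented here, not driver code; the inlined opcode constants mirror the BPF uapi encodings. The jump case, handled via meta->jump_neg_op, is visible in the diff itself.

#include <stdint.h>
#include <stdio.h>

/* Constants mirror the BPF uapi encodings (include/uapi/linux/bpf_common.h). */
#define BPF_CLASS(code) ((code) & 0x07)
#define BPF_OP(code)    ((code) & 0xf0)
#define BPF_SRC(code)   ((code) & 0x08)
#define BPF_ALU         0x04
#define BPF_ALU64       0x07
#define BPF_K           0x00
#define BPF_ADD         0x00
#define BPF_SUB         0x10

struct insn {           /* stand-in for struct bpf_insn */
        uint8_t code;
        int32_t imm;
};

/* Flip ADD of a negative constant into SUB of a positive one (and vice
 * versa), so the immediate that the JIT has to encode is always >= 0.
 */
static void neg_add_sub(struct insn *insn)
{
        uint8_t cls = BPF_CLASS(insn->code);

        if ((cls != BPF_ALU && cls != BPF_ALU64) ||
            BPF_SRC(insn->code) != BPF_K || insn->imm >= 0)
                return;

        if (BPF_OP(insn->code) == BPF_ADD)
                insn->code = cls | BPF_SUB | BPF_K;
        else if (BPF_OP(insn->code) == BPF_SUB)
                insn->code = cls | BPF_ADD | BPF_K;
        else
                return;

        insn->imm = -insn->imm;
}

int main(void)
{
        /* r10-relative stack math: "r1 += -40" becomes "r1 -= 40" */
        struct insn insn = { .code = BPF_ALU64 | BPF_ADD | BPF_K, .imm = -40 };

        neg_add_sub(&insn);
        printf("code=0x%02x imm=%d\n", insn.code, insn.imm);   /* 0x17 40 */
        return 0;
}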

File tree

2 files changed: +124, -113 lines

drivers/net/ethernet/netronome/nfp/bpf/jit.c

Lines changed: 119 additions & 112 deletions
@@ -1214,45 +1214,83 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	return 0;
 }
 
-static int
-wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-	    enum br_mask br_mask, bool swap)
+static const struct jmp_code_map {
+	enum br_mask br_mask;
+	bool swap;
+} jmp_code_map[] = {
+	[BPF_JGT >> 4] = { BR_BLO, true },
+	[BPF_JGE >> 4] = { BR_BHS, false },
+	[BPF_JLT >> 4] = { BR_BLO, false },
+	[BPF_JLE >> 4] = { BR_BHS, true },
+	[BPF_JSGT >> 4] = { BR_BLT, true },
+	[BPF_JSGE >> 4] = { BR_BGE, false },
+	[BPF_JSLT >> 4] = { BR_BLT, false },
+	[BPF_JSLE >> 4] = { BR_BGE, true },
+};
+
+static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
+{
+	unsigned int op;
+
+	op = BPF_OP(meta->insn.code) >> 4;
+	/* br_mask of 0 is BR_BEQ which we don't use in jump code table */
+	if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
+		      !jmp_code_map[op].br_mask,
+		      "no code found for jump instruction"))
+		return NULL;
+
+	return &jmp_code_map[op];
+}
+
+static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
+	const struct jmp_code_map *code;
+	enum alu_op alu_op, carry_op;
 	u8 reg = insn->dst_reg * 2;
 	swreg tmp_reg;
 
+	code = nfp_jmp_code_get(meta);
+	if (!code)
+		return -EINVAL;
+
+	alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
+	carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;
+
 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
-	if (!swap)
-		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
+	if (!code->swap)
+		emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
 	else
-		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
+		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
 
 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
-	if (!swap)
+	if (!code->swap)
 		emit_alu(nfp_prog, reg_none(),
-			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
+			 reg_a(reg + 1), carry_op, tmp_reg);
 	else
 		emit_alu(nfp_prog, reg_none(),
-			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
+			 tmp_reg, carry_op, reg_a(reg + 1));
 
-	emit_br(nfp_prog, br_mask, insn->off, 0);
+	emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
 	return 0;
 }
 
-static int
-wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-	    enum br_mask br_mask, bool swap)
+static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
+	const struct jmp_code_map *code;
 	u8 areg, breg;
 
+	code = nfp_jmp_code_get(meta);
+	if (!code)
+		return -EINVAL;
+
 	areg = insn->dst_reg * 2;
 	breg = insn->src_reg * 2;
 
-	if (swap) {
+	if (code->swap) {
 		areg ^= breg;
 		breg ^= areg;
 		areg ^= breg;
@@ -1261,7 +1299,7 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
 	emit_alu(nfp_prog, reg_none(),
 		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
-	emit_br(nfp_prog, br_mask, insn->off, 0);
+	emit_br(nfp_prog, code->br_mask, insn->off, 0);
 
 	return 0;
 }
@@ -1400,7 +1438,7 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	if (!load_lm_ptr)
 		return 0;
 
-	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
+	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
 	wrp_nops(nfp_prog, 3);
 
 	return 0;
@@ -2283,46 +2321,6 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
-static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
-}
-
-static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
-}
-
-static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
-}
-
-static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
-}
-
-static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true);
-}
-
-static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false);
-}
-
-static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false);
-}
-
-static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true);
-}
-
 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
@@ -2392,46 +2390,6 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
-static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
-}
-
-static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
-}
-
-static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
-}
-
-static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
-}
-
-static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true);
-}
-
-static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false);
-}
-
-static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false);
-}
-
-static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
-{
-	return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true);
-}
-
 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
@@ -2520,25 +2478,25 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
 	[BPF_JMP | BPF_JA | BPF_K] = jump,
 	[BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
-	[BPF_JMP | BPF_JGT | BPF_K] = jgt_imm,
-	[BPF_JMP | BPF_JGE | BPF_K] = jge_imm,
-	[BPF_JMP | BPF_JLT | BPF_K] = jlt_imm,
-	[BPF_JMP | BPF_JLE | BPF_K] = jle_imm,
-	[BPF_JMP | BPF_JSGT | BPF_K] = jsgt_imm,
-	[BPF_JMP | BPF_JSGE | BPF_K] = jsge_imm,
-	[BPF_JMP | BPF_JSLT | BPF_K] = jslt_imm,
-	[BPF_JMP | BPF_JSLE | BPF_K] = jsle_imm,
+	[BPF_JMP | BPF_JGT | BPF_K] = cmp_imm,
+	[BPF_JMP | BPF_JGE | BPF_K] = cmp_imm,
+	[BPF_JMP | BPF_JLT | BPF_K] = cmp_imm,
+	[BPF_JMP | BPF_JLE | BPF_K] = cmp_imm,
+	[BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm,
+	[BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm,
+	[BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm,
+	[BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm,
 	[BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
 	[BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
 	[BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
-	[BPF_JMP | BPF_JGT | BPF_X] = jgt_reg,
-	[BPF_JMP | BPF_JGE | BPF_X] = jge_reg,
-	[BPF_JMP | BPF_JLT | BPF_X] = jlt_reg,
-	[BPF_JMP | BPF_JLE | BPF_X] = jle_reg,
-	[BPF_JMP | BPF_JSGT | BPF_X] = jsgt_reg,
-	[BPF_JMP | BPF_JSGE | BPF_X] = jsge_reg,
-	[BPF_JMP | BPF_JSLT | BPF_X] = jslt_reg,
-	[BPF_JMP | BPF_JSLE | BPF_X] = jsle_reg,
+	[BPF_JMP | BPF_JGT | BPF_X] = cmp_reg,
+	[BPF_JMP | BPF_JGE | BPF_X] = cmp_reg,
+	[BPF_JMP | BPF_JLT | BPF_X] = cmp_reg,
+	[BPF_JMP | BPF_JLE | BPF_X] = cmp_reg,
+	[BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg,
+	[BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg,
+	[BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg,
+	[BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg,
 	[BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
 	[BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
 	[BPF_JMP | BPF_CALL] = call,
@@ -2777,6 +2735,54 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
 	}
 }
 
+/* abs(insn.imm) will fit better into unrestricted reg immediate -
+ * convert add/sub of a negative number into a sub/add of a positive one.
+ */
+static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		struct bpf_insn insn = meta->insn;
+
+		if (meta->skip)
+			continue;
+
+		if (BPF_CLASS(insn.code) != BPF_ALU &&
+		    BPF_CLASS(insn.code) != BPF_ALU64 &&
+		    BPF_CLASS(insn.code) != BPF_JMP)
+			continue;
+		if (BPF_SRC(insn.code) != BPF_K)
+			continue;
+		if (insn.imm >= 0)
+			continue;
+
+		if (BPF_CLASS(insn.code) == BPF_JMP) {
+			switch (BPF_OP(insn.code)) {
+			case BPF_JGE:
+			case BPF_JSGE:
+			case BPF_JLT:
+			case BPF_JSLT:
+				meta->jump_neg_op = true;
+				break;
+			default:
+				continue;
+			}
+		} else {
+			if (BPF_OP(insn.code) == BPF_ADD)
+				insn.code = BPF_CLASS(insn.code) | BPF_SUB;
+			else if (BPF_OP(insn.code) == BPF_SUB)
+				insn.code = BPF_CLASS(insn.code) | BPF_ADD;
+			else
+				continue;
+
+			meta->insn.code = insn.code | BPF_K;
+		}
+
+		meta->insn.imm = -insn.imm;
+	}
+}
+
 /* Remove masking after load since our load guarantees this is not needed */
 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
 {
@@ -3212,6 +3218,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 {
 	nfp_bpf_opt_reg_init(nfp_prog);
 
+	nfp_bpf_opt_neg_add_sub(nfp_prog);
 	nfp_bpf_opt_ld_mask(nfp_prog);
 	nfp_bpf_opt_ld_shift(nfp_prog);
 	nfp_bpf_opt_ldst_gather(nfp_prog);
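One point the cmp_imm()/cmp_reg() helpers above rely on: a 64-bit unsigned comparison is built from a 32-bit SUB on the low words followed by a SUB-with-carry on the high words, and the unsigned branch masks (BR_BLO / BR_BHS) test the resulting borrow. The sketch below is a rough user-space model of that borrow chain, not NFP or driver code; borrow_of_sub64() and the example values are illustrative assumptions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Model of the two-step compare cmp_imm()/cmp_reg() emit: a 32-bit SUB on
 * the low words followed by a SUB-with-borrow on the high words.  The final
 * borrow is what the unsigned branch conditions test.
 */
static bool borrow_of_sub64(uint64_t a, uint64_t b)     /* true iff a < b */
{
        uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
        uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
        bool borrow;

        borrow = a_lo < b_lo;                               /* ALU_OP_SUB   */
        borrow = (uint64_t)a_hi < (uint64_t)b_hi + borrow;  /* ALU_OP_SUB_C */
        return borrow;
}

int main(void)
{
        /* BPF_JGE dst, imm: emit dst - imm, take branch if no borrow (BR_BHS).
         * BPF_JGT dst, imm: swap operands to imm - dst, branch on borrow (BR_BLO).
         */
        uint64_t dst = 5, imm = 7;

        printf("dst >= imm: %d\n", !borrow_of_sub64(dst, imm)); /* 0 */
        printf("dst >  imm: %d\n", borrow_of_sub64(imm, dst));  /* 0 */
        return 0;
}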

drivers/net/ethernet/netronome/nfp/bpf/main.h

Lines changed: 5 additions & 1 deletion
@@ -236,6 +236,7 @@ struct nfp_bpf_reg_state {
  * @xadd_over_16bit: 16bit immediate is not guaranteed
  * @xadd_maybe_16bit: 16bit immediate is possible
  * @jmp_dst: destination info for jump instructions
+ * @jump_neg_op: jump instruction has inverted immediate, use ADD instead of SUB
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
  * @arg2: arg2 for call instructions
@@ -264,7 +265,10 @@ struct nfp_insn_meta {
 			bool xadd_maybe_16bit;
 		};
 		/* jump */
-		struct nfp_insn_meta *jmp_dst;
+		struct {
+			struct nfp_insn_meta *jmp_dst;
+			bool jump_neg_op;
+		};
 		/* function calls */
 		struct {
 			u32 func_id;