Skip to content

Commit 4485166

Browse files
committed
Merge branch 'bpf-nfp-jmp-memcpy-improvements'
Jiong Wang says: ==================== Currently, the compiler will lower a memcpy function call in an XDP/eBPF C program into a sequence of eBPF load/store pairs in some scenarios. The compiler considers this "inline" optimization beneficial as it can avoid a function call and also increase code locality. However, the Netronome NPU is not a traditional load/store architecture, so doing a sequence of individual load/store actions is not efficient. This patch set tries to identify the load/store sequences composed of load/store pairs that come from memcpy lowering, then accelerates them through the NPU's Command Push Pull (CPP) instruction. This patch set registers a new optimization pass before doing the actual JIT work. It traverses the eBPF IR and, once a candidate sequence is found, records the memory copy source, destination and length information in the first load instruction starting the sequence and marks all remaining instructions in the sequence as skippable. Later, when JITing the first load instruction, optimal instructions will be generated using the recorded information. For the safety of this transformation: - a jump into the middle of the sequence will cancel the optimization. - overlapped memory access will cancel the optimization. - the load destination register still contains the same value as before the transformation. ==================== Signed-off-by: Daniel Borkmann <[email protected]>
2 parents 554b36b + 6bc7103 commit 4485166

File tree

8 files changed

+503
-75
lines changed

8 files changed

+503
-75
lines changed

drivers/net/ethernet/netronome/nfp/bpf/jit.c

Lines changed: 431 additions & 56 deletions
Large diffs are not rendered by default.

drivers/net/ethernet/netronome/nfp/bpf/main.h

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2016 Netronome Systems, Inc.
2+
* Copyright (C) 2016-2017 Netronome Systems, Inc.
33
*
44
* This software is dual licensed under the GNU General License Version 2,
55
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -89,23 +89,37 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
8989
#define nfp_meta_next(meta) list_next_entry(meta, l)
9090
#define nfp_meta_prev(meta) list_prev_entry(meta, l)
9191

92+
#define FLAG_INSN_IS_JUMP_DST BIT(0)
93+
9294
/**
9395
* struct nfp_insn_meta - BPF instruction wrapper
9496
* @insn: BPF instruction
9597
* @ptr: pointer type for memory operations
98+
* @ldst_gather_len: memcpy length gathered from load/store sequence
99+
* @paired_st: the paired store insn at the head of the sequence
96100
* @ptr_not_const: pointer is not always constant
101+
* @jmp_dst: destination info for jump instructions
97102
* @off: index of first generated machine instruction (in nfp_prog.prog)
98103
* @n: eBPF instruction number
104+
* @flags: eBPF instruction extra optimization flags
99105
* @skip: skip this instruction (optimized out)
100106
* @double_cb: callback for second part of the instruction
101107
* @l: link on nfp_prog->insns list
102108
*/
103109
struct nfp_insn_meta {
104110
struct bpf_insn insn;
105-
struct bpf_reg_state ptr;
106-
bool ptr_not_const;
111+
union {
112+
struct {
113+
struct bpf_reg_state ptr;
114+
struct bpf_insn *paired_st;
115+
s16 ldst_gather_len;
116+
bool ptr_not_const;
117+
};
118+
struct nfp_insn_meta *jmp_dst;
119+
};
107120
unsigned int off;
108121
unsigned short n;
122+
unsigned short flags;
109123
bool skip;
110124
instr_cb_t double_cb;
111125

@@ -134,6 +148,16 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
134148
return BPF_MODE(meta->insn.code);
135149
}
136150

151+
static inline bool is_mbpf_load(const struct nfp_insn_meta *meta)
152+
{
153+
return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM);
154+
}
155+
156+
static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
157+
{
158+
return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM);
159+
}
160+
137161
/**
138162
* struct nfp_prog - nfp BPF program
139163
* @prog: machine code
@@ -142,6 +166,7 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
142166
* @verifier_meta: temporary storage for verifier's insn meta
143167
* @type: BPF program type
144168
* @start_off: address of the first instruction in the memory
169+
* @last_bpf_off: address of the last instruction translated from BPF
145170
* @tgt_out: jump target for normal exit
146171
* @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
147172
* @tgt_done: jump target to get the next packet
@@ -160,6 +185,7 @@ struct nfp_prog {
160185
enum bpf_prog_type type;
161186

162187
unsigned int start_off;
188+
unsigned int last_bpf_off;
163189
unsigned int tgt_out;
164190
unsigned int tgt_abort;
165191
unsigned int tgt_done;
@@ -189,4 +215,7 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
189215
struct bpf_prog *prog);
190216
int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
191217
struct bpf_prog *prog);
218+
struct nfp_insn_meta *
219+
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
220+
unsigned int insn_idx, unsigned int n_insns);
192221
#endif

drivers/net/ethernet/netronome/nfp/bpf/offload.c

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2016 Netronome Systems, Inc.
2+
* Copyright (C) 2016-2017 Netronome Systems, Inc.
33
*
44
* This software is dual licensed under the GNU General License Version 2,
55
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -55,11 +55,10 @@ static int
5555
nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
5656
unsigned int cnt)
5757
{
58+
struct nfp_insn_meta *meta;
5859
unsigned int i;
5960

6061
for (i = 0; i < cnt; i++) {
61-
struct nfp_insn_meta *meta;
62-
6362
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
6463
if (!meta)
6564
return -ENOMEM;
@@ -70,6 +69,24 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
7069
list_add_tail(&meta->l, &nfp_prog->insns);
7170
}
7271

72+
/* Another pass to record jump information. */
73+
list_for_each_entry(meta, &nfp_prog->insns, l) {
74+
u64 code = meta->insn.code;
75+
76+
if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
77+
BPF_OP(code) != BPF_CALL) {
78+
struct nfp_insn_meta *dst_meta;
79+
unsigned short dst_indx;
80+
81+
dst_indx = meta->n + 1 + meta->insn.off;
82+
dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
83+
cnt);
84+
85+
meta->jmp_dst = dst_meta;
86+
dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
87+
}
88+
}
89+
7390
return 0;
7491
}
7592

drivers/net/ethernet/netronome/nfp/bpf/verifier.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2016 Netronome Systems, Inc.
2+
* Copyright (C) 2016-2017 Netronome Systems, Inc.
33
*
44
* This software is dual licensed under the GNU General License Version 2,
55
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -40,7 +40,7 @@
4040

4141
#include "main.h"
4242

43-
static struct nfp_insn_meta *
43+
struct nfp_insn_meta *
4444
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
4545
unsigned int insn_idx, unsigned int n_insns)
4646
{
@@ -180,10 +180,10 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
180180
if (meta->insn.code == (BPF_JMP | BPF_EXIT))
181181
return nfp_bpf_check_exit(nfp_prog, env);
182182

183-
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
183+
if (is_mbpf_load(meta))
184184
return nfp_bpf_check_ptr(nfp_prog, meta, env,
185185
meta->insn.src_reg);
186-
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
186+
if (is_mbpf_store(meta))
187187
return nfp_bpf_check_ptr(nfp_prog, meta, env,
188188
meta->insn.dst_reg);
189189

drivers/net/ethernet/netronome/nfp/nfp_asm.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141

4242
const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
4343
[CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 },
44+
[CMD_TGT_WRITE32_SWAP] = { 0x02, 0x5f },
4445
[CMD_TGT_READ8] = { 0x01, 0x43 },
4546
[CMD_TGT_READ32] = { 0x00, 0x5c },
4647
[CMD_TGT_READ32_LE] = { 0x01, 0x5c },
@@ -120,7 +121,8 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg,
120121
reg->dst = nfp_swreg_to_unreg(dst, true);
121122

122123
/* Decode source operands */
123-
if (swreg_type(lreg) == swreg_type(rreg))
124+
if (swreg_type(lreg) == swreg_type(rreg) &&
125+
swreg_type(lreg) != NN_REG_NONE)
124126
return -EFAULT;
125127

126128
if (swreg_type(lreg) == NN_REG_GPR_B ||
@@ -200,7 +202,8 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg,
200202
reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
201203

202204
/* Decode source operands */
203-
if (swreg_type(lreg) == swreg_type(rreg))
205+
if (swreg_type(lreg) == swreg_type(rreg) &&
206+
swreg_type(lreg) != NN_REG_NONE)
204207
return -EFAULT;
205208

206209
if (swreg_type(lreg) == NN_REG_GPR_B ||

drivers/net/ethernet/netronome/nfp/nfp_asm.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2016 Netronome Systems, Inc.
2+
* Copyright (C) 2016-2017 Netronome Systems, Inc.
33
*
44
* This software is dual licensed under the GNU General License Version 2,
55
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -209,6 +209,7 @@ enum alu_dst_ab {
209209
#define OP_CMD_CNT 0x0000e000000ULL
210210
#define OP_CMD_SIG 0x000f0000000ULL
211211
#define OP_CMD_TGT_CMD 0x07f00000000ULL
212+
#define OP_CMD_INDIR 0x20000000000ULL
212213
#define OP_CMD_MODE 0x1c0000000000ULL
213214

214215
struct cmd_tgt_act {
@@ -219,6 +220,7 @@ struct cmd_tgt_act {
219220
enum cmd_tgt_map {
220221
CMD_TGT_READ8,
221222
CMD_TGT_WRITE8_SWAP,
223+
CMD_TGT_WRITE32_SWAP,
222224
CMD_TGT_READ32,
223225
CMD_TGT_READ32_LE,
224226
CMD_TGT_READ32_SWAP,
@@ -240,6 +242,9 @@ enum cmd_ctx_swap {
240242
CMD_CTX_NO_SWAP = 3,
241243
};
242244

245+
#define CMD_OVE_LEN BIT(7)
246+
#define CMD_OV_LEN GENMASK(12, 8)
247+
243248
#define OP_LCSR_BASE 0x0fc00000000ULL
244249
#define OP_LCSR_A_SRC 0x000000003ffULL
245250
#define OP_LCSR_B_SRC 0x000000ffc00ULL

drivers/net/ethernet/netronome/nfp/nfp_net.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,8 @@ struct nfp_net_dp {
548548
* @max_r_vecs: Number of allocated interrupt vectors for RX/TX
549549
* @max_tx_rings: Maximum number of TX rings supported by the Firmware
550550
* @max_rx_rings: Maximum number of RX rings supported by the Firmware
551+
* @stride_rx: Queue controller RX queue spacing
552+
* @stride_tx: Queue controller TX queue spacing
551553
* @r_vecs: Pre-allocated array of ring vectors
552554
* @irq_entries: Pre-allocated array of MSI-X entries
553555
* @lsc_handler: Handler for Link State Change interrupt

drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -372,8 +372,7 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest,
372372
* that it can be accessed directly.
373373
*
374374
* NOTE: @address and @size must be 32-bit aligned values.
375-
*
376-
* NOTE: The area must also be 'released' when the structure is freed.
375+
* The area must also be 'released' when the structure is freed.
377376
*
378377
* Return: NFP CPP Area handle, or NULL
379378
*/
@@ -536,8 +535,7 @@ void nfp_cpp_area_release_free(struct nfp_cpp_area *area)
536535
* Read data from indicated CPP region.
537536
*
538537
* NOTE: @offset and @length must be 32-bit aligned values.
539-
*
540-
* NOTE: Area must have been locked down with an 'acquire'.
538+
* Area must have been locked down with an 'acquire'.
541539
*
542540
* Return: length of io, or -ERRNO
543541
*/
@@ -558,8 +556,7 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area,
558556
* Write data to indicated CPP region.
559557
*
560558
* NOTE: @offset and @length must be 32-bit aligned values.
561-
*
562-
* NOTE: Area must have been locked down with an 'acquire'.
559+
* Area must have been locked down with an 'acquire'.
563560
*
564561
* Return: length of io, or -ERRNO
565562
*/

0 commit comments

Comments
 (0)