Skip to content

Commit 071a234

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2018-10-08 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) sk_lookup_[tcp|udp] and sk_release helpers from Joe Stringer which allow BPF programs to perform lookups for sockets in a network namespace. This would allow programs to determine early on in processing whether the stack is expecting to receive the packet, and perform some action (e.g. drop, forward somewhere) based on this information. 2) per-cpu cgroup local storage from Roman Gushchin. Per-cpu cgroup local storage is very similar to simple cgroup storage except all the data is per-cpu. The main goal of the per-cpu variant is to implement super fast counters (e.g. packet counters), which require neither lookups nor atomic operations in a fast path. An example of these hybrid counters is in selftests/bpf/netcnt_prog.c 3) allow HW offload of programs with BPF-to-BPF function calls from Quentin Monnet 4) support more than 64-byte key/value in HW offloaded BPF maps from Jakub Kicinski 5) rename of libbpf interfaces from Andrey Ignatov. libbpf is maturing as a library and should follow good practices in library design and implementation to play well with other libraries. This patch set brings consistent naming convention to global symbols. 6) relicense libbpf as LGPL-2.1 OR BSD-2-Clause from Alexei Starovoitov to let Apache2 projects use libbpf 7) various AF_XDP fixes from Björn and Magnus ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 9000a45 + df3f94a commit 071a234

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+4184
-707
lines changed

Documentation/networking/af_xdp.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@ log2(2048) LSB of the addr will be masked off, meaning that 2048, 2050
159159
and 3000 refers to the same chunk.
160160

161161

162-
UMEM Completetion Ring
163-
~~~~~~~~~~~~~~~~~~~~~~
162+
UMEM Completion Ring
163+
~~~~~~~~~~~~~~~~~~~~
164164

165165
The Completion Ring is used to transfer ownership of UMEM frames from
166166
kernel-space to user-space. Just like the Fill ring, UMEM indices are

Documentation/networking/filter.txt

Lines changed: 80 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -203,11 +203,11 @@ opcodes as defined in linux/filter.h stand for:
203203

204204
Instruction Addressing mode Description
205205

206-
ld 1, 2, 3, 4, 10 Load word into A
206+
ld 1, 2, 3, 4, 12 Load word into A
207207
ldi 4 Load word into A
208208
ldh 1, 2 Load half-word into A
209209
ldb 1, 2 Load byte into A
210-
ldx 3, 4, 5, 10 Load word into X
210+
ldx 3, 4, 5, 12 Load word into X
211211
ldxi 4 Load word into X
212212
ldxb 5 Load byte into X
213213

@@ -216,14 +216,14 @@ opcodes as defined in linux/filter.h stand for:
216216

217217
jmp 6 Jump to label
218218
ja 6 Jump to label
219-
jeq 7, 8 Jump on A == k
220-
jneq 8 Jump on A != k
221-
jne 8 Jump on A != k
222-
jlt 8 Jump on A < k
223-
jle 8 Jump on A <= k
224-
jgt 7, 8 Jump on A > k
225-
jge 7, 8 Jump on A >= k
226-
jset 7, 8 Jump on A & k
219+
jeq 7, 8, 9, 10 Jump on A == <x>
220+
jneq 9, 10 Jump on A != <x>
221+
jne 9, 10 Jump on A != <x>
222+
jlt 9, 10 Jump on A < <x>
223+
jle 9, 10 Jump on A <= <x>
224+
jgt 7, 8, 9, 10 Jump on A > <x>
225+
jge 7, 8, 9, 10 Jump on A >= <x>
226+
jset 7, 8, 9, 10 Jump on A & <x>
227227

228228
add 0, 4 A + <x>
229229
sub 0, 4 A - <x>
@@ -240,7 +240,7 @@ opcodes as defined in linux/filter.h stand for:
240240
tax Copy A into X
241241
txa Copy X into A
242242

243-
ret 4, 9 Return
243+
ret 4, 11 Return
244244

245245
The next table shows addressing formats from the 2nd column:
246246

@@ -254,9 +254,11 @@ The next table shows addressing formats from the 2nd column:
254254
5 4*([k]&0xf) Lower nibble * 4 at byte offset k in the packet
255255
6 L Jump label L
256256
7 #k,Lt,Lf Jump to Lt if true, otherwise jump to Lf
257-
8 #k,Lt Jump to Lt if predicate is true
258-
9 a/%a Accumulator A
259-
10 extension BPF extension
257+
8 x/%x,Lt,Lf Jump to Lt if true, otherwise jump to Lf
258+
9 #k,Lt Jump to Lt if predicate is true
259+
10 x/%x,Lt Jump to Lt if predicate is true
260+
11 a/%a Accumulator A
261+
12 extension BPF extension
260262

261263
The Linux kernel also has a couple of BPF extensions that are used along
262264
with the class of load instructions by "overloading" the k argument with
@@ -1125,6 +1127,14 @@ pointer type. The types of pointers describe their base, as follows:
11251127
PTR_TO_STACK Frame pointer.
11261128
PTR_TO_PACKET skb->data.
11271129
PTR_TO_PACKET_END skb->data + headlen; arithmetic forbidden.
1130+
PTR_TO_SOCKET Pointer to struct bpf_sock_ops, implicitly refcounted.
1131+
PTR_TO_SOCKET_OR_NULL
1132+
Either a pointer to a socket, or NULL; socket lookup
1133+
returns this type, which becomes a PTR_TO_SOCKET when
1134+
checked != NULL. PTR_TO_SOCKET is reference-counted,
1135+
so programs must release the reference through the
1136+
socket release function before the end of the program.
1137+
Arithmetic on these pointers is forbidden.
11281138
However, a pointer may be offset from this base (as a result of pointer
11291139
arithmetic), and this is tracked in two parts: the 'fixed offset' and 'variable
11301140
offset'. The former is used when an exactly-known value (e.g. an immediate
@@ -1171,6 +1181,13 @@ over the Ethernet header, then reads IHL and addes (IHL * 4), the resulting
11711181
pointer will have a variable offset known to be 4n+2 for some n, so adding the 2
11721182
bytes (NET_IP_ALIGN) gives a 4-byte alignment and so word-sized accesses through
11731183
that pointer are safe.
1184+
The 'id' field is also used on PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL, common
1185+
to all copies of the pointer returned from a socket lookup. This has similar
1186+
behaviour to the handling for PTR_TO_MAP_VALUE_OR_NULL->PTR_TO_MAP_VALUE, but
1187+
it also handles reference tracking for the pointer. PTR_TO_SOCKET implicitly
1188+
represents a reference to the corresponding 'struct sock'. To ensure that the
1189+
reference is not leaked, it is imperative to NULL-check the reference and in
1190+
the non-NULL case, pass the valid reference to the socket release function.
11741191

11751192
Direct packet access
11761193
--------------------
@@ -1444,6 +1461,55 @@ Error:
14441461
8: (7a) *(u64 *)(r0 +0) = 1
14451462
R0 invalid mem access 'imm'
14461463

1464+
Program that performs a socket lookup then sets the pointer to NULL without
1465+
checking it:
1466+
value:
1467+
BPF_MOV64_IMM(BPF_REG_2, 0),
1468+
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
1469+
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
1470+
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
1471+
BPF_MOV64_IMM(BPF_REG_3, 4),
1472+
BPF_MOV64_IMM(BPF_REG_4, 0),
1473+
BPF_MOV64_IMM(BPF_REG_5, 0),
1474+
BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
1475+
BPF_MOV64_IMM(BPF_REG_0, 0),
1476+
BPF_EXIT_INSN(),
1477+
Error:
1478+
0: (b7) r2 = 0
1479+
1: (63) *(u32 *)(r10 -8) = r2
1480+
2: (bf) r2 = r10
1481+
3: (07) r2 += -8
1482+
4: (b7) r3 = 4
1483+
5: (b7) r4 = 0
1484+
6: (b7) r5 = 0
1485+
7: (85) call bpf_sk_lookup_tcp#65
1486+
8: (b7) r0 = 0
1487+
9: (95) exit
1488+
Unreleased reference id=1, alloc_insn=7
1489+
1490+
Program that performs a socket lookup but does not NULL-check the returned
1491+
value:
1492+
BPF_MOV64_IMM(BPF_REG_2, 0),
1493+
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
1494+
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
1495+
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
1496+
BPF_MOV64_IMM(BPF_REG_3, 4),
1497+
BPF_MOV64_IMM(BPF_REG_4, 0),
1498+
BPF_MOV64_IMM(BPF_REG_5, 0),
1499+
BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
1500+
BPF_EXIT_INSN(),
1501+
Error:
1502+
0: (b7) r2 = 0
1503+
1: (63) *(u32 *)(r10 -8) = r2
1504+
2: (bf) r2 = r10
1505+
3: (07) r2 += -8
1506+
4: (b7) r3 = 4
1507+
5: (b7) r4 = 0
1508+
6: (b7) r5 = 0
1509+
7: (85) call bpf_sk_lookup_tcp#65
1510+
8: (95) exit
1511+
Unreleased reference id=1, alloc_insn=7
1512+
14471513
Testing
14481514
-------
14491515

drivers/net/ethernet/netronome/nfp/bpf/cmsg.c

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,32 @@ nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size)
8989
return skb;
9090
}
9191

92+
static unsigned int
93+
nfp_bpf_cmsg_map_req_size(struct nfp_app_bpf *bpf, unsigned int n)
94+
{
95+
unsigned int size;
96+
97+
size = sizeof(struct cmsg_req_map_op);
98+
size += (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n;
99+
100+
return size;
101+
}
102+
92103
static struct sk_buff *
93104
nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
105+
{
106+
return nfp_bpf_cmsg_alloc(bpf, nfp_bpf_cmsg_map_req_size(bpf, n));
107+
}
108+
109+
static unsigned int
110+
nfp_bpf_cmsg_map_reply_size(struct nfp_app_bpf *bpf, unsigned int n)
94111
{
95112
unsigned int size;
96113

97-
size = sizeof(struct cmsg_req_map_op);
98-
size += sizeof(struct cmsg_key_value_pair) * n;
114+
size = sizeof(struct cmsg_reply_map_op);
115+
size += (bpf->cmsg_key_sz + bpf->cmsg_val_sz) * n;
99116

100-
return nfp_bpf_cmsg_alloc(bpf, size);
117+
return size;
101118
}
102119

103120
static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
@@ -338,6 +355,34 @@ void nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map)
338355
dev_consume_skb_any(skb);
339356
}
340357

358+
static void *
359+
nfp_bpf_ctrl_req_key(struct nfp_app_bpf *bpf, struct cmsg_req_map_op *req,
360+
unsigned int n)
361+
{
362+
return &req->data[bpf->cmsg_key_sz * n + bpf->cmsg_val_sz * n];
363+
}
364+
365+
static void *
366+
nfp_bpf_ctrl_req_val(struct nfp_app_bpf *bpf, struct cmsg_req_map_op *req,
367+
unsigned int n)
368+
{
369+
return &req->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
370+
}
371+
372+
static void *
373+
nfp_bpf_ctrl_reply_key(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
374+
unsigned int n)
375+
{
376+
return &reply->data[bpf->cmsg_key_sz * n + bpf->cmsg_val_sz * n];
377+
}
378+
379+
static void *
380+
nfp_bpf_ctrl_reply_val(struct nfp_app_bpf *bpf, struct cmsg_reply_map_op *reply,
381+
unsigned int n)
382+
{
383+
return &reply->data[bpf->cmsg_key_sz * (n + 1) + bpf->cmsg_val_sz * n];
384+
}
385+
341386
static int
342387
nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
343388
enum nfp_bpf_cmsg_type op,
@@ -366,12 +411,13 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
366411

367412
/* Copy inputs */
368413
if (key)
369-
memcpy(&req->elem[0].key, key, map->key_size);
414+
memcpy(nfp_bpf_ctrl_req_key(bpf, req, 0), key, map->key_size);
370415
if (value)
371-
memcpy(&req->elem[0].value, value, map->value_size);
416+
memcpy(nfp_bpf_ctrl_req_val(bpf, req, 0), value,
417+
map->value_size);
372418

373419
skb = nfp_bpf_cmsg_communicate(bpf, skb, op,
374-
sizeof(*reply) + sizeof(*reply->elem));
420+
nfp_bpf_cmsg_map_reply_size(bpf, 1));
375421
if (IS_ERR(skb))
376422
return PTR_ERR(skb);
377423

@@ -382,9 +428,11 @@ nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
382428

383429
/* Copy outputs */
384430
if (out_key)
385-
memcpy(out_key, &reply->elem[0].key, map->key_size);
431+
memcpy(out_key, nfp_bpf_ctrl_reply_key(bpf, reply, 0),
432+
map->key_size);
386433
if (out_value)
387-
memcpy(out_value, &reply->elem[0].value, map->value_size);
434+
memcpy(out_value, nfp_bpf_ctrl_reply_val(bpf, reply, 0),
435+
map->value_size);
388436

389437
dev_consume_skb_any(skb);
390438

@@ -428,6 +476,13 @@ int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
428476
key, NULL, 0, next_key, NULL);
429477
}
430478

479+
unsigned int nfp_bpf_ctrl_cmsg_mtu(struct nfp_app_bpf *bpf)
480+
{
481+
return max3((unsigned int)NFP_NET_DEFAULT_MTU,
482+
nfp_bpf_cmsg_map_req_size(bpf, 1),
483+
nfp_bpf_cmsg_map_reply_size(bpf, 1));
484+
}
485+
431486
void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
432487
{
433488
struct nfp_app_bpf *bpf = app->priv;

drivers/net/ethernet/netronome/nfp/bpf/fw.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ enum bpf_cap_tlv_type {
5252
NFP_BPF_CAP_TYPE_RANDOM = 4,
5353
NFP_BPF_CAP_TYPE_QUEUE_SELECT = 5,
5454
NFP_BPF_CAP_TYPE_ADJUST_TAIL = 6,
55+
NFP_BPF_CAP_TYPE_ABI_VERSION = 7,
5556
};
5657

5758
struct nfp_bpf_cap_tlv_func {
@@ -98,6 +99,7 @@ enum nfp_bpf_cmsg_type {
9899
#define CMSG_TYPE_MAP_REPLY_BIT 7
99100
#define __CMSG_REPLY(req) (BIT(CMSG_TYPE_MAP_REPLY_BIT) | (req))
100101

102+
/* BPF ABIv2 fixed-length control message fields */
101103
#define CMSG_MAP_KEY_LW 16
102104
#define CMSG_MAP_VALUE_LW 16
103105

@@ -147,24 +149,19 @@ struct cmsg_reply_map_free_tbl {
147149
__be32 count;
148150
};
149151

150-
struct cmsg_key_value_pair {
151-
__be32 key[CMSG_MAP_KEY_LW];
152-
__be32 value[CMSG_MAP_VALUE_LW];
153-
};
154-
155152
struct cmsg_req_map_op {
156153
struct cmsg_hdr hdr;
157154
__be32 tid;
158155
__be32 count;
159156
__be32 flags;
160-
struct cmsg_key_value_pair elem[0];
157+
u8 data[0];
161158
};
162159

163160
struct cmsg_reply_map_op {
164161
struct cmsg_reply_map_simple reply_hdr;
165162
__be32 count;
166163
__be32 resv;
167-
struct cmsg_key_value_pair elem[0];
164+
u8 data[0];
168165
};
169166

170167
struct cmsg_bpf_event {

0 commit comments

Comments
 (0)