Skip to content

Commit d691f9e

Browse files
Alexei Starovoitov authored and davem330 committed
bpf: allow programs to write to certain skb fields
Allow programs to read/write skb->mark, tc_index fields and ((struct qdisc_skb_cb *)cb)->data.

mark and tc_index are generically useful in TC. cb[0]-cb[4] are primarily used to pass arguments from one program to another called via bpf_tail_call(), which can be seen in the sockex3_kern.c example.

All fields of 'struct __sk_buff' are readable to socket and tc_cls_act progs. mark, tc_index are writeable from tc_cls_act only. cb[0]-cb[4] are writeable by both sockets and tc_cls_act.

Add verifier tests and improve sample code.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 3431205 commit d691f9e

File tree

6 files changed

+207
-48
lines changed

6 files changed

+207
-48
lines changed

include/linux/bpf.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,8 @@ struct bpf_verifier_ops {
105105
*/
106106
bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
107107

108-
u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
108+
u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
109+
int src_reg, int ctx_off,
109110
struct bpf_insn *insn);
110111
};
111112

include/uapi/linux/bpf.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -248,6 +248,8 @@ struct __sk_buff {
248248
__u32 priority;
249249
__u32 ingress_ifindex;
250250
__u32 ifindex;
251+
__u32 tc_index;
252+
__u32 cb[5];
251253
};
252254

253255
#endif /* _UAPI__LINUX_BPF_H__ */

kernel/bpf/verifier.c

Lines changed: 28 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1692,6 +1692,8 @@ static int do_check(struct verifier_env *env)
16921692
}
16931693

16941694
} else if (class == BPF_STX) {
1695+
enum bpf_reg_type dst_reg_type;
1696+
16951697
if (BPF_MODE(insn->code) == BPF_XADD) {
16961698
err = check_xadd(env, insn);
16971699
if (err)
@@ -1700,11 +1702,6 @@ static int do_check(struct verifier_env *env)
17001702
continue;
17011703
}
17021704

1703-
if (BPF_MODE(insn->code) != BPF_MEM ||
1704-
insn->imm != 0) {
1705-
verbose("BPF_STX uses reserved fields\n");
1706-
return -EINVAL;
1707-
}
17081705
/* check src1 operand */
17091706
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
17101707
if (err)
@@ -1714,13 +1711,24 @@ static int do_check(struct verifier_env *env)
17141711
if (err)
17151712
return err;
17161713

1714+
dst_reg_type = regs[insn->dst_reg].type;
1715+
17171716
/* check that memory (dst_reg + off) is writeable */
17181717
err = check_mem_access(env, insn->dst_reg, insn->off,
17191718
BPF_SIZE(insn->code), BPF_WRITE,
17201719
insn->src_reg);
17211720
if (err)
17221721
return err;
17231722

1723+
if (insn->imm == 0) {
1724+
insn->imm = dst_reg_type;
1725+
} else if (dst_reg_type != insn->imm &&
1726+
(dst_reg_type == PTR_TO_CTX ||
1727+
insn->imm == PTR_TO_CTX)) {
1728+
verbose("same insn cannot be used with different pointers\n");
1729+
return -EINVAL;
1730+
}
1731+
17241732
} else if (class == BPF_ST) {
17251733
if (BPF_MODE(insn->code) != BPF_MEM ||
17261734
insn->src_reg != BPF_REG_0) {
@@ -1839,12 +1847,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
18391847

18401848
for (i = 0; i < insn_cnt; i++, insn++) {
18411849
if (BPF_CLASS(insn->code) == BPF_LDX &&
1842-
(BPF_MODE(insn->code) != BPF_MEM ||
1843-
insn->imm != 0)) {
1850+
(BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
18441851
verbose("BPF_LDX uses reserved fields\n");
18451852
return -EINVAL;
18461853
}
18471854

1855+
if (BPF_CLASS(insn->code) == BPF_STX &&
1856+
((BPF_MODE(insn->code) != BPF_MEM &&
1857+
BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
1858+
verbose("BPF_STX uses reserved fields\n");
1859+
return -EINVAL;
1860+
}
1861+
18481862
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
18491863
struct bpf_map *map;
18501864
struct fd f;
@@ -1967,12 +1981,17 @@ static int convert_ctx_accesses(struct verifier_env *env)
19671981
struct bpf_prog *new_prog;
19681982
u32 cnt;
19691983
int i;
1984+
enum bpf_access_type type;
19701985

19711986
if (!env->prog->aux->ops->convert_ctx_access)
19721987
return 0;
19731988

19741989
for (i = 0; i < insn_cnt; i++, insn++) {
1975-
if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
1990+
if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
1991+
type = BPF_READ;
1992+
else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
1993+
type = BPF_WRITE;
1994+
else
19761995
continue;
19771996

19781997
if (insn->imm != PTR_TO_CTX) {
@@ -1982,7 +2001,7 @@ static int convert_ctx_accesses(struct verifier_env *env)
19822001
}
19832002

19842003
cnt = env->prog->aux->ops->
1985-
convert_ctx_access(insn->dst_reg, insn->src_reg,
2004+
convert_ctx_access(type, insn->dst_reg, insn->src_reg,
19862005
insn->off, insn_buf);
19872006
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
19882007
verbose("bpf verifier is misconfigured\n");

net/core/filter.c

Lines changed: 82 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@
4646
#include <linux/seccomp.h>
4747
#include <linux/if_vlan.h>
4848
#include <linux/bpf.h>
49+
#include <net/sch_generic.h>
4950

5051
/**
5152
* sk_filter - run a packet through a socket filter
@@ -1463,13 +1464,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
14631464
}
14641465
}
14651466

1466-
static bool sk_filter_is_valid_access(int off, int size,
1467-
enum bpf_access_type type)
1467+
static bool __is_valid_access(int off, int size, enum bpf_access_type type)
14681468
{
1469-
/* only read is allowed */
1470-
if (type != BPF_READ)
1471-
return false;
1472-
14731469
/* check bounds */
14741470
if (off < 0 || off >= sizeof(struct __sk_buff))
14751471
return false;
@@ -1485,8 +1481,42 @@ static bool sk_filter_is_valid_access(int off, int size,
14851481
return true;
14861482
}
14871483

1488-
static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
1489-
struct bpf_insn *insn_buf)
1484+
static bool sk_filter_is_valid_access(int off, int size,
1485+
enum bpf_access_type type)
1486+
{
1487+
if (type == BPF_WRITE) {
1488+
switch (off) {
1489+
case offsetof(struct __sk_buff, cb[0]) ...
1490+
offsetof(struct __sk_buff, cb[4]):
1491+
break;
1492+
default:
1493+
return false;
1494+
}
1495+
}
1496+
1497+
return __is_valid_access(off, size, type);
1498+
}
1499+
1500+
static bool tc_cls_act_is_valid_access(int off, int size,
1501+
enum bpf_access_type type)
1502+
{
1503+
if (type == BPF_WRITE) {
1504+
switch (off) {
1505+
case offsetof(struct __sk_buff, mark):
1506+
case offsetof(struct __sk_buff, tc_index):
1507+
case offsetof(struct __sk_buff, cb[0]) ...
1508+
offsetof(struct __sk_buff, cb[4]):
1509+
break;
1510+
default:
1511+
return false;
1512+
}
1513+
}
1514+
return __is_valid_access(off, size, type);
1515+
}
1516+
1517+
static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
1518+
int src_reg, int ctx_off,
1519+
struct bpf_insn *insn_buf)
14901520
{
14911521
struct bpf_insn *insn = insn_buf;
14921522

@@ -1538,7 +1568,15 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
15381568
break;
15391569

15401570
case offsetof(struct __sk_buff, mark):
1541-
return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
1571+
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
1572+
1573+
if (type == BPF_WRITE)
1574+
*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
1575+
offsetof(struct sk_buff, mark));
1576+
else
1577+
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
1578+
offsetof(struct sk_buff, mark));
1579+
break;
15421580

15431581
case offsetof(struct __sk_buff, pkt_type):
15441582
return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
@@ -1553,6 +1591,38 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
15531591
case offsetof(struct __sk_buff, vlan_tci):
15541592
return convert_skb_access(SKF_AD_VLAN_TAG,
15551593
dst_reg, src_reg, insn);
1594+
1595+
case offsetof(struct __sk_buff, cb[0]) ...
1596+
offsetof(struct __sk_buff, cb[4]):
1597+
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
1598+
1599+
ctx_off -= offsetof(struct __sk_buff, cb[0]);
1600+
ctx_off += offsetof(struct sk_buff, cb);
1601+
ctx_off += offsetof(struct qdisc_skb_cb, data);
1602+
if (type == BPF_WRITE)
1603+
*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
1604+
else
1605+
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
1606+
break;
1607+
1608+
case offsetof(struct __sk_buff, tc_index):
1609+
#ifdef CONFIG_NET_SCHED
1610+
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
1611+
1612+
if (type == BPF_WRITE)
1613+
*insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg,
1614+
offsetof(struct sk_buff, tc_index));
1615+
else
1616+
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
1617+
offsetof(struct sk_buff, tc_index));
1618+
break;
1619+
#else
1620+
if (type == BPF_WRITE)
1621+
*insn++ = BPF_MOV64_REG(dst_reg, dst_reg);
1622+
else
1623+
*insn++ = BPF_MOV64_IMM(dst_reg, 0);
1624+
break;
1625+
#endif
15561626
}
15571627

15581628
return insn - insn_buf;
@@ -1561,13 +1631,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
15611631
static const struct bpf_verifier_ops sk_filter_ops = {
15621632
.get_func_proto = sk_filter_func_proto,
15631633
.is_valid_access = sk_filter_is_valid_access,
1564-
.convert_ctx_access = sk_filter_convert_ctx_access,
1634+
.convert_ctx_access = bpf_net_convert_ctx_access,
15651635
};
15661636

15671637
static const struct bpf_verifier_ops tc_cls_act_ops = {
15681638
.get_func_proto = tc_cls_act_func_proto,
1569-
.is_valid_access = sk_filter_is_valid_access,
1570-
.convert_ctx_access = sk_filter_convert_ctx_access,
1639+
.is_valid_access = tc_cls_act_is_valid_access,
1640+
.convert_ctx_access = bpf_net_convert_ctx_access,
15711641
};
15721642

15731643
static struct bpf_prog_type_list sk_filter_type __read_mostly = {

samples/bpf/sockex3_kern.c

Lines changed: 11 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,6 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
8989

9090
struct globals {
9191
struct flow_keys flow;
92-
__u32 nhoff;
9392
};
9493

9594
struct bpf_map_def SEC("maps") percpu_map = {
@@ -139,7 +138,7 @@ static void update_stats(struct __sk_buff *skb, struct globals *g)
139138
static __always_inline void parse_ip_proto(struct __sk_buff *skb,
140139
struct globals *g, __u32 ip_proto)
141140
{
142-
__u32 nhoff = g->nhoff;
141+
__u32 nhoff = skb->cb[0];
143142
int poff;
144143

145144
switch (ip_proto) {
@@ -165,7 +164,7 @@ static __always_inline void parse_ip_proto(struct __sk_buff *skb,
165164
if (gre_flags & GRE_SEQ)
166165
nhoff += 4;
167166

168-
g->nhoff = nhoff;
167+
skb->cb[0] = nhoff;
169168
parse_eth_proto(skb, gre_proto);
170169
break;
171170
}
@@ -195,7 +194,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb)
195194
if (!g)
196195
return 0;
197196

198-
nhoff = g->nhoff;
197+
nhoff = skb->cb[0];
199198

200199
if (unlikely(ip_is_fragment(skb, nhoff)))
201200
return 0;
@@ -210,7 +209,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb)
210209
verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
211210
nhoff += (verlen & 0xF) << 2;
212211

213-
g->nhoff = nhoff;
212+
skb->cb[0] = nhoff;
214213
parse_ip_proto(skb, g, ip_proto);
215214
return 0;
216215
}
@@ -223,7 +222,7 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb)
223222
if (!g)
224223
return 0;
225224

226-
nhoff = g->nhoff;
225+
nhoff = skb->cb[0];
227226

228227
ip_proto = load_byte(skb,
229228
nhoff + offsetof(struct ipv6hdr, nexthdr));
@@ -233,25 +232,21 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb)
233232
nhoff + offsetof(struct ipv6hdr, daddr));
234233
nhoff += sizeof(struct ipv6hdr);
235234

236-
g->nhoff = nhoff;
235+
skb->cb[0] = nhoff;
237236
parse_ip_proto(skb, g, ip_proto);
238237
return 0;
239238
}
240239

241240
PROG(PARSE_VLAN)(struct __sk_buff *skb)
242241
{
243-
struct globals *g = this_cpu_globals();
244242
__u32 nhoff, proto;
245243

246-
if (!g)
247-
return 0;
248-
249-
nhoff = g->nhoff;
244+
nhoff = skb->cb[0];
250245

251246
proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
252247
h_vlan_encapsulated_proto));
253248
nhoff += sizeof(struct vlan_hdr);
254-
g->nhoff = nhoff;
249+
skb->cb[0] = nhoff;
255250

256251
parse_eth_proto(skb, proto);
257252

@@ -260,17 +255,13 @@ PROG(PARSE_VLAN)(struct __sk_buff *skb)
260255

261256
PROG(PARSE_MPLS)(struct __sk_buff *skb)
262257
{
263-
struct globals *g = this_cpu_globals();
264258
__u32 nhoff, label;
265259

266-
if (!g)
267-
return 0;
268-
269-
nhoff = g->nhoff;
260+
nhoff = skb->cb[0];
270261

271262
label = load_word(skb, nhoff);
272263
nhoff += sizeof(struct mpls_label);
273-
g->nhoff = nhoff;
264+
skb->cb[0] = nhoff;
274265

275266
if (label & MPLS_LS_S_MASK) {
276267
__u8 verlen = load_byte(skb, nhoff);
@@ -288,14 +279,10 @@ PROG(PARSE_MPLS)(struct __sk_buff *skb)
288279
SEC("socket/0")
289280
int main_prog(struct __sk_buff *skb)
290281
{
291-
struct globals *g = this_cpu_globals();
292282
__u32 nhoff = ETH_HLEN;
293283
__u32 proto = load_half(skb, 12);
294284

295-
if (!g)
296-
return 0;
297-
298-
g->nhoff = nhoff;
285+
skb->cb[0] = nhoff;
299286
parse_eth_proto(skb, proto);
300287
return 0;
301288
}

0 commit comments

Comments
 (0)