Skip to content

Commit 3431205

Browse files
Alexei Starovoitov authored and davem330 committed
bpf: make programs see skb->data == L2 for ingress and egress
eBPF programs attached to ingress and egress qdiscs see inconsistent skb->data. For ingress L2 header is already pulled, whereas for egress it's present. This is known to program writers which are currently forced to use BPF_LL_OFF workaround.

Since programs don't change skb internal pointers it is safe to do pull/push right around invocation of the program and earlier taps and later pt->func() will not be affected. Multiple taps via packet_rcv(), tpacket_rcv() are doing the same trick around run_filter/BPF_PROG_RUN even if skb_shared.

This fix finally allows programs to use optimized LD_ABS/IND instructions without BPF_LL_OFF for higher performance.

tc ingress + cls_bpf + samples/bpf/tcbpf1_kern.o

           w/o JIT    w/JIT
    before  20.5      23.6 Mpps
    after   21.8      26.6 Mpps

Old programs with BPF_LL_OFF will still work as-is.

We can now undo most of the earlier workaround commit: a166151 ("bpf: fix bpf helpers to use skb->mac_header relative offsets")

Signed-off-by: Alexei Starovoitov <[email protected]>
Acked-by: Jamal Hadi Salim <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 98da81a commit 3431205

File tree

4 files changed

+30
-29
lines changed

4 files changed

+30
-29
lines changed

net/core/filter.c

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,21 +1238,6 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
12381238
return 0;
12391239
}
12401240

1241-
/**
1242-
* bpf_skb_clone_not_writable - is the header of a clone not writable
1243-
* @skb: buffer to check
1244-
* @len: length up to which to write, can be negative
1245-
*
1246-
* Returns true if modifying the header part of the cloned buffer
1247-
* does require the data to be copied. I.e. this version works with
1248-
* negative lengths needed for eBPF case!
1249-
*/
1250-
static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
1251-
{
1252-
return skb_header_cloned(skb) ||
1253-
(int) skb_headroom(skb) + len > skb->hdr_len;
1254-
}
1255-
12561241
#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
12571242

12581243
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
@@ -1275,9 +1260,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
12751260
if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
12761261
return -EFAULT;
12771262

1278-
offset -= skb->data - skb_mac_header(skb);
12791263
if (unlikely(skb_cloned(skb) &&
1280-
bpf_skb_clone_unwritable(skb, offset + len)))
1264+
!skb_clone_writable(skb, offset + len)))
12811265
return -EFAULT;
12821266

12831267
ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1321,9 +1305,8 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
13211305
if (unlikely((u32) offset > 0xffff))
13221306
return -EFAULT;
13231307

1324-
offset -= skb->data - skb_mac_header(skb);
13251308
if (unlikely(skb_cloned(skb) &&
1326-
bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
1309+
!skb_clone_writable(skb, offset + sizeof(sum))))
13271310
return -EFAULT;
13281311

13291312
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1369,9 +1352,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
13691352
if (unlikely((u32) offset > 0xffff))
13701353
return -EFAULT;
13711354

1372-
offset -= skb->data - skb_mac_header(skb);
13731355
if (unlikely(skb_cloned(skb) &&
1374-
bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
1356+
!skb_clone_writable(skb, offset + sizeof(sum))))
13751357
return -EFAULT;
13761358

13771359
ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1425,8 +1407,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
14251407
if (unlikely(!skb2))
14261408
return -ENOMEM;
14271409

1428-
skb_push(skb2, skb2->data - skb_mac_header(skb2));
1429-
14301410
if (BPF_IS_REDIRECT_INGRESS(flags))
14311411
return dev_forward_skb(dev, skb2);
14321412

net/sched/act_bpf.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
3737
{
3838
struct tcf_bpf *prog = act->priv;
3939
int action, filter_res;
40+
bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
4041

4142
if (unlikely(!skb_mac_header_was_set(skb)))
4243
return TC_ACT_UNSPEC;
@@ -48,7 +49,13 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
4849

4950
/* Needed here for accessing maps. */
5051
rcu_read_lock();
51-
filter_res = BPF_PROG_RUN(prog->filter, skb);
52+
if (at_ingress) {
53+
__skb_push(skb, skb->mac_len);
54+
filter_res = BPF_PROG_RUN(prog->filter, skb);
55+
__skb_pull(skb, skb->mac_len);
56+
} else {
57+
filter_res = BPF_PROG_RUN(prog->filter, skb);
58+
}
5259
rcu_read_unlock();
5360

5461
/* A BPF program may overwrite the default action opcode.

net/sched/cls_bpf.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
6464
{
6565
struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
6666
struct cls_bpf_prog *prog;
67+
#ifdef CONFIG_NET_CLS_ACT
68+
bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
69+
#else
70+
bool at_ingress = false;
71+
#endif
6772
int ret = -1;
6873

6974
if (unlikely(!skb_mac_header_was_set(skb)))
@@ -72,7 +77,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
7277
/* Needed here for accessing maps. */
7378
rcu_read_lock();
7479
list_for_each_entry_rcu(prog, &head->plist, link) {
75-
int filter_res = BPF_PROG_RUN(prog->filter, skb);
80+
int filter_res;
81+
82+
if (at_ingress) {
83+
/* It is safe to push/pull even if skb_shared() */
84+
__skb_push(skb, skb->mac_len);
85+
filter_res = BPF_PROG_RUN(prog->filter, skb);
86+
__skb_pull(skb, skb->mac_len);
87+
} else {
88+
filter_res = BPF_PROG_RUN(prog->filter, skb);
89+
}
7690

7791
if (filter_res == 0)
7892
continue;

samples/bpf/tcbpf1_kern.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
2121

2222
static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
2323
{
24-
__u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF);
24+
__u8 old_tos = load_byte(skb, TOS_OFF);
2525

2626
bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
2727
bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
@@ -34,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
3434

3535
static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
3636
{
37-
__u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF));
37+
__u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
3838

3939
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
4040
bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
@@ -44,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
4444
#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
4545
static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
4646
{
47-
__u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF));
47+
__u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
4848

4949
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
5050
bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
@@ -53,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
5353
SEC("classifier")
5454
int bpf_prog1(struct __sk_buff *skb)
5555
{
56-
__u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol));
56+
__u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
5757
long *value;
5858

5959
if (proto == IPPROTO_TCP) {

0 commit comments

Comments
 (0)