Skip to content

Commit fa24669

Browse files
jrfastab authored and borkmann committed
bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT:
Add support for the BPF_F_INGRESS flag in skb redirect helper. To do this convert skb into a scatterlist and push into ingress queue. This is the same logic that is used in the sk_msg redirect helper so it should feel familiar. Signed-off-by: John Fastabend <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]>
1 parent 2596f64 commit fa24669

File tree

3 files changed

+78
-19
lines changed

3 files changed

+78
-19
lines changed

include/linux/filter.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -521,6 +521,7 @@ struct sk_msg_buff {
521521
__u32 key;
522522
__u32 flags;
523523
struct bpf_map *map;
524+
struct sk_buff *skb;
524525
struct list_head list;
525526
};
526527

kernel/bpf/sockmap.c

Lines changed: 76 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -785,7 +785,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
785785
i++;
786786
if (i == MAX_SKB_FRAGS)
787787
i = 0;
788-
put_page(page);
788+
if (!md->skb)
789+
put_page(page);
789790
}
790791
if (copied == len)
791792
break;
@@ -794,6 +795,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
794795

795796
if (!sg->length && md->sg_start == md->sg_end) {
796797
list_del(&md->list);
798+
if (md->skb)
799+
consume_skb(md->skb);
797800
kfree(md);
798801
}
799802
}
@@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
10451048
__SK_DROP;
10461049
}
10471050

1051+
static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
1052+
{
1053+
struct sock *sk = psock->sock;
1054+
int copied = 0, num_sg;
1055+
struct sk_msg_buff *r;
1056+
1057+
r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
1058+
if (unlikely(!r))
1059+
return -EAGAIN;
1060+
1061+
if (!sk_rmem_schedule(sk, skb, skb->len)) {
1062+
kfree(r);
1063+
return -EAGAIN;
1064+
}
1065+
1066+
sg_init_table(r->sg_data, MAX_SKB_FRAGS);
1067+
num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
1068+
if (unlikely(num_sg < 0)) {
1069+
kfree(r);
1070+
return num_sg;
1071+
}
1072+
sk_mem_charge(sk, skb->len);
1073+
copied = skb->len;
1074+
r->sg_start = 0;
1075+
r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
1076+
r->skb = skb;
1077+
list_add_tail(&r->list, &psock->ingress);
1078+
sk->sk_data_ready(sk);
1079+
return copied;
1080+
}
1081+
10481082
static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
10491083
{
1084+
struct smap_psock *peer;
10501085
struct sock *sk;
1086+
__u32 in;
10511087
int rc;
10521088

10531089
rc = smap_verdict_func(psock, skb);
10541090
switch (rc) {
10551091
case __SK_REDIRECT:
10561092
sk = do_sk_redirect_map(skb);
1057-
if (likely(sk)) {
1058-
struct smap_psock *peer = smap_psock_sk(sk);
1059-
1060-
if (likely(peer &&
1061-
test_bit(SMAP_TX_RUNNING, &peer->state) &&
1062-
!sock_flag(sk, SOCK_DEAD) &&
1063-
sock_writeable(sk))) {
1064-
skb_set_owner_w(skb, sk);
1065-
skb_queue_tail(&peer->rxqueue, skb);
1066-
schedule_work(&peer->tx_work);
1067-
break;
1068-
}
1093+
if (!sk) {
1094+
kfree_skb(skb);
1095+
break;
1096+
}
1097+
1098+
peer = smap_psock_sk(sk);
1099+
in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
1100+
1101+
if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
1102+
!test_bit(SMAP_TX_RUNNING, &peer->state))) {
1103+
kfree_skb(skb);
1104+
break;
1105+
}
1106+
1107+
if (!in && sock_writeable(sk)) {
1108+
skb_set_owner_w(skb, sk);
1109+
skb_queue_tail(&peer->rxqueue, skb);
1110+
schedule_work(&peer->tx_work);
1111+
break;
1112+
} else if (in &&
1113+
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
1114+
skb_queue_tail(&peer->rxqueue, skb);
1115+
schedule_work(&peer->tx_work);
1116+
break;
10691117
}
10701118
/* Fall through and free skb otherwise */
10711119
case __SK_DROP:
@@ -1127,15 +1175,23 @@ static void smap_tx_work(struct work_struct *w)
11271175
}
11281176

11291177
while ((skb = skb_dequeue(&psock->rxqueue))) {
1178+
__u32 flags;
1179+
11301180
rem = skb->len;
11311181
off = 0;
11321182
start:
1183+
flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
11331184
do {
1134-
if (likely(psock->sock->sk_socket))
1135-
n = skb_send_sock_locked(psock->sock,
1136-
skb, off, rem);
1137-
else
1185+
if (likely(psock->sock->sk_socket)) {
1186+
if (flags)
1187+
n = smap_do_ingress(psock, skb);
1188+
else
1189+
n = skb_send_sock_locked(psock->sock,
1190+
skb, off, rem);
1191+
} else {
11381192
n = -EINVAL;
1193+
}
1194+
11391195
if (n <= 0) {
11401196
if (n == -EAGAIN) {
11411197
/* Retry when space is available */
@@ -1153,7 +1209,9 @@ static void smap_tx_work(struct work_struct *w)
11531209
rem -= n;
11541210
off += n;
11551211
} while (rem);
1156-
kfree_skb(skb);
1212+
1213+
if (!flags)
1214+
kfree_skb(skb);
11571215
}
11581216
out:
11591217
release_sock(psock->sock);

net/core/filter.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1855,7 +1855,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
18551855
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
18561856

18571857
/* If user passes invalid input drop the packet. */
1858-
if (unlikely(flags))
1858+
if (unlikely(flags & ~(BPF_F_INGRESS)))
18591859
return SK_DROP;
18601860

18611861
tcb->bpf.key = key;

0 commit comments

Comments (0)