Commit 173d3ad

Björn Töpel authored and Daniel Borkmann committed
xsk: add zero-copy support for Rx
Extend xsk_rcv to support the new MEM_TYPE_ZERO_COPY memory, and wire
up the ndo_bpf call in bind.

Signed-off-by: Björn Töpel <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>

1 parent 02b55e5 commit 173d3ad

File tree: 5 files changed, 165 insertions(+), 21 deletions(-)

include/net/xdp_sock.h

Lines changed: 6 additions & 0 deletions
@@ -22,6 +22,7 @@ struct xdp_umem_props {
 
 struct xdp_umem_page {
	void *addr;
+	dma_addr_t dma;
 };
 
 struct xdp_umem {
@@ -38,6 +39,9 @@ struct xdp_umem {
	struct work_struct work;
	struct page **pgs;
	u32 npgs;
+	struct net_device *dev;
+	u16 queue_id;
+	bool zc;
 };
 
 struct xdp_sock {
@@ -60,6 +64,8 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 void xsk_flush(struct xdp_sock *xs);
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
+u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
+void xsk_umem_discard_addr(struct xdp_umem *umem);
 #else
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {

include/uapi/linux/if_xdp.h

Lines changed: 3 additions & 1 deletion
@@ -13,7 +13,9 @@
 #include <linux/types.h>
 
 /* Options for the sxdp_flags field */
-#define XDP_SHARED_UMEM 1
+#define XDP_SHARED_UMEM (1 << 0)
+#define XDP_COPY (1 << 1) /* Force copy-mode */
+#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
 
 struct sockaddr_xdp {
	__u16 sxdp_family;
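
These flag bits are consumed by bind() in net/xdp/xsk.c (see the xsk_bind() hunks below). For illustration only, a minimal userspace sketch of forcing zero-copy mode at bind time; this is not part of the commit, the ifindex and queue id are placeholders, and it assumes a glibc that defines AF_XDP:

/* Hypothetical usage sketch: ask the kernel for zero-copy at bind time.
 * With XDP_ZEROCOPY set, bind() fails if the driver lacks support
 * instead of silently falling back to copy mode.
 */
#include <linux/if_xdp.h>
#include <string.h>
#include <sys/socket.h>

static int bind_zerocopy(int xsk_fd, __u32 ifindex, __u32 queue_id)
{
	struct sockaddr_xdp sxdp;

	memset(&sxdp, 0, sizeof(sxdp));
	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = ifindex;	/* placeholder device index */
	sxdp.sxdp_queue_id = queue_id;	/* placeholder Rx queue */
	sxdp.sxdp_flags = XDP_ZEROCOPY;	/* fail rather than fall back */

	return bind(xsk_fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
}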

net/xdp/xdp_umem.c

Lines changed: 77 additions & 0 deletions
@@ -17,6 +17,81 @@
 
 #define XDP_UMEM_MIN_CHUNK_SIZE 2048
 
+int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
+			u32 queue_id, u16 flags)
+{
+	bool force_zc, force_copy;
+	struct netdev_bpf bpf;
+	int err;
+
+	force_zc = flags & XDP_ZEROCOPY;
+	force_copy = flags & XDP_COPY;
+
+	if (force_zc && force_copy)
+		return -EINVAL;
+
+	if (force_copy)
+		return 0;
+
+	dev_hold(dev);
+
+	if (dev->netdev_ops->ndo_bpf) {
+		bpf.command = XDP_QUERY_XSK_UMEM;
+
+		rtnl_lock();
+		err = dev->netdev_ops->ndo_bpf(dev, &bpf);
+		rtnl_unlock();
+
+		if (err) {
+			dev_put(dev);
+			return force_zc ? -ENOTSUPP : 0;
+		}
+
+		bpf.command = XDP_SETUP_XSK_UMEM;
+		bpf.xsk.umem = umem;
+		bpf.xsk.queue_id = queue_id;
+
+		rtnl_lock();
+		err = dev->netdev_ops->ndo_bpf(dev, &bpf);
+		rtnl_unlock();
+
+		if (err) {
+			dev_put(dev);
+			return force_zc ? err : 0; /* fail or fallback */
+		}
+
+		umem->dev = dev;
+		umem->queue_id = queue_id;
+		umem->zc = true;
+		return 0;
+	}
+
+	dev_put(dev);
+	return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
+}
+
+void xdp_umem_clear_dev(struct xdp_umem *umem)
+{
+	struct netdev_bpf bpf;
+	int err;
+
+	if (umem->dev) {
+		bpf.command = XDP_SETUP_XSK_UMEM;
+		bpf.xsk.umem = NULL;
+		bpf.xsk.queue_id = umem->queue_id;
+
+		rtnl_lock();
+		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
+		rtnl_unlock();
+
+		if (err)
+			WARN(1, "failed to disable umem!\n");
+
+		dev_put(umem->dev);
+		umem->dev = NULL;
+	}
+}
+
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
	unsigned int i;
@@ -43,6 +118,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
	struct task_struct *task;
	struct mm_struct *mm;
 
+	xdp_umem_clear_dev(umem);
+
	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
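
On the driver side, xdp_umem_assign_dev() probes and programs the device through ndo_bpf. A minimal sketch of the handler a hypothetical zero-copy capable driver might provide, assuming the XDP_QUERY_XSK_UMEM/XDP_SETUP_XSK_UMEM commands introduced by this series; mydrv_setup_umem() and mydrv_teardown_umem() are placeholder names, and the real setup logic (Rx ring programming, DMA mapping) is driver specific:

/* Hypothetical ndo_bpf handler answering the two UMEM commands that
 * xdp_umem_assign_dev()/xdp_umem_clear_dev() issue under rtnl_lock.
 */
static int mydrv_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
	switch (bpf->command) {
	case XDP_QUERY_XSK_UMEM:
		/* Returning 0 advertises zero-copy UMEM support;
		 * an error makes the core fall back to copy mode
		 * (or fail the bind if XDP_ZEROCOPY was forced).
		 */
		return 0;
	case XDP_SETUP_XSK_UMEM:
		/* A NULL umem requests teardown for the queue. */
		if (!bpf->xsk.umem)
			return mydrv_teardown_umem(dev, bpf->xsk.queue_id);
		return mydrv_setup_umem(dev, bpf->xsk.umem,
					bpf->xsk.queue_id);
	default:
		return -EINVAL;
	}
}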

net/xdp/xdp_umem.h

Lines changed: 3 additions & 0 deletions
@@ -13,6 +13,9 @@ static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
 }
 
+int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
+			u32 queue_id, u16 flags);
+void xdp_umem_clear_dev(struct xdp_umem *umem);
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
 void xdp_get_umem(struct xdp_umem *umem);
 void xdp_put_umem(struct xdp_umem *umem);

net/xdp/xsk.c

Lines changed: 76 additions & 20 deletions
@@ -36,19 +36,28 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
 
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
 {
-	return !!xs->rx;
+	return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
+		READ_ONCE(xs->umem->fq);
 }
 
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
+{
+	return xskq_peek_addr(umem->fq, addr);
+}
+EXPORT_SYMBOL(xsk_umem_peek_addr);
+
+void xsk_umem_discard_addr(struct xdp_umem *umem)
+{
+	xskq_discard_addr(umem->fq);
+}
+EXPORT_SYMBOL(xsk_umem_discard_addr);
+
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
-	u32 len = xdp->data_end - xdp->data;
	void *buffer;
	u64 addr;
	int err;
 
-	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
-		return -EINVAL;
-
	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
	    len > xs->umem->chunk_size_nohr) {
		xs->rx_dropped++;
@@ -60,25 +69,41 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
	buffer = xdp_umem_get_data(xs->umem, addr);
	memcpy(buffer, xdp->data, len);
	err = xskq_produce_batch_desc(xs->rx, addr, len);
-	if (!err)
+	if (!err) {
		xskq_discard_addr(xs->umem->fq);
-	else
-		xs->rx_dropped++;
+		xdp_return_buff(xdp);
+		return 0;
+	}
 
+	xs->rx_dropped++;
	return err;
 }
 
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
-	int err;
+	int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len);
 
-	err = __xsk_rcv(xs, xdp);
-	if (likely(!err))
+	if (err) {
		xdp_return_buff(xdp);
+		xs->rx_dropped++;
+	}
 
	return err;
 }
 
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+	u32 len;
+
+	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+		return -EINVAL;
+
+	len = xdp->data_end - xdp->data;
+
+	return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
+		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+}
+
 void xsk_flush(struct xdp_sock *xs)
 {
	xskq_produce_flush_desc(xs->rx);
@@ -87,12 +112,29 @@ void xsk_flush(struct xdp_sock *xs)
 
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
+	u32 len = xdp->data_end - xdp->data;
+	void *buffer;
+	u64 addr;
	int err;
 
-	err = __xsk_rcv(xs, xdp);
-	if (!err)
+	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+	    len > xs->umem->chunk_size_nohr) {
+		xs->rx_dropped++;
+		return -ENOSPC;
+	}
+
+	addr += xs->umem->headroom;
+
+	buffer = xdp_umem_get_data(xs->umem, addr);
+	memcpy(buffer, xdp->data, len);
+	err = xskq_produce_batch_desc(xs->rx, addr, len);
+	if (!err) {
+		xskq_discard_addr(xs->umem->fq);
		xsk_flush(xs);
+		return 0;
+	}
 
+	xs->rx_dropped++;
	return err;
 }
 
@@ -291,6 +333,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net_device *dev;
+	u32 flags, qid;
	int err = 0;
 
	if (addr_len < sizeof(struct sockaddr_xdp))
@@ -315,16 +358,26 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
		goto out_unlock;
	}
 
-	if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
-	    (xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
+	qid = sxdp->sxdp_queue_id;
+
+	if ((xs->rx && qid >= dev->real_num_rx_queues) ||
+	    (xs->tx && qid >= dev->real_num_tx_queues)) {
		err = -EINVAL;
		goto out_unlock;
	}
 
-	if (sxdp->sxdp_flags & XDP_SHARED_UMEM) {
+	flags = sxdp->sxdp_flags;
+
+	if (flags & XDP_SHARED_UMEM) {
		struct xdp_sock *umem_xs;
		struct socket *sock;
 
+		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+			/* Cannot specify flags for shared sockets. */
+			err = -EINVAL;
+			goto out_unlock;
+		}
+
		if (xs->umem) {
			/* We have already our own. */
			err = -EINVAL;
@@ -343,8 +396,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
			err = -EBADF;
			sockfd_put(sock);
			goto out_unlock;
-		} else if (umem_xs->dev != dev ||
-			   umem_xs->queue_id != sxdp->sxdp_queue_id) {
+		} else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
			err = -EINVAL;
			sockfd_put(sock);
			goto out_unlock;
@@ -360,6 +412,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
		/* This xsk has its own umem. */
		xskq_set_umem(xs->umem->fq, &xs->umem->props);
		xskq_set_umem(xs->umem->cq, &xs->umem->props);
+
+		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
+		if (err)
+			goto out_unlock;
	}
 
	xs->dev = dev;
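
The xsk_umem_peek_addr()/xsk_umem_discard_addr() pair exported above is intended for zero-copy drivers refilling their hardware Rx rings from the socket's fill queue. A minimal illustrative sketch, not from this commit; mydrv_refill_one() is a hypothetical helper, and DMA mapping via the new xdp_umem_page.dma field is omitted:

/* Hypothetical Rx-refill sketch using the helpers exported above:
 * peek an address from the fill queue, and only consume (discard) it
 * once the buffer has been committed to the hardware Rx ring.
 */
static int mydrv_refill_one(struct xdp_umem *umem, u64 *frame_addr)
{
	u64 addr;

	if (!xsk_umem_peek_addr(umem, &addr))
		return -ENOMEM;		/* fill queue is empty */

	*frame_addr = addr;		/* hand this frame to the Rx ring */
	xsk_umem_discard_addr(umem);	/* consume the peeked entry */
	return 0;
}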
