
Commit ac98d8a

magnus-karlsson authored and borkmann committed
xsk: wire up Tx zero-copy functions
Here we add the functionality required to support zero-copy Tx, and also expose various zero-copy related functions for the netdevs.

Signed-off-by: Magnus Karlsson <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
1 parent e3760c7 commit ac98d8a

File tree: 5 files changed, +137 -11 lines changed

include/net/xdp_sock.h

Lines changed: 9 additions & 0 deletions

@@ -9,6 +9,7 @@
 #include <linux/workqueue.h>
 #include <linux/if_xdp.h>
 #include <linux/mutex.h>
+#include <linux/spinlock.h>
 #include <linux/mm.h>
 #include <net/sock.h>

@@ -42,6 +43,8 @@ struct xdp_umem {
 	struct net_device *dev;
 	u16 queue_id;
 	bool zc;
+	spinlock_t xsk_list_lock;
+	struct list_head xsk_list;
 };

 struct xdp_sock {
@@ -53,6 +56,8 @@ struct xdp_sock {
 	struct list_head flush_node;
 	u16 queue_id;
 	struct xsk_queue *tx ____cacheline_aligned_in_smp;
+	struct list_head list;
+	bool zc;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
 	u64 rx_dropped;
@@ -64,8 +69,12 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 void xsk_flush(struct xdp_sock *xs);
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
+/* Used from netdev driver */
 u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
 void xsk_umem_discard_addr(struct xdp_umem *umem);
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
+void xsk_umem_consume_tx_done(struct xdp_umem *umem);
 #else
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
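The three declarations under the new "Used from netdev driver" comment form the Tx half of the driver-facing API. Below is a minimal sketch of how a zero-copy driver might drive them from its Tx path; mydrv_tx_ring_has_space() and mydrv_post_tx_desc() are hypothetical placeholders for driver-specific hardware-ring handling, and only the xsk_umem_*() calls come from this commit.

/* Hypothetical driver Tx routine, typically run from NAPI context:
 * pull descriptors off the socket's Tx ring, post them to hardware,
 * and wake any writers waiting on ring space.
 */
static void mydrv_xsk_xmit(struct mydrv_tx_ring *ring, struct xdp_umem *umem)
{
        dma_addr_t dma;
        u32 len;

        while (mydrv_tx_ring_has_space(ring) &&
               xsk_umem_consume_tx(umem, &dma, &len))
                mydrv_post_tx_desc(ring, dma, len); /* frame now owned by HW */

        xsk_umem_consume_tx_done(umem); /* sk_write_space() on bound sockets */
}

Once the hardware reports the frames as sent, the driver credits them back to user space in one call: xsk_umem_complete_tx(umem, n) publishes n entries on the completion queue (see xskq_produce_flush_addr_n() in net/xdp/xsk_queue.h below).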

net/xdp/xdp_umem.c

Lines changed: 27 additions & 2 deletions

@@ -17,6 +17,29 @@

 #define XDP_UMEM_MIN_CHUNK_SIZE 2048

+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&umem->xsk_list_lock, flags);
+	list_add_rcu(&xs->list, &umem->xsk_list);
+	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+}
+
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
+{
+	unsigned long flags;
+
+	if (xs->dev) {
+		spin_lock_irqsave(&umem->xsk_list_lock, flags);
+		list_del_rcu(&xs->list);
+		spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+
+		if (umem->zc)
+			synchronize_net();
+	}
+}
+
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 			u32 queue_id, u16 flags)
 {
@@ -35,7 +58,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,

 	dev_hold(dev);

-	if (dev->netdev_ops->ndo_bpf) {
+	if (dev->netdev_ops->ndo_bpf && dev->netdev_ops->ndo_xsk_async_xmit) {
 		bpf.command = XDP_QUERY_XSK_UMEM;

 		rtnl_lock();
@@ -70,7 +93,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 	return force_zc ? -ENOTSUPP : 0; /* fail or fallback */
 }

-void xdp_umem_clear_dev(struct xdp_umem *umem)
+static void xdp_umem_clear_dev(struct xdp_umem *umem)
 {
 	struct netdev_bpf bpf;
 	int err;
@@ -283,6 +306,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	umem->npgs = size / PAGE_SIZE;
 	umem->pgs = NULL;
 	umem->user = NULL;
+	INIT_LIST_HEAD(&umem->xsk_list);
+	spin_lock_init(&umem->xsk_list_lock);

 	refcount_set(&umem->users, 1);
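Zero-copy is now only attempted when the driver implements ndo_xsk_async_xmit alongside ndo_bpf. The callback's only job is to get Tx processing for the given queue running soon. A hedged sketch of a hypothetical implementation follows; the private structure and per-queue NAPI layout are assumptions, and the signature is inferred from the xsk_zc_xmit() call in net/xdp/xsk.c below.

/* Hypothetical ndo_xsk_async_xmit: validate the queue and reschedule
 * its NAPI instance so the driver's Tx routine picks up the new
 * descriptors from the socket's Tx ring.
 */
static int mydrv_xsk_async_xmit(struct net_device *dev, u32 queue_id)
{
        struct mydrv_priv *priv = netdev_priv(dev);

        if (queue_id >= priv->num_tx_rings)
                return -EINVAL;

        napi_schedule(&priv->tx_rings[queue_id].napi);
        return 0;
}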

net/xdp/xdp_umem.h

Lines changed: 7 additions & 1 deletion

@@ -13,12 +13,18 @@ static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
 }

+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
+{
+	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
+}
+
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 			u32 queue_id, u16 flags);
-void xdp_umem_clear_dev(struct xdp_umem *umem);
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
 void xdp_get_umem(struct xdp_umem *umem);
 void xdp_put_umem(struct xdp_umem *umem);
+void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
+void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs);
 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);

 #endif /* XDP_UMEM_H_ */
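xdp_umem_get_dma() mirrors xdp_umem_get_data() directly above it: the high bits of the 64-bit umem address select a page, the low bits are the offset into it, and the offset is applied to the page's DMA address instead of its kernel virtual address. A worked example, assuming 4 KiB pages:

/* Assuming PAGE_SIZE == 4096 (PAGE_SHIFT == 12) and addr == 0x6ffc:
 *
 *   addr >> PAGE_SHIFT      == 6       (page index)
 *   addr & (PAGE_SIZE - 1)  == 0xffc   (offset within the page)
 *
 * so xdp_umem_get_dma() returns umem->pages[6].dma + 0xffc.
 */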

net/xdp/xsk.c

Lines changed: 63 additions & 7 deletions

@@ -21,6 +21,7 @@
 #include <linux/uaccess.h>
 #include <linux/net.h>
 #include <linux/netdevice.h>
+#include <linux/rculist.h>
 #include <net/xdp_sock.h>
 #include <net/xdp.h>

@@ -138,6 +139,59 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return err;
 }

+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+{
+	xskq_produce_flush_addr_n(umem->cq, nb_entries);
+}
+EXPORT_SYMBOL(xsk_umem_complete_tx);
+
+void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+{
+	struct xdp_sock *xs;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		xs->sk.sk_write_space(&xs->sk);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx_done);
+
+bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len)
+{
+	struct xdp_desc desc;
+	struct xdp_sock *xs;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		if (!xskq_peek_desc(xs->tx, &desc))
+			continue;
+
+		if (xskq_produce_addr_lazy(umem->cq, desc.addr))
+			goto out;
+
+		*dma = xdp_umem_get_dma(umem, desc.addr);
+		*len = desc.len;
+
+		xskq_discard_desc(xs->tx);
+		rcu_read_unlock();
+		return true;
+	}
+
+out:
+	rcu_read_unlock();
+	return false;
+}
+EXPORT_SYMBOL(xsk_umem_consume_tx);
+
+static int xsk_zc_xmit(struct sock *sk)
+{
+	struct xdp_sock *xs = xdp_sk(sk);
+	struct net_device *dev = xs->dev;
+
+	return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+}
+
 static void xsk_destruct_skb(struct sk_buff *skb)
 {
 	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
@@ -151,7 +205,6 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 			    size_t total_len)
 {
-	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
 	u32 max_batch = TX_BATCH_SIZE;
 	struct xdp_sock *xs = xdp_sk(sk);
 	bool sent_frame = false;
@@ -161,8 +214,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,

 	if (unlikely(!xs->tx))
 		return -ENOBUFS;
-	if (need_wait)
-		return -EOPNOTSUPP;

 	mutex_lock(&xs->mutex);

@@ -192,7 +243,7 @@
 			goto out;
 		}

-		skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
+		skb = sock_alloc_send_skb(sk, len, 1, &err);
 		if (unlikely(!skb)) {
 			err = -EAGAIN;
 			goto out;
@@ -235,15 +286,18 @@

 static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 {
+	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);

 	if (unlikely(!xs->dev))
 		return -ENXIO;
 	if (unlikely(!(xs->dev->flags & IFF_UP)))
 		return -ENETDOWN;
+	if (need_wait)
+		return -EOPNOTSUPP;

-	return xsk_generic_xmit(sk, m, total_len);
+	return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
 }

 static unsigned int xsk_poll(struct file *file, struct socket *sock,
@@ -419,10 +473,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 	}

 	xs->dev = dev;
-	xs->queue_id = sxdp->sxdp_queue_id;
-
+	xs->zc = xs->umem->zc;
+	xs->queue_id = qid;
 	xskq_set_umem(xs->rx, &xs->umem->props);
 	xskq_set_umem(xs->tx, &xs->umem->props);
+	xdp_add_sk_umem(xs->umem, xs);

 out_unlock:
 	if (err)
@@ -660,6 +715,7 @@ static void xsk_destruct(struct sock *sk)

 	xskq_destroy(xs->rx);
 	xskq_destroy(xs->tx);
+	xdp_del_sk_umem(xs->umem, xs);
 	xdp_put_umem(xs->umem);

 	sk_refcnt_debug_dec(sk);
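With the need_wait check hoisted from xsk_generic_xmit() into xsk_sendmsg(), blocking sends are rejected up front on both paths, and on a zero-copy socket sendmsg() does nothing but kick ndo_xsk_async_xmit(). From user space, the usual idiom is an empty non-blocking sendto(); a minimal sketch, assuming fd is an AF_XDP socket already bound with descriptors queued on its Tx ring:

#include <sys/socket.h>

/* Kick Tx on an AF_XDP socket. On a zero-copy socket this lands in the
 * driver's ndo_xsk_async_xmit(); otherwise xsk_generic_xmit() copies the
 * frames out. MSG_DONTWAIT is required, as blocking sends now fail with
 * -EOPNOTSUPP.
 */
static void kick_tx(int fd)
{
        sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
}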

net/xdp/xsk_queue.h

Lines changed: 31 additions & 1 deletion

@@ -11,6 +11,7 @@
 #include <net/xdp_sock.h>

 #define RX_BATCH_SIZE 16
+#define LAZY_UPDATE_THRESHOLD 128

 struct xdp_ring {
 	u32 producer ____cacheline_aligned_in_smp;
@@ -61,9 +62,14 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
 	return (entries > dcnt) ? dcnt : entries;
 }

+static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
+{
+	return q->nentries - (producer - q->cons_tail);
+}
+
 static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
 {
-	u32 free_entries = q->nentries - (producer - q->cons_tail);
+	u32 free_entries = xskq_nb_free_lazy(q, producer);

 	if (free_entries >= dcnt)
 		return free_entries;
@@ -123,6 +129,9 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
 {
 	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;

+	if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+		return -ENOSPC;
+
 	ring->desc[q->prod_tail++ & q->ring_mask] = addr;

 	/* Order producer and data */
@@ -132,6 +141,27 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
 	return 0;
 }

+static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr)
+{
+	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+	if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
+		return -ENOSPC;
+
+	ring->desc[q->prod_head++ & q->ring_mask] = addr;
+	return 0;
+}
+
+static inline void xskq_produce_flush_addr_n(struct xsk_queue *q,
+					     u32 nb_entries)
+{
+	/* Order producer and data */
+	smp_wmb();
+
+	q->prod_tail += nb_entries;
+	WRITE_ONCE(q->ring->producer, q->prod_tail);
+}
+
 static inline int xskq_reserve_addr(struct xsk_queue *q)
 {
 	if (xskq_nb_free(q, q->prod_head, 1) == 0)
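The lazy producer pair is what makes batched Tx completions cheap: xskq_produce_addr_lazy() advances only the local prod_head, deferring both the smp_wmb() and the store to the shared ring->producer to a single xskq_produce_flush_addr_n() call per batch. A sketch of the pairing as xsk_umem_consume_tx() and xsk_umem_complete_tx() use it; the wrapper function and addrs array are hypothetical:

/* Stage up to n completion addresses without publishing them, then make
 * them all visible to user space with one ordered producer update.
 */
static void stage_and_flush(struct xsk_queue *cq, const u64 *addrs, u32 n)
{
        u32 staged = 0;

        while (staged < n && xskq_produce_addr_lazy(cq, addrs[staged]) == 0)
                staged++;                       /* only prod_head moves */

        if (staged)
                xskq_produce_flush_addr_n(cq, staged); /* smp_wmb() + store */
}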
