
Commit cf24f5a

tirthendu-intel authored and Alexei Starovoitov committed
xsk: add support for AF_XDP multi-buffer on Tx path
For transmitting an AF_XDP packet, allocate an skb while processing the first desc and copy the data into it. The XDP_PKT_CONTD flag in the 'options' field of the desc indicates the EOP status of the packet: if the current desc is not EOP, store the skb, release the current desc, and go on to read the next descs. Allocate a page for each subsequent desc, copy the data into it, and add it as a frag to the skb stored in xsk. On processing EOP, transmit the skb with its frags. The addresses contained in the descs have already been queued in the consumer queue, and the skb destructor has updated the completion count.

On transmit failure, cancel the releases, clear the descs from the completion queue, and consume the skb so that packet transmission can be retried.

For any invalid descriptor (invalid length/address/options) in the middle of a packet, all pending descriptors are dropped by the xsk core along with the invalid one, and the next descriptor is treated as the start of a new packet.

The maximum number of supported frames per packet is MAX_SKB_FRAGS + 1. If it is exceeded, all descriptors accumulated so far are dropped.

Signed-off-by: Tirthendu Sarkar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
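For orientation, here is a minimal userspace sketch of what this enables, assuming the libxdp/libbpf xsk ring helpers and a socket bound with the XDP_USE_SG flag from the same series; the frame addresses, lengths, and socket setup are placeholders, not part of this commit:

#include <linux/if_xdp.h>
#include <sys/socket.h>
#include <xdp/xsk.h>	/* libxdp ring helpers */

/* Queue one packet split across two Tx descriptors. 'tx' and 'xsk_fd'
 * are assumed to come from the usual AF_XDP umem/socket setup.
 */
static int send_two_frag_packet(struct xsk_ring_prod *tx, int xsk_fd,
				__u64 frame0, __u32 len0,
				__u64 frame1, __u32 len1)
{
	struct xdp_desc *d;
	__u32 idx;

	if (xsk_ring_prod__reserve(tx, 2, &idx) != 2)
		return -1;	/* Tx ring full, retry later */

	/* First desc: XDP_PKT_CONTD set, i.e. not EOP, more descs follow. */
	d = xsk_ring_prod__tx_desc(tx, idx);
	d->addr = frame0;
	d->len = len0;
	d->options = XDP_PKT_CONTD;

	/* Last desc: options cleared marks EOP; the kernel now has a
	 * complete packet and can transmit the skb with its frags.
	 */
	d = xsk_ring_prod__tx_desc(tx, idx + 1);
	d->addr = frame1;
	d->len = len1;
	d->options = 0;

	xsk_ring_prod__submit(tx, 2);

	/* Kick Tx processing (copy mode); ends up in __xsk_generic_xmit(). */
	return sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
}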
1 parent 1b725b0 commit cf24f5a

File tree: 2 files changed, +100 −33 lines


net/xdp/xsk.c

Lines changed: 92 additions & 28 deletions
@@ -393,7 +393,8 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
 	rcu_read_lock();
 	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
 		if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
-			xs->tx->queue_empty_descs++;
+			if (xskq_has_descs(xs->tx))
+				xskq_cons_release(xs->tx);
 			continue;
 		}
 
@@ -539,24 +540,32 @@ static void xsk_consume_skb(struct sk_buff *skb)
 	xs->skb = NULL;
 }
 
+static void xsk_drop_skb(struct sk_buff *skb)
+{
+	xdp_sk(skb->sk)->tx->invalid_descs += xsk_get_num_desc(skb);
+	xsk_consume_skb(skb);
+}
+
 static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 					      struct xdp_desc *desc)
 {
 	struct xsk_buff_pool *pool = xs->pool;
 	u32 hr, len, ts, offset, copy, copied;
-	struct sk_buff *skb;
+	struct sk_buff *skb = xs->skb;
 	struct page *page;
 	void *buffer;
 	int err, i;
 	u64 addr;
 
-	hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
+	if (!skb) {
+		hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
 
-	skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
-	if (unlikely(!skb))
-		return ERR_PTR(err);
+		skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
+		if (unlikely(!skb))
+			return ERR_PTR(err);
 
-	skb_reserve(skb, hr);
+		skb_reserve(skb, hr);
+	}
 
 	addr = desc->addr;
 	len = desc->len;
@@ -566,7 +575,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 	offset = offset_in_page(buffer);
 	addr = buffer - pool->addrs;
 
-	for (copied = 0, i = 0; copied < len; i++) {
+	for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
+		if (unlikely(i >= MAX_SKB_FRAGS))
+			return ERR_PTR(-EFAULT);
+
 		page = pool->umem->pgs[addr >> PAGE_SHIFT];
 		get_page(page);
 
@@ -591,33 +603,56 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 				     struct xdp_desc *desc)
 {
 	struct net_device *dev = xs->dev;
-	struct sk_buff *skb;
+	struct sk_buff *skb = xs->skb;
+	int err;
 
 	if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
 		skb = xsk_build_skb_zerocopy(xs, desc);
-		if (IS_ERR(skb))
-			return skb;
+		if (IS_ERR(skb)) {
+			err = PTR_ERR(skb);
+			goto free_err;
+		}
 	} else {
 		u32 hr, tr, len;
 		void *buffer;
-		int err;
 
-		hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
-		tr = dev->needed_tailroom;
+		buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
 		len = desc->len;
 
-		skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
-		if (unlikely(!skb))
-			return ERR_PTR(err);
+		if (!skb) {
+			hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+			tr = dev->needed_tailroom;
+			skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
+			if (unlikely(!skb))
+				goto free_err;
 
-		skb_reserve(skb, hr);
-		skb_put(skb, len);
+			skb_reserve(skb, hr);
+			skb_put(skb, len);
 
-		buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
-		err = skb_store_bits(skb, 0, buffer, len);
-		if (unlikely(err)) {
-			kfree_skb(skb);
-			return ERR_PTR(err);
+			err = skb_store_bits(skb, 0, buffer, len);
+			if (unlikely(err))
+				goto free_err;
+		} else {
+			int nr_frags = skb_shinfo(skb)->nr_frags;
+			struct page *page;
+			u8 *vaddr;
+
+			if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
+				err = -EFAULT;
+				goto free_err;
+			}
+
+			page = alloc_page(xs->sk.sk_allocation);
+			if (unlikely(!page)) {
+				err = -EAGAIN;
+				goto free_err;
+			}
+
+			vaddr = kmap_local_page(page);
+			memcpy(vaddr, buffer, len);
+			kunmap_local(vaddr);
+
+			skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
 		}
 	}
 
@@ -628,6 +663,17 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	xsk_set_destructor_arg(skb);
 
 	return skb;
+
+free_err:
+	if (err == -EAGAIN) {
+		xsk_cq_cancel_locked(xs, 1);
+	} else {
+		xsk_set_destructor_arg(skb);
+		xsk_drop_skb(skb);
+		xskq_cons_release(xs->tx);
+	}
+
+	return ERR_PTR(err);
 }
 
 static int __xsk_generic_xmit(struct sock *sk)
@@ -667,30 +713,45 @@ static int __xsk_generic_xmit(struct sock *sk)
 		skb = xsk_build_skb(xs, &desc);
 		if (IS_ERR(skb)) {
 			err = PTR_ERR(skb);
-			xsk_cq_cancel_locked(xs, 1);
-			goto out;
+			if (err == -EAGAIN)
+				goto out;
+			err = 0;
+			continue;
+		}
+
+		xskq_cons_release(xs->tx);
+
+		if (xp_mb_desc(&desc)) {
+			xs->skb = skb;
+			continue;
 		}
 
 		err = __dev_direct_xmit(skb, xs->queue_id);
 		if (err == NETDEV_TX_BUSY) {
 			/* Tell user-space to retry the send */
+			xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skb));
 			xsk_consume_skb(skb);
 			err = -EAGAIN;
 			goto out;
 		}
 
-		xskq_cons_release(xs->tx);
 		/* Ignore NET_XMIT_CN as packet might have been sent */
 		if (err == NET_XMIT_DROP) {
 			/* SKB completed but not sent */
			err = -EBUSY;
+			xs->skb = NULL;
 			goto out;
 		}
 
 		sent_frame = true;
+		xs->skb = NULL;
 	}
 
-	xs->tx->queue_empty_descs++;
+	if (xskq_has_descs(xs->tx)) {
+		if (xs->skb)
+			xsk_drop_skb(xs->skb);
+		xskq_cons_release(xs->tx);
+	}
 
 out:
	if (sent_frame)
@@ -940,6 +1001,9 @@ static int xsk_release(struct socket *sock)
 
 	net = sock_net(sk);
 
+	if (xs->skb)
+		xsk_drop_skb(xs->skb);
+
 	mutex_lock(&net->xdp.lock);
 	sk_del_node_init_rcu(sk);
 	mutex_unlock(&net->xdp.lock);
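The error paths above rely on xsk_get_num_desc() and xsk_set_destructor_arg(), added by an earlier patch in this series; a rough sketch of that bookkeeping, assuming skb_shinfo(skb)->destructor_arg is repurposed as a per-skb descriptor count:

/* Sketch only: destructor_arg tracks how many Tx descriptors were folded
 * into this skb, so xsk_drop_skb() can charge them to invalid_descs and
 * the NETDEV_TX_BUSY path knows how many releases to cancel.
 */
static u32 xsk_get_num_desc(struct sk_buff *skb)
{
	return skb ? (u32)(long)skb_shinfo(skb)->destructor_arg : 0;
}

static void xsk_set_destructor_arg(struct sk_buff *skb)
{
	/* one more descriptor has been consumed into the in-progress skb */
	long descs = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;

	skb_shinfo(skb)->destructor_arg = (void *)descs;
}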

net/xdp/xsk_queue.h

Lines changed: 8 additions & 5 deletions
@@ -175,6 +175,11 @@ static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
 		xp_aligned_validate_desc(pool, desc);
 }
 
+static inline bool xskq_has_descs(struct xsk_queue *q)
+{
+	return q->cached_cons != q->cached_prod;
+}
+
 static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
 					   struct xdp_desc *d,
 					   struct xsk_buff_pool *pool)
@@ -190,17 +195,15 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
 				       struct xdp_desc *desc,
 				       struct xsk_buff_pool *pool)
 {
-	while (q->cached_cons != q->cached_prod) {
+	if (q->cached_cons != q->cached_prod) {
 		struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
 		u32 idx = q->cached_cons & q->ring_mask;
 
 		*desc = ring->desc[idx];
-		if (xskq_cons_is_valid_desc(q, desc, pool))
-			return true;
-
-		q->cached_cons++;
+		return xskq_cons_is_valid_desc(q, desc, pool);
 	}
 
+	q->queue_empty_descs++;
 	return false;
 }

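The NETDEV_TX_BUSY path in __xsk_generic_xmit() also calls xskq_cons_cancel_n(), likewise introduced earlier in the series; given the cached_cons bookkeeping visible above, a plausible sketch is simply a rewind of the cached consumer index:

/* Sketch: take back 'cnt' not-yet-released descriptors so the same
 * descs are re-read when user space retries the send.
 */
static inline void xskq_cons_cancel_n(struct xsk_queue *q, u32 cnt)
{
	q->cached_cons -= cnt;
}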