Skip to content

Commit 0690899

Browse files
mstsirkin authored and davem330 committed
tun: experimental zero copy tx support
Let vhost-net utilize zero copy tx when used with tun. Signed-off-by: Michael S. Tsirkin <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent dcc0fb7 commit 0690899

File tree

1 file changed

+134
-12
lines changed

1 file changed

+134
-12
lines changed

drivers/net/tun.c

Lines changed: 134 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ do { \
100100
} while (0)
101101
#endif
102102

103+
/* Fallback copy length (bytes) used by tun_get_user() on the zerocopy path
 * when neither the iovec count nor gso.hdr_len asks for a copy.
 */
#define GOODCOPY_LEN 128
104+
103105
#define FLT_EXACT_COUNT 8
104106
struct tap_filter {
105107
unsigned int count; /* Number of addrs. Zero means disabled */
@@ -604,19 +606,100 @@ static struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
604606
return skb;
605607
}
606608

609+
/* set skb frags from iovec, this can move to core network code for reuse */
610+
static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
611+
int offset, size_t count)
612+
{
613+
int len = iov_length(from, count) - offset;
614+
int copy = skb_headlen(skb);
615+
int size, offset1 = 0;
616+
int i = 0;
617+
618+
/* Skip over from offset */
619+
while (count && (offset >= from->iov_len)) {
620+
offset -= from->iov_len;
621+
++from;
622+
--count;
623+
}
624+
625+
/* copy up to skb headlen */
626+
while (count && (copy > 0)) {
627+
size = min_t(unsigned int, copy, from->iov_len - offset);
628+
if (copy_from_user(skb->data + offset1, from->iov_base + offset,
629+
size))
630+
return -EFAULT;
631+
if (copy > size) {
632+
++from;
633+
--count;
634+
offset = 0;
635+
} else
636+
offset += size;
637+
copy -= size;
638+
offset1 += size;
639+
}
640+
641+
if (len == offset1)
642+
return 0;
643+
644+
while (count--) {
645+
struct page *page[MAX_SKB_FRAGS];
646+
int num_pages;
647+
unsigned long base;
648+
unsigned long truesize;
649+
650+
len = from->iov_len - offset;
651+
if (!len) {
652+
offset = 0;
653+
++from;
654+
continue;
655+
}
656+
base = (unsigned long)from->iov_base + offset;
657+
size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
658+
if (i + size > MAX_SKB_FRAGS)
659+
return -EMSGSIZE;
660+
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
661+
if (num_pages != size) {
662+
for (i = 0; i < num_pages; i++)
663+
put_page(page[i]);
664+
return -EFAULT;
665+
}
666+
truesize = size * PAGE_SIZE;
667+
skb->data_len += len;
668+
skb->len += len;
669+
skb->truesize += truesize;
670+
atomic_add(truesize, &skb->sk->sk_wmem_alloc);
671+
while (len) {
672+
int off = base & ~PAGE_MASK;
673+
int size = min_t(int, len, PAGE_SIZE - off);
674+
__skb_fill_page_desc(skb, i, page[i], off, size);
675+
skb_shinfo(skb)->nr_frags++;
676+
/* increase sk_wmem_alloc */
677+
base += size;
678+
len -= size;
679+
i++;
680+
}
681+
offset = 0;
682+
++from;
683+
}
684+
return 0;
685+
}
686+
607687
/* Get packet from user space buffer */
608-
static ssize_t tun_get_user(struct tun_struct *tun,
609-
const struct iovec *iv, size_t count,
610-
int noblock)
688+
static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control,
689+
const struct iovec *iv, size_t total_len,
690+
size_t count, int noblock)
611691
{
612692
struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
613693
struct sk_buff *skb;
614-
size_t len = count, align = NET_SKB_PAD;
694+
size_t len = total_len, align = NET_SKB_PAD;
615695
struct virtio_net_hdr gso = { 0 };
616696
int offset = 0;
697+
int copylen;
698+
bool zerocopy = false;
699+
int err;
617700

618701
if (!(tun->flags & TUN_NO_PI)) {
619-
if ((len -= sizeof(pi)) > count)
702+
if ((len -= sizeof(pi)) > total_len)
620703
return -EINVAL;
621704

622705
if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi)))
@@ -625,7 +708,7 @@ static ssize_t tun_get_user(struct tun_struct *tun,
625708
}
626709

627710
if (tun->flags & TUN_VNET_HDR) {
628-
if ((len -= tun->vnet_hdr_sz) > count)
711+
if ((len -= tun->vnet_hdr_sz) > total_len)
629712
return -EINVAL;
630713

631714
if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
@@ -647,14 +730,46 @@ static ssize_t tun_get_user(struct tun_struct *tun,
647730
return -EINVAL;
648731
}
649732

650-
skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
733+
if (msg_control)
734+
zerocopy = true;
735+
736+
if (zerocopy) {
737+
/* Userspace may produce vectors with count greater than
738+
* MAX_SKB_FRAGS, so we need to linearize parts of the skb
739+
* so that the rest of the data fits in the frags.
740+
*/
741+
if (count > MAX_SKB_FRAGS) {
742+
copylen = iov_length(iv, count - MAX_SKB_FRAGS);
743+
if (copylen < offset)
744+
copylen = 0;
745+
else
746+
copylen -= offset;
747+
} else
748+
copylen = 0;
749+
/* By default GOODCOPY_LEN bytes are copied into the skb, so there is enough
750+
* room for skb expand head in case it is used.
751+
* The rest of the buffer is mapped from userspace.
752+
*/
753+
if (copylen < gso.hdr_len)
754+
copylen = gso.hdr_len;
755+
if (!copylen)
756+
copylen = GOODCOPY_LEN;
757+
} else
758+
copylen = len;
759+
760+
skb = tun_alloc_skb(tun, align, copylen, gso.hdr_len, noblock);
651761
if (IS_ERR(skb)) {
652762
if (PTR_ERR(skb) != -EAGAIN)
653763
tun->dev->stats.rx_dropped++;
654764
return PTR_ERR(skb);
655765
}
656766

657-
if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) {
767+
if (zerocopy)
768+
err = zerocopy_sg_from_iovec(skb, iv, offset, count);
769+
else
770+
err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len);
771+
772+
if (err) {
658773
tun->dev->stats.rx_dropped++;
659774
kfree_skb(skb);
660775
return -EFAULT;
@@ -728,12 +843,18 @@ static ssize_t tun_get_user(struct tun_struct *tun,
728843
skb_shinfo(skb)->gso_segs = 0;
729844
}
730845

846+
/* copy skb_ubuf_info for callback when skb has no error */
847+
if (zerocopy) {
848+
skb_shinfo(skb)->destructor_arg = msg_control;
849+
skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
850+
}
851+
731852
netif_rx_ni(skb);
732853

733854
tun->dev->stats.rx_packets++;
734855
tun->dev->stats.rx_bytes += len;
735856

736-
return count;
857+
return total_len;
737858
}
738859

739860
static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
@@ -748,7 +869,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
748869

749870
tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count);
750871

751-
result = tun_get_user(tun, iv, iov_length(iv, count),
872+
result = tun_get_user(tun, NULL, iv, iov_length(iv, count), count,
752873
file->f_flags & O_NONBLOCK);
753874

754875
tun_put(tun);
@@ -962,8 +1083,8 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
9621083
struct msghdr *m, size_t total_len)
9631084
{
9641085
struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
965-
return tun_get_user(tun, m->msg_iov, total_len,
966-
m->msg_flags & MSG_DONTWAIT);
1086+
return tun_get_user(tun, m->msg_control, m->msg_iov, total_len,
1087+
m->msg_iovlen, m->msg_flags & MSG_DONTWAIT);
9671088
}
9681089

9691090
static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
@@ -1133,6 +1254,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
11331254
sock_init_data(&tun->socket, sk);
11341255
sk->sk_write_space = tun_sock_write_space;
11351256
sk->sk_sndbuf = INT_MAX;
1257+
sock_set_flag(sk, SOCK_ZEROCOPY);
11361258

11371259
tun_sk(sk)->tun = tun;
11381260

0 commit comments

Comments
 (0)