Skip to content

Commit e370a72

Browse files
Eric Dumazet authored and davem330 committed
af_unix: improve STREAM behavior with fragmented memory
unix_stream_sendmsg() currently uses order-2 allocations, and we had numerous reports this can fail. The __GFP_REPEAT flag present in sock_alloc_send_pskb() is not helping. This patch extends the work done in commit eb6a248 ("af_unix: reduce high order page allocations") for datagram sockets. This opens the possibility of zero copy IO (splice() and friends). The trick is to not use skb_pull() anymore in the recvmsg() path, and instead add a @consumed field in UNIXCB() to track the amount of already-read payload in the skb. There is a performance regression for large sends because of extra page allocations that will be addressed in a follow-up patch, allowing sock_alloc_send_pskb() to attempt high order page allocations. Signed-off-by: Eric Dumazet <[email protected]> Cc: David Rientjes <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 149479d commit e370a72

File tree

2 files changed

+31
-35
lines changed

2 files changed

+31
-35
lines changed

include/net/af_unix.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ struct unix_skb_parms {
3535
#ifdef CONFIG_SECURITY_NETWORK
3636
u32 secid; /* Security ID */
3737
#endif
38+
u32 consumed;
3839
};
3940

4041
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))

net/unix/af_unix.c

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1596,6 +1596,10 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
15961596
return err;
15971597
}
15981598

1599+
/* We use paged skbs for stream sockets, and limit occupancy to 32768
1600+
* bytes, and a minimum of a full page.
1601+
*/
1602+
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
15991603

16001604
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
16011605
struct msghdr *msg, size_t len)
@@ -1609,6 +1613,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
16091613
struct scm_cookie tmp_scm;
16101614
bool fds_sent = false;
16111615
int max_level;
1616+
int data_len;
16121617

16131618
if (NULL == siocb->scm)
16141619
siocb->scm = &tmp_scm;
@@ -1635,40 +1640,21 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
16351640
goto pipe_err;
16361641

16371642
while (sent < len) {
1638-
/*
1639-
* Optimisation for the fact that under 0.01% of X
1640-
* messages typically need breaking up.
1641-
*/
1642-
1643-
size = len-sent;
1643+
size = len - sent;
16441644

16451645
/* Keep two messages in the pipe so it schedules better */
1646-
if (size > ((sk->sk_sndbuf >> 1) - 64))
1647-
size = (sk->sk_sndbuf >> 1) - 64;
1646+
size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
16481647

1649-
if (size > SKB_MAX_ALLOC)
1650-
size = SKB_MAX_ALLOC;
1651-
1652-
/*
1653-
* Grab a buffer
1654-
*/
1648+
/* allow fallback to order-0 allocations */
1649+
size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
16551650

1656-
skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1657-
&err);
1651+
data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
16581652

1659-
if (skb == NULL)
1653+
skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1654+
msg->msg_flags & MSG_DONTWAIT, &err);
1655+
if (!skb)
16601656
goto out_err;
16611657

1662-
/*
1663-
* If you pass two values to the sock_alloc_send_skb
1664-
* it tries to grab the large buffer with GFP_NOFS
1665-
* (which can fail easily), and if it fails grab the
1666-
* fallback size buffer which is under a page and will
1667-
* succeed. [Alan]
1668-
*/
1669-
size = min_t(int, size, skb_tailroom(skb));
1670-
1671-
16721658
/* Only send the fds in the first buffer */
16731659
err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
16741660
if (err < 0) {
@@ -1678,7 +1664,10 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
16781664
max_level = err + 1;
16791665
fds_sent = true;
16801666

1681-
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1667+
skb_put(skb, size - data_len);
1668+
skb->data_len = data_len;
1669+
skb->len = size;
1670+
err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, size);
16821671
if (err) {
16831672
kfree_skb(skb);
16841673
goto out_err;
@@ -1890,6 +1879,11 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
18901879
return timeo;
18911880
}
18921881

1882+
static unsigned int unix_skb_len(const struct sk_buff *skb)
1883+
{
1884+
return skb->len - UNIXCB(skb).consumed;
1885+
}
1886+
18931887
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
18941888
struct msghdr *msg, size_t size,
18951889
int flags)
@@ -1977,8 +1971,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
19771971
}
19781972

19791973
skip = sk_peek_offset(sk, flags);
1980-
while (skip >= skb->len) {
1981-
skip -= skb->len;
1974+
while (skip >= unix_skb_len(skb)) {
1975+
skip -= unix_skb_len(skb);
19821976
last = skb;
19831977
skb = skb_peek_next(skb, &sk->sk_receive_queue);
19841978
if (!skb)
@@ -2005,8 +1999,9 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
20051999
sunaddr = NULL;
20062000
}
20072001

2008-
chunk = min_t(unsigned int, skb->len - skip, size);
2009-
if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
2002+
chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2003+
if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
2004+
msg->msg_iov, chunk)) {
20102005
if (copied == 0)
20112006
copied = -EFAULT;
20122007
break;
@@ -2016,14 +2011,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
20162011

20172012
/* Mark read part of skb as used */
20182013
if (!(flags & MSG_PEEK)) {
2019-
skb_pull(skb, chunk);
2014+
UNIXCB(skb).consumed += chunk;
20202015

20212016
sk_peek_offset_bwd(sk, chunk);
20222017

20232018
if (UNIXCB(skb).fp)
20242019
unix_detach_fds(siocb->scm, skb);
20252020

2026-
if (skb->len)
2021+
if (unix_skb_len(skb))
20272022
break;
20282023

20292024
skb_unlink(skb, &sk->sk_receive_queue);
@@ -2107,7 +2102,7 @@ long unix_inq_len(struct sock *sk)
21072102
if (sk->sk_type == SOCK_STREAM ||
21082103
sk->sk_type == SOCK_SEQPACKET) {
21092104
skb_queue_walk(&sk->sk_receive_queue, skb)
2110-
amount += skb->len;
2105+
amount += unix_skb_len(skb);
21112106
} else {
21122107
skb = skb_peek(&sk->sk_receive_queue);
21132108
if (skb)

0 commit comments

Comments
 (0)