
Commit 3c73419

Rainer Weikusat authored and davem330 committed
af_unix: fix 'poll for write'/connected DGRAM sockets
The unix_dgram_sendmsg routine implements a (somewhat crude) form of receiver-imposed flow control by comparing the length of the receive queue of the 'peer socket' with the max_ack_backlog value stored in the corresponding sock structure, and either blocks the thread which caused the send routine to be called or returns EAGAIN. This routine is used by both SOCK_DGRAM and SOCK_SEQPACKET sockets. The poll implementation for these socket types is datagram_poll from core/datagram.c, which deems a socket writeable when the memory presently consumed by datagrams owned by it is less than the configured socket send buffer size. This is always wrong for connected PF_UNIX non-stream sockets whenever the above-mentioned receive queue is currently considered to be full: 'poll' will then return, indicating that the socket is writeable, but a subsequent write results in EAGAIN, effectively causing a (typical) application to 'poll for writeability by repeated send requests with O_NONBLOCK set' until it has consumed its time quantum.

The change below uses a suitably modified variant of the datagram_poll routine for both types of PF_UNIX sockets, which tests, as part of the 'is this socket writeable' check, whether the receive queue of the peer a socket is connected to is presently considered to be 'full'. The socket being polled is additionally put onto the peer_wait wait queue associated with its peer, because the unix_dgram_sendmsg routine does a wake up on this queue after a datagram is received and the 'other wakeup call' happens implicitly as part of skb destruction; otherwise, a process blocked in poll because of a full peer receive queue could sleep forever if no datagram owned by its socket was already sitting on that queue.

The change also adds a small (inline) helper routine named 'unix_recvq_full', which consolidates the actual test (previously open-coded in three different places) into a single location.

Signed-off-by: Rainer Weikusat <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 4552e11 commit 3c73419

File tree

1 file changed (+70, -9 lines)


net/unix/af_unix.c

Lines changed: 70 additions & 9 deletions
@@ -169,6 +169,11 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk)
 	return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
 }
 
+static inline int unix_recvq_full(struct sock const *sk)
+{
+	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
+}
+
 static struct sock *unix_peer_get(struct sock *s)
 {
 	struct sock *peer;
@@ -482,6 +487,8 @@ static int unix_socketpair(struct socket *, struct socket *);
 static int unix_accept(struct socket *, struct socket *, int);
 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
+static unsigned int unix_datagram_poll(struct file *, struct socket *,
+				       poll_table *);
 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 static int unix_shutdown(struct socket *, int);
 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
@@ -527,7 +534,7 @@ static const struct proto_ops unix_dgram_ops = {
 	.socketpair = unix_socketpair,
 	.accept = sock_no_accept,
 	.getname = unix_getname,
-	.poll = datagram_poll,
+	.poll = unix_datagram_poll,
 	.ioctl = unix_ioctl,
 	.listen = sock_no_listen,
 	.shutdown = unix_shutdown,
@@ -548,7 +555,7 @@ static const struct proto_ops unix_seqpacket_ops = {
 	.socketpair = unix_socketpair,
 	.accept = unix_accept,
 	.getname = unix_getname,
-	.poll = datagram_poll,
+	.poll = unix_datagram_poll,
 	.ioctl = unix_ioctl,
 	.listen = unix_listen,
 	.shutdown = unix_shutdown,
@@ -983,8 +990,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo)
 
 	sched = !sock_flag(other, SOCK_DEAD) &&
 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
-		(skb_queue_len(&other->sk_receive_queue) >
-		 other->sk_max_ack_backlog);
+		unix_recvq_full(other);
 
 	unix_state_unlock(other);
 
@@ -1058,8 +1064,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	if (other->sk_state != TCP_LISTEN)
 		goto out_unlock;
 
-	if (skb_queue_len(&other->sk_receive_queue) >
-	    other->sk_max_ack_backlog) {
+	if (unix_recvq_full(other)) {
 		err = -EAGAIN;
 		if (!timeo)
 			goto out_unlock;
@@ -1428,9 +1433,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out_unlock;
 	}
 
-	if (unix_peer(other) != sk &&
-	    (skb_queue_len(&other->sk_receive_queue) >
-	     other->sk_max_ack_backlog)) {
+	if (unix_peer(other) != sk && unix_recvq_full(other)) {
 		if (!timeo) {
 			err = -EAGAIN;
 			goto out_unlock;
@@ -1991,6 +1994,64 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
 	return mask;
 }
 
+static unsigned int unix_datagram_poll(struct file *file, struct socket *sock,
+				       poll_table *wait)
+{
+	struct sock *sk = sock->sk, *peer;
+	unsigned int mask;
+
+	poll_wait(file, sk->sk_sleep, wait);
+
+	peer = unix_peer_get(sk);
+	if (peer) {
+		if (peer != sk) {
+			/*
+			 * Writability of a connected socket additionally
+			 * depends on the state of the receive queue of the
+			 * peer.
+			 */
+			poll_wait(file, &unix_sk(peer)->peer_wait, wait);
+		} else {
+			sock_put(peer);
+			peer = NULL;
+		}
+	}
+
+	mask = 0;
+
+	/* exceptional events? */
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLRDHUP;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+	    (sk->sk_shutdown & RCV_SHUTDOWN))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* Connection-based need to check for termination and startup */
+	if (sk->sk_type == SOCK_SEQPACKET) {
+		if (sk->sk_state == TCP_CLOSE)
+			mask |= POLLHUP;
+		/* connection hasn't started yet? */
+		if (sk->sk_state == TCP_SYN_SENT)
+			return mask;
+	}
+
+	/* writable? */
+	if (unix_writable(sk) && !(peer && unix_recvq_full(peer)))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+	else
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	if (peer)
+		sock_put(peer);
+
+	return mask;
+}
+
 
 #ifdef CONFIG_PROC_FS
 static struct sock *first_unix_socket(int *i)
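
For illustration only (not part of the commit): a minimal userspace sketch of the busy-wait symptom described in the commit message, assuming a hypothetical slow receiver bound at /tmp/demo.sock. Before this change, poll() on a connected AF_UNIX SOCK_DGRAM socket could report POLLOUT while the peer's receive queue was full, so the non-blocking send below would spin on EAGAIN; with unix_datagram_poll, the loop sleeps in poll() instead.

#include <errno.h>
#include <poll.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

/* Wait until poll() reports the socket writable, then attempt one
 * non-blocking send. Returns 0 on success, -1 on a hard error. */
static int send_when_writable(int fd, const void *buf, size_t len)
{
	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLOUT };

		if (poll(&pfd, 1, -1) < 0)
			return -1;

		if (send(fd, buf, len, MSG_DONTWAIT) >= 0)
			return 0;

		/* With the old datagram_poll-based check this branch was
		 * taken repeatedly while the peer queue stayed full,
		 * turning the loop into a busy wait. */
		if (errno != EAGAIN && errno != EWOULDBLOCK)
			return -1;
	}
}

int main(void)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	const char msg[] = "ping";
	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	/* Hypothetical path; a receiver is assumed to be bound here. */
	strncpy(addr.sun_path, "/tmp/demo.sock", sizeof(addr.sun_path) - 1);
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* Flood the receiver; flow control should park us in poll(). */
	while (send_when_writable(fd, msg, sizeof(msg)) == 0)
		;

	close(fd);
	return 0;
}

The sketch only exercises the sender side; the peer's queue limit is whatever sk_max_ack_backlog the receiving socket carries.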
