Skip to content

Commit d136184

Browse files
edumazet authored and davem330 committed
tcp: fix SO_RCVLOWAT and RCVBUF autotuning
Applications might use SO_RCVLOWAT on TCP socket hoping to receive one [E]POLLIN event only when a given amount of bytes are ready in socket receive queue.

Problem is that receive autotuning is not aware of this constraint, meaning sk_rcvbuf might be too small to allow all bytes to be stored.

Add a new (struct proto_ops)->set_rcvlowat method so that a protocol can override the default setsockopt(SO_RCVLOWAT) behavior.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 10b19ae commit d136184

File tree

6 files changed

+29
-1
lines changed

6 files changed

+29
-1
lines changed

include/linux/net.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ struct proto_ops {
197197
int offset, size_t size, int flags);
198198
int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
199199
size_t size);
200+
int (*set_rcvlowat)(struct sock *sk, int val);
200201
};
201202

202203
#define DECLARE_SOCKADDR(type, dst, src) \

include/net/tcp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ void tcp_set_keepalive(struct sock *sk, int val);
402402
void tcp_syn_ack_timeout(const struct request_sock *req);
403403
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
404404
int flags, int *addr_len);
405+
int tcp_set_rcvlowat(struct sock *sk, int val);
405406
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
406407
struct tcp_options_received *opt_rx,
407408
int estab, struct tcp_fastopen_cookie *foc);

net/core/sock.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -905,7 +905,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
905905
case SO_RCVLOWAT:
906906
if (val < 0)
907907
val = INT_MAX;
908-
sk->sk_rcvlowat = val ? : 1;
908+
if (sock->ops->set_rcvlowat)
909+
ret = sock->ops->set_rcvlowat(sk, val);
910+
else
911+
sk->sk_rcvlowat = val ? : 1;
909912
break;
910913

911914
case SO_RCVTIMEO:

net/ipv4/af_inet.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,7 @@ const struct proto_ops inet_stream_ops = {
10061006
.compat_getsockopt = compat_sock_common_getsockopt,
10071007
.compat_ioctl = inet_compat_ioctl,
10081008
#endif
1009+
.set_rcvlowat = tcp_set_rcvlowat,
10091010
};
10101011
EXPORT_SYMBOL(inet_stream_ops);
10111012

net/ipv4/tcp.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,27 @@ int tcp_peek_len(struct socket *sock)
17011701
}
17021702
EXPORT_SYMBOL(tcp_peek_len);
17031703

1704+
/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
1705+
int tcp_set_rcvlowat(struct sock *sk, int val)
1706+
{
1707+
sk->sk_rcvlowat = val ? : 1;
1708+
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1709+
return 0;
1710+
1711+
/* val comes from user space and might be close to INT_MAX */
1712+
val <<= 1;
1713+
if (val < 0)
1714+
val = INT_MAX;
1715+
1716+
val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
1717+
if (val > sk->sk_rcvbuf) {
1718+
sk->sk_rcvbuf = val;
1719+
tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
1720+
}
1721+
return 0;
1722+
}
1723+
EXPORT_SYMBOL(tcp_set_rcvlowat);
1724+
17041725
static void tcp_update_recv_tstamps(struct sk_buff *skb,
17051726
struct scm_timestamping *tss)
17061727
{

net/ipv6/af_inet6.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,7 @@ const struct proto_ops inet6_stream_ops = {
590590
.compat_setsockopt = compat_sock_common_setsockopt,
591591
.compat_getsockopt = compat_sock_common_getsockopt,
592592
#endif
593+
.set_rcvlowat = tcp_set_rcvlowat,
593594
};
594595

595596
const struct proto_ops inet6_dgram_ops = {

0 commit comments

Comments
 (0)