Skip to content

Commit 945da83

Browse files
sowminiv authored and
Mukesh Kacker committed
RDS-TCP: Always create a new rds_sock for an incoming connection.
When running RDS over TCP, the active (client) side connects to the listening ("passive") side at the RDS_TCP_PORT. After the connection is established, if the client side reboots (potentially without even sending a FIN), the server still has a TCP socket in the established state. If the server side now gets a new SYN from the client with a different client port, TCP will create a new socket-pair, but the RDS layer will incorrectly pull up the old rds_connection (which is still associated with the stale t_sock and RDS socket state). This patch corrects this behavior by having rds_tcp_accept_one() always create a new connection for an incoming TCP SYN. The RDS and TCP state associated with the old socket-pair is cleaned up via the rds_tcp_state_change() callback, which would typically be invoked in most cases when the client TCP sends a FIN on TCP restart, triggering a transition to the CLOSE_WAIT state. In the rarer event of client death without a FIN, TCP_KEEPALIVE probes on the socket will detect the stale socket, and the TCP transition to the CLOSE state will trigger the RDS state cleanup. Orabug: 20930687 Upstream commit-id: f711a6a Signed-off-by: Sowmini Varadhan <[email protected]> Signed-off-by: David S. Miller <[email protected]> Signed-off-by: Mukesh Kacker <[email protected]>
1 parent 57f2563 commit 945da83

File tree

3 files changed

+51
-0
lines changed

3 files changed

+51
-0
lines changed

net/rds/connection.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,10 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
128128
struct rds_transport *loop_trans;
129129
unsigned long flags;
130130
int ret;
131+
struct rds_transport *otrans = trans;
131132

133+
if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
134+
goto new_conn;
132135
rcu_read_lock();
133136
conn = rds_conn_lookup(head, laddr, faddr, trans, tos);
134137
if (conn
@@ -147,6 +150,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
147150
if (conn)
148151
goto out;
149152

153+
new_conn:
150154
conn = kmem_cache_alloc(rds_conn_slab, gfp);
151155
if (!conn) {
152156
conn = ERR_PTR(-ENOMEM);

net/rds/tcp_connect.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ void rds_tcp_state_change(struct sock *sk)
6262
case TCP_ESTABLISHED:
6363
rds_connect_complete(conn);
6464
break;
65+
case TCP_CLOSE_WAIT:
6566
case TCP_CLOSE:
6667
rds_conn_drop(conn);
6768
default:

net/rds/tcp_listen.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,45 @@ static void rds_tcp_accept_worker(struct work_struct *work);
4444
static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker);
4545
static struct socket *rds_tcp_listen_sock;
4646

47+
static int rds_tcp_keepalive(struct socket *sock)
48+
{
49+
/* values below based on xs_udp_default_timeout */
50+
int keepidle = 5; /* send a probe 'keepidle' secs after last data */
51+
int keepcnt = 5; /* number of unack'ed probes before declaring dead */
52+
int keepalive = 1;
53+
int ret = 0;
54+
55+
ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
56+
(char *)&keepalive, sizeof(keepalive));
57+
if (ret < 0)
58+
goto bail;
59+
60+
ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
61+
(char *)&keepcnt, sizeof(keepcnt));
62+
if (ret < 0)
63+
goto bail;
64+
65+
ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE,
66+
(char *)&keepidle, sizeof(keepidle));
67+
if (ret < 0)
68+
goto bail;
69+
70+
/* KEEPINTVL is the interval between successive probes. We follow
71+
* the model in xs_tcp_finish_connecting() and re-use keepidle.
72+
*/
73+
ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL,
74+
(char *)&keepidle, sizeof(keepidle));
75+
bail:
76+
return ret;
77+
}
78+
4779
static int rds_tcp_accept_one(struct socket *sock)
4880
{
4981
struct socket *new_sock = NULL;
5082
struct rds_connection *conn;
5183
int ret;
5284
struct inet_sock *inet;
85+
struct rds_tcp_connection *rs_tcp;
5386

5487
ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
5588
sock->sk->sk_protocol, &new_sock);
@@ -62,6 +95,10 @@ static int rds_tcp_accept_one(struct socket *sock)
6295
if (ret < 0)
6396
goto out;
6497

98+
ret = rds_tcp_keepalive(new_sock);
99+
if (ret < 0)
100+
goto out;
101+
65102
rds_tcp_tune(new_sock);
66103

67104
inet = inet_sk(new_sock->sk);
@@ -76,6 +113,15 @@ static int rds_tcp_accept_one(struct socket *sock)
76113
ret = PTR_ERR(conn);
77114
goto out;
78115
}
116+
/* An incoming SYN request came in, and TCP just accepted it.
117+
* We always create a new conn for listen side of TCP, and do not
118+
* add it to the c_hash_list.
119+
*
120+
* If the client reboots, this conn will need to be cleaned up.
121+
* rds_tcp_state_change() will do that cleanup
122+
*/
123+
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
124+
WARN_ON(!rs_tcp || rs_tcp->t_sock);
79125

80126
/*
81127
* see the comment above rds_queue_delayed_reconnect()

0 commit comments

Comments
 (0)