Skip to content

Commit b63fbdb

Browse files
Dag Moxnes authored and Somasundaram Krishnasamy committed
rds: ib: need to flush neighbor cache for local peer connections on failover
On RDS connections to other hosts, the neighbor cache on the peer side is normally flushed by means of GARP packets. For local connections (connections on the same host), GARP packets do not cause the neighbor cache to be updated. To prevent RDS from using bad entries in the neighbor cache, we need to flush the neighbor cache entries TO the IP address when the IP address is moved due to an ADDR_CHANGE event. Orabug: 30417190 Signed-off-by: Dag Moxnes <[email protected]> Reviewed-by: Ka-Cheong Poon <[email protected]> Signed-off-by: Somasundaram Krishnasamy <[email protected]>
1 parent e9cbb94 commit b63fbdb

File tree

3 files changed

+47
-16
lines changed

3 files changed

+47
-16
lines changed

net/rds/ib.c

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -851,14 +851,8 @@ static unsigned int neigh_flush_interval = 750;
851851
/* Should be large enough to hold the flush message. */
852852
static unsigned int flush_buf_len = 48;
853853

854-
/* Given an rds_connection, flush the peer address' neighbor cache entry.
855-
* If the peer is not in the same network as us, nothing will be flushed.
856-
*
857-
* @net: the connection's namespace
858-
* @conn: pointer to the connection
859-
*/
860-
void rds_ib_flush_neigh(struct net *net,
861-
struct rds_connection *conn)
854+
static void __flush_neigh_conn(struct net *net,
855+
struct rds_connection *conn)
862856
{
863857
struct sockaddr_nl nlsa = { .nl_family = AF_NETLINK };
864858
u64 timenow = jiffies_to_msecs(get_jiffies_64());
@@ -954,4 +948,30 @@ void rds_ib_flush_neigh(struct net *net,
954948
kfree(sndbuf);
955949
}
956950

951+
/* Given an rds_connection, flush the peer address' neighbor cache entry.
952+
* If the peer is not in the same network as us, nothing will be flushed.
953+
*
954+
* @net: the connection's namespace
955+
* @conn: pointer to the connection
956+
* @flush_local_peer: Flush neighbor for the peer, if it is local, instead
957+
* of conn
958+
*/
959+
void rds_ib_flush_neigh(struct net *net,
960+
struct rds_connection *conn, bool flush_local_peer)
961+
{
962+
if (flush_local_peer && conn->c_loopback) {
963+
struct rds_connection *peer;
964+
965+
/* Note the swapped d/saddr */
966+
peer = rds_conn_find(rds_conn_net(conn),
967+
&conn->c_faddr, &conn->c_laddr,
968+
conn->c_trans, conn->c_tos,
969+
0);
970+
if (peer)
971+
__flush_neigh_conn(net, peer);
972+
} else {
973+
__flush_neigh_conn(net, conn);
974+
}
975+
}
976+
957977
MODULE_LICENSE("GPL");

net/rds/ib.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,9 @@ extern struct workqueue_struct *rds_aux_wq;
552552
extern struct rds_transport rds_ib_transport;
553553
extern void rds_ib_add_one(struct ib_device *device);
554554
extern void rds_ib_remove_one(struct ib_device *device, void *client_data);
555-
extern void rds_ib_flush_neigh(struct net *net, struct rds_connection *conn);
555+
extern void rds_ib_flush_neigh(struct net *net,
556+
struct rds_connection *conn,
557+
bool flush_local_peer);
556558
void rds_ib_srq_exit(struct rds_ib_device *rds_ibdev);
557559
int rds_ib_srq_init(struct rds_ib_device *rds_ibdev);
558560

net/rds/rdma_transport.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,15 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
9595
struct rds_connection *conn;
9696
struct rds_transport *trans = &rds_ib_transport;
9797
int ret = 0;
98+
/* ADDR_CHANGE event indicates that the local address has moved
99+
* to a different device, most likely due to failover/failback.
100+
* If this is a local connection (a connection to this host), we need
101+
* to flush the neighbor cache entry for the peer side of the
102+
* connection. In this case we do not need to flush this side of the
103+
* connection. If this is not a local connection, we still flush
104+
* the neighbor cache for the local side of the connection.
105+
*/
106+
bool flush_local_peer = event->event == RDMA_CM_EVENT_ADDR_CHANGE;
98107
int *err;
99108

100109
conn = rds_ib_get_conn(cm_id);
@@ -136,7 +145,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
136145
/* These events might indicate the IP being moved,
137146
* hence flush the address
138147
*/
139-
rds_ib_flush_neigh(&init_net, conn);
148+
rds_ib_flush_neigh(&init_net, conn, flush_local_peer);
140149
rds_rtd(RDS_RTD_CM, "Bailing, conn %p being shut down, ret: %d\n",
141150
conn, ret);
142151
goto out;
@@ -212,7 +221,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
212221

213222
case RDMA_CM_EVENT_ROUTE_ERROR:
214223
/* IP might have been moved so flush the ARP entry and retry */
215-
rds_ib_flush_neigh(&init_net, conn);
224+
rds_ib_flush_neigh(&init_net, conn, flush_local_peer);
216225

217226
rds_rtd_ptr(RDS_RTD_ERR,
218227
"ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
@@ -228,7 +237,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
228237

229238
case RDMA_CM_EVENT_ADDR_ERROR:
230239
/* IP might have been moved so flush the ARP entry and retry */
231-
rds_ib_flush_neigh(&init_net, conn);
240+
rds_ib_flush_neigh(&init_net, conn, flush_local_peer);
232241

233242
rds_rtd_ptr(RDS_RTD_ERR,
234243
"ADDR_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
@@ -242,7 +251,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
242251
case RDMA_CM_EVENT_UNREACHABLE:
243252
case RDMA_CM_EVENT_DEVICE_REMOVAL:
244253
/* IP might have been moved so flush the ARP entry and retry */
245-
rds_ib_flush_neigh(&init_net, conn);
254+
rds_ib_flush_neigh(&init_net, conn, flush_local_peer);
246255

247256
rds_rtd_ptr(RDS_RTD_ERR,
248257
"CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
@@ -254,7 +263,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
254263

255264
case RDMA_CM_EVENT_REJECTED:
256265
/* May be due to ARP cache containing an incorrect dmac, hence flush it */
257-
rds_ib_flush_neigh(&init_net, conn);
266+
rds_ib_flush_neigh(&init_net, conn, flush_local_peer);
258267

259268
err = (int *)event->param.conn.private_data;
260269

@@ -300,7 +309,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
300309

301310
case RDMA_CM_EVENT_ADDR_CHANGE:
302311
/* IP might have been moved so flush the ARP entry and retry */
303-
rds_ib_flush_neigh(&init_net, conn);
312+
rds_ib_flush_neigh(&init_net, conn, flush_local_peer);
304313

305314
rds_rtd_ptr(RDS_RTD_CM_EXT,
306315
"ADDR_CHANGE event <%pI6c,%pI6c>\n",
@@ -315,7 +324,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
315324

316325
case RDMA_CM_EVENT_DISCONNECTED:
317326
/* IP might have been moved so flush the ARP entry and retry */
318-
rds_ib_flush_neigh(&init_net, conn);
327+
rds_ib_flush_neigh(&init_net, conn, flush_local_peer);
319328

320329
rds_rtd_ptr(RDS_RTD_CM,
321330
"DISCONNECT event - dropping conn %p <%pI6c,%pI6c,%d>\n",

0 commit comments

Comments
 (0)