Skip to content

Commit 1e2b44e

Browse files
kcp-git authored and davem330 committed
rds: Enable RDS IPv6 support
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the listener is now an IPv6 endpoint which accepts both IPv4 and IPv6 connection requests. RDS/RDMA/IB uses a private data (struct rds_ib_connect_private) exchange between endpoints at RDS connection establishment time to support RDMA. This private data exchange uses a 32 bit integer to represent an IP address. This needs to be changed in order to support IPv6. A new private data struct rds6_ib_connect_private is introduced to handle this.

To ensure backward compatibility, an IPv6 capable RDS stack uses another RDMA listener port (RDS_CM_PORT) to accept IPv6 connections. It continues to use the original RDS_PORT for IPv4 RDS connections. When it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to send the connection setup request.

v5: Fixed syntax problem (David Miller).
v4: Changed port history comments in rds.h (Sowmini Varadhan).
v3: Added support to set up IPv4 connection using mapped address (David Miller). Added support to set up connection between link local and non-link addresses. Various review comments from Santosh Shilimkar and Sowmini Varadhan.
v2: Fixed bound and peer address scope mismatch issue. Added back rds_connect() IPv6 changes.

Signed-off-by: Ka-Cheong Poon <[email protected]>
Acked-by: Santosh Shilimkar <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent eee2fa6 commit 1e2b44e

File tree

14 files changed

+459
-114
lines changed

14 files changed

+459
-114
lines changed

net/rds/af_rds.c

Lines changed: 77 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -142,15 +142,32 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
142142
uaddr_len = sizeof(*sin6);
143143
}
144144
} else {
145-
/* If socket is not yet bound, set the return address family
146-
* to be AF_UNSPEC (value 0) and the address size to be that
147-
* of an IPv4 address.
145+
/* If socket is not yet bound and the socket is connected,
146+
* set the return address family to be the same as the
147+
* connected address, but with 0 address value. If it is not
148+
* connected, set the family to be AF_UNSPEC (value 0) and
149+
* the address size to be that of an IPv4 address.
148150
*/
149151
if (ipv6_addr_any(&rs->rs_bound_addr)) {
150-
sin = (struct sockaddr_in *)uaddr;
151-
memset(sin, 0, sizeof(*sin));
152-
sin->sin_family = AF_UNSPEC;
153-
return sizeof(*sin);
152+
if (ipv6_addr_any(&rs->rs_conn_addr)) {
153+
sin = (struct sockaddr_in *)uaddr;
154+
memset(sin, 0, sizeof(*sin));
155+
sin->sin_family = AF_UNSPEC;
156+
return sizeof(*sin);
157+
}
158+
159+
if (ipv6_addr_type(&rs->rs_conn_addr) &
160+
IPV6_ADDR_MAPPED) {
161+
sin = (struct sockaddr_in *)uaddr;
162+
memset(sin, 0, sizeof(*sin));
163+
sin->sin_family = AF_INET;
164+
return sizeof(*sin);
165+
}
166+
167+
sin6 = (struct sockaddr_in6 *)uaddr;
168+
memset(sin6, 0, sizeof(*sin6));
169+
sin6->sin6_family = AF_INET6;
170+
return sizeof(*sin6);
154171
}
155172
if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
156173
sin = (struct sockaddr_in *)uaddr;
@@ -484,16 +501,18 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
484501
{
485502
struct sock *sk = sock->sk;
486503
struct sockaddr_in *sin;
504+
struct sockaddr_in6 *sin6;
487505
struct rds_sock *rs = rds_sk_to_rs(sk);
506+
int addr_type;
488507
int ret = 0;
489508

490509
lock_sock(sk);
491510

492-
switch (addr_len) {
493-
case sizeof(struct sockaddr_in):
511+
switch (uaddr->sa_family) {
512+
case AF_INET:
494513
sin = (struct sockaddr_in *)uaddr;
495-
if (sin->sin_family != AF_INET) {
496-
ret = -EAFNOSUPPORT;
514+
if (addr_len < sizeof(struct sockaddr_in)) {
515+
ret = -EINVAL;
497516
break;
498517
}
499518
if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
@@ -509,12 +528,56 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
509528
rs->rs_conn_port = sin->sin_port;
510529
break;
511530

512-
case sizeof(struct sockaddr_in6):
513-
ret = -EPROTONOSUPPORT;
531+
case AF_INET6:
532+
sin6 = (struct sockaddr_in6 *)uaddr;
533+
if (addr_len < sizeof(struct sockaddr_in6)) {
534+
ret = -EINVAL;
535+
break;
536+
}
537+
addr_type = ipv6_addr_type(&sin6->sin6_addr);
538+
if (!(addr_type & IPV6_ADDR_UNICAST)) {
539+
__be32 addr4;
540+
541+
if (!(addr_type & IPV6_ADDR_MAPPED)) {
542+
ret = -EPROTOTYPE;
543+
break;
544+
}
545+
546+
/* It is a mapped address. Need to do some sanity
547+
* checks.
548+
*/
549+
addr4 = sin6->sin6_addr.s6_addr32[3];
550+
if (addr4 == htonl(INADDR_ANY) ||
551+
addr4 == htonl(INADDR_BROADCAST) ||
552+
IN_MULTICAST(ntohl(addr4))) {
553+
ret = -EPROTOTYPE;
554+
break;
555+
}
556+
}
557+
558+
if (addr_type & IPV6_ADDR_LINKLOCAL) {
559+
/* If socket is already bound to a link local address,
560+
* the peer address must be on the same link.
561+
*/
562+
if (sin6->sin6_scope_id == 0 ||
563+
(!ipv6_addr_any(&rs->rs_bound_addr) &&
564+
rs->rs_bound_scope_id &&
565+
sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
566+
ret = -EINVAL;
567+
break;
568+
}
569+
/* Remember the connected address scope ID. It will
570+
* be checked against the binding local address when
571+
* the socket is bound.
572+
*/
573+
rs->rs_bound_scope_id = sin6->sin6_scope_id;
574+
}
575+
rs->rs_conn_addr = sin6->sin6_addr;
576+
rs->rs_conn_port = sin6->sin6_port;
514577
break;
515578

516579
default:
517-
ret = -EINVAL;
580+
ret = -EAFNOSUPPORT;
518581
break;
519582
}
520583

net/rds/bind.c

Lines changed: 50 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -127,9 +127,10 @@ static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr,
127127
if (!rhashtable_insert_fast(&bind_hash_table,
128128
&rs->rs_bound_node, ht_parms)) {
129129
*port = rs->rs_bound_port;
130+
rs->rs_bound_scope_id = scope_id;
130131
ret = 0;
131-
rdsdebug("rs %p binding to %pI4:%d\n",
132-
rs, &addr, (int)ntohs(*port));
132+
rdsdebug("rs %p binding to %pI6c:%d\n",
133+
rs, addr, (int)ntohs(*port));
133134
break;
134135
} else {
135136
rs->rs_bound_addr = in6addr_any;
@@ -164,23 +165,53 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
164165
struct in6_addr v6addr, *binding_addr;
165166
struct rds_transport *trans;
166167
__u32 scope_id = 0;
168+
int addr_type;
167169
int ret = 0;
168170
__be16 port;
169171

170-
/* We only allow an RDS socket to be bound to an IPv4 address. IPv6
171-
* address support will be added later.
172+
/* We allow an RDS socket to be bound to either IPv4 or IPv6
173+
* address.
172174
*/
173-
if (addr_len == sizeof(struct sockaddr_in)) {
175+
if (uaddr->sa_family == AF_INET) {
174176
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
175177

176-
if (sin->sin_family != AF_INET ||
177-
sin->sin_addr.s_addr == htonl(INADDR_ANY))
178+
if (addr_len < sizeof(struct sockaddr_in) ||
179+
sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
180+
sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
181+
IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
178182
return -EINVAL;
179183
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
180184
binding_addr = &v6addr;
181185
port = sin->sin_port;
182-
} else if (addr_len == sizeof(struct sockaddr_in6)) {
183-
return -EPROTONOSUPPORT;
186+
} else if (uaddr->sa_family == AF_INET6) {
187+
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;
188+
189+
if (addr_len < sizeof(struct sockaddr_in6))
190+
return -EINVAL;
191+
addr_type = ipv6_addr_type(&sin6->sin6_addr);
192+
if (!(addr_type & IPV6_ADDR_UNICAST)) {
193+
__be32 addr4;
194+
195+
if (!(addr_type & IPV6_ADDR_MAPPED))
196+
return -EINVAL;
197+
198+
/* It is a mapped address. Need to do some sanity
199+
* checks.
200+
*/
201+
addr4 = sin6->sin6_addr.s6_addr32[3];
202+
if (addr4 == htonl(INADDR_ANY) ||
203+
addr4 == htonl(INADDR_BROADCAST) ||
204+
IN_MULTICAST(ntohl(addr4)))
205+
return -EINVAL;
206+
}
207+
/* The scope ID must be specified for link local address. */
208+
if (addr_type & IPV6_ADDR_LINKLOCAL) {
209+
if (sin6->sin6_scope_id == 0)
210+
return -EINVAL;
211+
scope_id = sin6->sin6_scope_id;
212+
}
213+
binding_addr = &sin6->sin6_addr;
214+
port = sin6->sin6_port;
184215
} else {
185216
return -EINVAL;
186217
}
@@ -191,6 +222,16 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
191222
ret = -EINVAL;
192223
goto out;
193224
}
225+
/* Socket is connected. The binding address should have the same
226+
* scope ID as the connected address, except the case when one is
227+
* non-link local address (scope_id is 0).
228+
*/
229+
if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id &&
230+
rs->rs_bound_scope_id &&
231+
scope_id != rs->rs_bound_scope_id) {
232+
ret = -EINVAL;
233+
goto out;
234+
}
194235

195236
ret = rds_add_bound(rs, binding_addr, &port, scope_id);
196237
if (ret)

net/rds/connection.c

Lines changed: 39 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
33
*
44
* This software is available to you under a choice of one of two
55
* licenses. You may choose to be licensed under the terms of the GNU
@@ -36,6 +36,7 @@
3636
#include <linux/export.h>
3737
#include <net/ipv6.h>
3838
#include <net/inet6_hashtables.h>
39+
#include <net/addrconf.h>
3940

4041
#include "rds.h"
4142
#include "loop.h"
@@ -200,6 +201,15 @@ static struct rds_connection *__rds_conn_create(struct net *net,
200201
conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
201202
conn->c_faddr = *faddr;
202203
conn->c_dev_if = dev_if;
204+
/* If the local address is link local, set c_bound_if to be the
205+
* index used for this connection. Otherwise, set it to 0 as
206+
* the socket is not bound to an interface. c_bound_if is used
207+
* to look up a socket when a packet is received
208+
*/
209+
if (ipv6_addr_type(laddr) & IPV6_ADDR_LINKLOCAL)
210+
conn->c_bound_if = dev_if;
211+
else
212+
conn->c_bound_if = 0;
203213

204214
rds_conn_net_set(conn, net);
205215

@@ -486,10 +496,18 @@ void rds_conn_destroy(struct rds_connection *conn)
486496
}
487497
EXPORT_SYMBOL_GPL(rds_conn_destroy);
488498

489-
static void rds_conn_message_info(struct socket *sock, unsigned int len,
490-
struct rds_info_iterator *iter,
491-
struct rds_info_lengths *lens,
492-
int want_send)
499+
static void __rds_inc_msg_cp(struct rds_incoming *inc,
500+
struct rds_info_iterator *iter,
501+
void *saddr, void *daddr, int flip)
502+
{
503+
rds_inc_info_copy(inc, iter, *(__be32 *)saddr,
504+
*(__be32 *)daddr, flip);
505+
}
506+
507+
static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
508+
struct rds_info_iterator *iter,
509+
struct rds_info_lengths *lens,
510+
int want_send)
493511
{
494512
struct hlist_head *head;
495513
struct list_head *list;
@@ -524,18 +542,13 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
524542

525543
/* XXX too lazy to maintain counts.. */
526544
list_for_each_entry(rm, list, m_conn_item) {
527-
__be32 laddr;
528-
__be32 faddr;
529-
530545
total++;
531-
laddr = conn->c_laddr.s6_addr32[3];
532-
faddr = conn->c_faddr.s6_addr32[3];
533546
if (total <= len)
534-
rds_inc_info_copy(&rm->m_inc,
535-
iter,
536-
laddr,
537-
faddr,
538-
0);
547+
__rds_inc_msg_cp(&rm->m_inc,
548+
iter,
549+
&conn->c_laddr,
550+
&conn->c_faddr,
551+
0);
539552
}
540553

541554
spin_unlock_irqrestore(&cp->cp_lock, flags);
@@ -548,6 +561,14 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
548561
lens->each = sizeof(struct rds_info_message);
549562
}
550563

564+
static void rds_conn_message_info(struct socket *sock, unsigned int len,
565+
struct rds_info_iterator *iter,
566+
struct rds_info_lengths *lens,
567+
int want_send)
568+
{
569+
rds_conn_message_info_cmn(sock, len, iter, lens, want_send);
570+
}
571+
551572
static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
552573
struct rds_info_iterator *iter,
553574
struct rds_info_lengths *lens)
@@ -655,6 +676,9 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
655676
struct rds_info_connection *cinfo = buffer;
656677
struct rds_connection *conn = cp->cp_conn;
657678

679+
if (conn->c_isv6)
680+
return 0;
681+
658682
cinfo->next_tx_seq = cp->cp_next_tx_seq;
659683
cinfo->next_rx_seq = cp->cp_next_rx_seq;
660684
cinfo->laddr = conn->c_laddr.s6_addr32[3];

0 commit comments

Comments
 (0)