Skip to content

Commit c79f25a

Browse files
Merge branch 'topic/uek-4.1/ofed' of git://ca-git.us.oracle.com/linux-uek-ofed into topic/uek-4.1/ofed
* 'topic/uek-4.1/ofed' of git://ca-git.us.oracle.com/linux-uek-ofed:
  - RDS-TCP: Reset tcp callbacks if re-using an outgoing socket in rds_tcp_accept_one()
  - RDS: Invoke ->laddr_check() in rds_bind() for explicitly bound transports.
  - RDS: rds_conn_lookup() should factor in the struct net for a match
  - RDS: Use a single TCP socket for both send and receive.
  - RDS-TCP: Do not bloat sndbuf/rcvbuf in rds_tcp_tune
  - RDS-TCP: Set up MSG_MORE and MSG_SENDPAGE_NOTLAST as appropriate in rds_tcp_
  - Revert "rds_rdma: rds_sendmsg should return EAGAIN if connection not setup"
  - rds: make sure base connection is up on both sides
  - rds_ib/iw: fixed big endianness conversion issue for dp->dp_ack_seq
  - RDS: fix race condition when sending a message on unbound socket.
  - RDS: verify the underlying transport exists before creating a connection
  - mlx4: indicate memory resource exhaustion
  - IB/mlx4: Use correct order of variables in log message
  - mlx4_core: Introduce restrictions for PD update
2 parents 9a9e21b + 0791900 commit c79f25a

File tree

13 files changed: 105 additions (+105) and 74 deletions (-74).

drivers/net/ethernet/mellanox/mlx4/cmd.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1031,7 +1031,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
10311031
if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
10321032
smp->method == IB_MGMT_METHOD_GET) || network_view) {
10331033
mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
1034-
slave, smp->method, smp->mgmt_class,
1034+
slave, smp->mgmt_class, smp->method,
10351035
network_view ? "Network" : "Host",
10361036
be16_to_cpu(smp->attr_id));
10371037
return -EPERM;

drivers/net/ethernet/mellanox/mlx4/mr.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,13 @@ u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
160160
seg_order = max_t(int, order - log_mtts_per_seg, 0);
161161

162162
seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order);
163-
if (seg == -1)
163+
if (seg == -1) {
164+
printk_once(KERN_NOTICE
165+
"[%d]: Exhausted MTT entries, current size=%u. "
166+
"Try updating log_num_mtt module parameter\n",
167+
task_pid_nr(current), mr_table->mtt_buddy.max_order);
164168
return -1;
169+
}
165170

166171
offset = seg * (1 << log_mtts_per_seg);
167172

@@ -445,7 +450,15 @@ int __mlx4_mpt_reserve(struct mlx4_dev *dev)
445450
{
446451
struct mlx4_priv *priv = mlx4_priv(dev);
447452

448-
return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
453+
int ret = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
454+
455+
if (ret == -1)
456+
printk_once(KERN_NOTICE
457+
"[%d]: MR: Exhausted MPT entries, current size=%u. "
458+
"Try updating log_num_mpt module parameter\n",
459+
task_pid_nr(current), priv->mr_table.mpt_bitmap.max);
460+
461+
return ret;
449462
}
450463

451464
static int mlx4_mpt_reserve(struct mlx4_dev *dev)
@@ -1009,7 +1022,7 @@ EXPORT_SYMBOL_GPL(mlx4_set_fmr_pd);
10091022
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
10101023
int npages, u64 iova, u32 *lkey, u32 *rkey)
10111024
{
1012-
u32 key;
1025+
u32 key, pdflags;
10131026
int i, err;
10141027

10151028
err = mlx4_check_fmr(fmr, page_list, npages, iova);
@@ -1041,7 +1054,11 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list
10411054
fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
10421055
fmr->mpt->start = cpu_to_be64(iova);
10431056

1044-
fmr->mpt->pd_flags = cpu_to_be32(fmr->mr.pd | MLX4_MPT_PD_FLAG_EN_INV);
1057+
pdflags = be32_to_cpu(fmr->mpt->pd_flags) & ~MLX4_MPT_PD_MASK;
1058+
if (mlx4_is_mfunc(dev))
1059+
pdflags &= ~MLX4_MPT_PD_VF_MASK;
1060+
fmr->mpt->pd_flags = cpu_to_be32(pdflags | fmr->mr.pd |
1061+
MLX4_MPT_PD_FLAG_EN_INV);
10451062
if (fmr->mr.mtt.order >= 0 && fmr->mr.mtt.page_shift == 0) {
10461063
fmr->mpt->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
10471064
MLX4_MPT_PD_FLAG_RAE);

net/rds/af_rds.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,8 @@ static int rds_user_reset(struct rds_sock *rs, char __user *optval, int optlen)
325325
sizeof(struct rds_reset)))
326326
return -EFAULT;
327327

328-
conn = rds_conn_find(reset.src.s_addr, reset.dst.s_addr,
328+
conn = rds_conn_find(sock_net(rds_rs_to_sk(rs)),
329+
reset.src.s_addr, reset.dst.s_addr,
329330
rs->rs_transport, reset.tos);
330331

331332
if (conn) {

net/rds/bind.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,14 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
198198
goto out;
199199

200200
if (rs->rs_transport) { /* previously bound */
201-
ret = 0;
201+
trans = rs->rs_transport;
202+
if (trans->laddr_check(sock_net(sock->sk),
203+
sin->sin_addr.s_addr) != 0) {
204+
ret = -ENOPROTOOPT;
205+
rds_remove_bound(rs);
206+
} else {
207+
ret = 0;
208+
}
202209
goto out;
203210
}
204211
trans = rds_trans_get_preferred(sock_net(sock->sk),

net/rds/connection.c

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
6969
} while (0)
7070

7171
/* rcu read lock must be held or the connection spinlock */
72-
static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
72+
static struct rds_connection *rds_conn_lookup(struct net *net,
73+
struct hlist_head *head,
7374
__be32 laddr, __be32 faddr,
7475
struct rds_transport *trans,
7576
u8 tos)
@@ -79,7 +80,8 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
7980
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
8081
if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
8182
conn->c_tos == tos &&
82-
conn->c_trans == trans) {
83+
conn->c_trans == trans &&
84+
net == rds_conn_net(conn)) {
8385
ret = conn;
8486
break;
8587
}
@@ -129,12 +131,9 @@ static struct rds_connection *__rds_conn_create(struct net *net,
129131
struct rds_transport *loop_trans;
130132
unsigned long flags;
131133
int ret;
132-
struct rds_transport *otrans = trans;
133134

134-
if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
135-
goto new_conn;
136135
rcu_read_lock();
137-
conn = rds_conn_lookup(head, laddr, faddr, trans, tos);
136+
conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos);
138137
if (conn
139138
&& conn->c_loopback
140139
&& conn->c_trans != &rds_loop_transport
@@ -151,7 +150,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
151150
if (conn)
152151
goto out;
153152

154-
new_conn:
155153
conn = kmem_cache_alloc(rds_conn_slab, gfp);
156154
if (!conn) {
157155
conn = ERR_PTR(-ENOMEM);
@@ -209,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
209207

210208
atomic_set(&conn->c_state, RDS_CONN_DOWN);
211209
conn->c_send_gen = 0;
210+
conn->c_outgoing = (is_outgoing ? 1 : 0);
212211
conn->c_reconnect_jiffies = 0;
213212
conn->c_reconnect_start = get_seconds();
214213
conn->c_reconnect_warn = 1;
@@ -255,22 +254,13 @@ static struct rds_connection *__rds_conn_create(struct net *net,
255254
/* Creating normal conn */
256255
struct rds_connection *found;
257256

258-
if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
259-
found = NULL;
260-
else
261-
found = rds_conn_lookup(head, laddr, faddr, trans, tos);
257+
found = rds_conn_lookup(net, head, laddr, faddr, trans, tos);
262258
if (found) {
263259
trans->conn_free(conn->c_transport_data);
264260
kmem_cache_free(rds_conn_slab, conn);
265261
conn = found;
266262
} else {
267-
if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) ||
268-
(otrans->t_type != RDS_TRANS_TCP)) {
269-
/* Only the active side should be added to
270-
* reconnect list for RDS-TCP.
271-
*/
272-
hlist_add_head_rcu(&conn->c_hash_node, head);
273-
}
263+
hlist_add_head_rcu(&conn->c_hash_node, head);
274264
rds_cong_add_conn(conn);
275265
rds_conn_count++;
276266
}
@@ -299,14 +289,15 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
299289
}
300290
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
301291

302-
struct rds_connection *rds_conn_find(__be32 laddr, __be32 faddr,
303-
struct rds_transport *trans, u8 tos)
292+
struct rds_connection *rds_conn_find(struct net *net, __be32 laddr,
293+
__be32 faddr, struct rds_transport *trans,
294+
u8 tos)
304295
{
305296
struct rds_connection *conn;
306297
struct hlist_head *head = rds_conn_bucket(laddr, faddr);
307298

308299
rcu_read_lock();
309-
conn = rds_conn_lookup(head, laddr, faddr, trans, tos);
300+
conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos);
310301
rcu_read_unlock();
311302

312303
return conn;
@@ -369,11 +360,16 @@ void rds_conn_shutdown(struct rds_connection *conn, int restart)
369360
rcu_read_lock();
370361
if (!hlist_unhashed(&conn->c_hash_node) && restart) {
371362
rcu_read_unlock();
372-
rds_rtd(RDS_RTD_CM_EXT,
373-
"queueing reconnect request... <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
374-
NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
375-
conn->c_tos);
376-
rds_queue_reconnect(conn);
363+
if (conn->c_trans->t_type != RDS_TRANS_TCP ||
364+
conn->c_outgoing == 1) {
365+
rds_rtd(RDS_RTD_CM_EXT,
366+
"queueing reconnect request... "
367+
"<%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
368+
NIPQUAD(conn->c_laddr),
369+
NIPQUAD(conn->c_faddr),
370+
conn->c_tos);
371+
rds_queue_reconnect(conn);
372+
}
377373
} else {
378374
rcu_read_unlock();
379375
}

net/rds/ib_cm.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
290290
dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
291291
dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
292292
dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
293-
dp->dp_ack_seq = rds_ib_piggyb_ack(ic);
293+
dp->dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic));
294294
dp->dp_tos = conn->c_tos;
295295

296296
/* Advertise flow control */
@@ -822,6 +822,20 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
822822
}
823823
}
824824

825+
/*
826+
* Make sure to have zero lane connection up on both sides,
827+
* to avoid establishing connection on non-ideal path records.
828+
*/
829+
if (dp->dp_tos && rds_conn_state(conn->c_base_conn) != RDS_CONN_UP) {
830+
printk(KERN_INFO "RDS/IB: connection "
831+
"<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
832+
"incoming REQ with base connection down, retry\n",
833+
NIPQUAD(conn->c_laddr),
834+
NIPQUAD(conn->c_faddr),
835+
conn->c_tos);
836+
rds_conn_drop(conn);
837+
}
838+
825839
/*
826840
* The connection request may occur while the
827841
* previous connection exist, e.g. in case of failover.

net/rds/iw_cm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ static void rds_iw_cm_fill_conn_param(struct rds_connection *conn,
121121
dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
122122
dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
123123
dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IW_SUPPORTED_PROTOCOLS);
124-
dp->dp_ack_seq = rds_iw_piggyb_ack(ic);
124+
dp->dp_ack_seq = cpu_to_be64(rds_iw_piggyb_ack(ic));
125125

126126
/* Advertise flow control */
127127
if (ic->i_flowctl) {

net/rds/rds.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,9 @@ struct rds_connection {
9999
struct hlist_node c_hash_node;
100100
__be32 c_laddr;
101101
__be32 c_faddr;
102-
unsigned int c_loopback:1;
102+
unsigned int c_loopback:1,
103+
c_outgoing:1,
104+
c_pad_to_32:30;
103105
struct rds_connection *c_passive;
104106

105107
struct rds_cong_map *c_lcong;
@@ -706,7 +708,8 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
706708
__be32 laddr, __be32 faddr,
707709
struct rds_transport *trans,
708710
u8 tos, gfp_t gfp);
709-
struct rds_connection *rds_conn_find(__be32 laddr, __be32 faddr,
711+
struct rds_connection *rds_conn_find(struct net *net, __be32 laddr,
712+
__be32 faddr,
710713
struct rds_transport *trans, u8 tos);
711714
void rds_conn_shutdown(struct rds_connection *conn, int restart);
712715
void rds_conn_destroy(struct rds_connection *conn);

net/rds/recv.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,8 @@ rds_recv_route(struct rds_connection *conn, struct rds_incoming *inc,
358358
nconn = conn;
359359
} else {
360360
/* reroute to a new conn structure, possibly the same one */
361-
nconn = rds_conn_find(dst->saddr, dst->daddr, conn->c_trans,
361+
nconn = rds_conn_find(rds_conn_net(conn),
362+
dst->saddr, dst->daddr, conn->c_trans,
362363
conn->c_tos);
363364
}
364365

net/rds/send.c

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,6 +1228,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
12281228

12291229
rm->m_daddr = daddr;
12301230

1231+
/* Parse any control messages the user may have included. */
1232+
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
1233+
if (ret)
1234+
goto out;
1235+
12311236
if (rm->rdma.op_active)
12321237
total_payload_len += rm->rdma.op_bytes;
12331238

@@ -1322,16 +1327,6 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
13221327
goto out;
13231328
}
13241329

1325-
if (!rds_conn_up(conn)) {
1326-
ret = -EAGAIN;
1327-
goto out;
1328-
}
1329-
1330-
/* Parse any control messages the user may have included. */
1331-
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
1332-
if (ret)
1333-
goto out;
1334-
13351330
while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
13361331
dport, &queued)) {
13371332
rds_stats_inc(s_send_queue_full);

net/rds/tcp.c

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -65,21 +65,13 @@ void rds_tcp_nonagle(struct socket *sock)
6565
set_fs(oldfs);
6666
}
6767

68+
/* All module specific customizations to the RDS-TCP socket should be done in
69+
* rds_tcp_tune() and applied after socket creation. In general these
70+
* customizations should be tunable via module_param()
71+
*/
6872
void rds_tcp_tune(struct socket *sock)
6973
{
70-
struct sock *sk = sock->sk;
71-
7274
rds_tcp_nonagle(sock);
73-
74-
/*
75-
* We're trying to saturate gigabit with the default,
76-
* see svc_sock_setbufsize().
77-
*/
78-
lock_sock(sk);
79-
sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE;
80-
sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE;
81-
sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
82-
release_sock(sk);
8375
}
8476

8577
u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)

net/rds/tcp_listen.c

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -109,28 +109,27 @@ int rds_tcp_accept_one(struct socket *sock)
109109
goto out;
110110
}
111111
/* An incoming SYN request came in, and TCP just accepted it.
112-
* We always create a new conn for listen side of TCP, and do not
113-
* add it to the c_hash_list.
114112
*
115113
* If the client reboots, this conn will need to be cleaned up.
116114
* rds_tcp_state_change() will do that cleanup
117115
*/
118116
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
119-
WARN_ON(!rs_tcp || rs_tcp->t_sock);
120-
121-
/*
122-
* see the comment above rds_queue_delayed_reconnect()
123-
*/
124-
if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
125-
if (rds_conn_state(conn) == RDS_CONN_UP)
126-
rds_tcp_stats_inc(s_tcp_listen_closed_stale);
127-
else
128-
rds_tcp_stats_inc(s_tcp_connect_raced);
129-
rds_conn_drop(conn);
117+
if (rs_tcp->t_sock &&
118+
ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
119+
struct sock *nsk = new_sock->sk;
120+
121+
nsk->sk_user_data = NULL;
122+
nsk->sk_prot->disconnect(nsk, 0);
123+
tcp_done(nsk);
124+
new_sock = NULL;
130125
ret = 0;
131126
goto out;
127+
} else if (rs_tcp->t_sock) {
128+
rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
129+
conn->c_outgoing = 0;
132130
}
133131

132+
rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
134133
rds_tcp_set_callbacks(new_sock, conn);
135134
rds_connect_complete(conn);
136135
new_sock = NULL;

0 commit comments

Comments
 (0)