Skip to content

Commit cb2cb09

Browse files
Bang Nguyen authored and Jerry Snitselaar committed
RDS: Remove cond_resched() in RX tasklet
Re-install the base fix 17829338 and replace spin_lock_irqsave(rx_lock)/spin_unlock_irqrestore(rx_lock) with spin_lock_bh(rx_lock)/spin_unlock_bh(rx_lock) to resolve bugs 18413711 and 18461816. rx_lock is used to prevent concurrent reaping between the RX tasklet and worker. Orabug: 18801937 Signed-off-by: Bang Nguyen <[email protected]> Signed-off-by: Chien-Hua Yen <[email protected]> Tested-by: Arvind Shukla <[email protected]> (cherry picked from commit 409138bae9be49ee9782eed244a20774d61d6208) Signed-off-by: Jerry Snitselaar <[email protected]>
1 parent 37f5f84 commit cb2cb09

File tree

2 files changed

+76
-16
lines changed

2 files changed

+76
-16
lines changed

net/rds/ib.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ struct rds_ib_migrate_work {
145145
};
146146

147147
struct rds_ib_rx_work {
148-
struct delayed_work dlywork;
148+
struct delayed_work work;
149149
struct rds_ib_connection *ic;
150150
};
151151

@@ -236,6 +236,10 @@ struct rds_ib_connection {
236236
int i_rcq_vector;
237237

238238
unsigned int i_rx_poll_cq;
239+
struct rds_ib_rx_work i_rx_w;
240+
spinlock_t i_rx_lock;
241+
unsigned int i_rx_wait_for_handler;
242+
atomic_t i_worker_has_rx;
239243
};
240244

241245
/* This assumes that atomic_t is at least 32 bits */

net/rds/ib_cm.c

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -329,11 +329,17 @@ static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq,
329329

330330
while ((nr = ib_poll_cq(cq, RDS_WC_MAX, wcs)) > 0) {
331331
for (i = 0; i < nr; i++) {
332-
333-
if (rx &&
334-
(++ic->i_rx_poll_cq % RDS_IB_RX_LIMIT) == 0)
335-
cond_resched();
336-
332+
if (rx) {
333+
if ((++ic->i_rx_poll_cq % RDS_IB_RX_LIMIT) == 0) {
334+
rdsdebug("connection "
335+
"<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
336+
"RX poll_cq processed %d\n",
337+
NIPQUAD(ic->conn->c_laddr),
338+
NIPQUAD(ic->conn->c_faddr),
339+
ic->conn->c_tos,
340+
ic->i_rx_poll_cq);
341+
}
342+
}
337343
wc = wcs + i;
338344
rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
339345
(unsigned long long)wc->wr_id, wc->status, wc->byte_len,
@@ -344,6 +350,10 @@ static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq,
344350
else
345351
rds_ib_recv_cqe_handler(ic, wc, ack_state);
346352
}
353+
354+
if (rx && ic->i_rx_poll_cq >= RDS_IB_RX_LIMIT)
355+
break;
356+
347357
}
348358
}
349359

@@ -370,9 +380,14 @@ void rds_ib_tasklet_fn_send(unsigned long data)
370380
rds_send_xmit(ic->conn);
371381
}
372382

373-
void rds_ib_tasklet_fn_recv(unsigned long data)
383+
/*
384+
* Note: rds_ib_rx(): don't call with irqs disabled.
385+
* It calls rds_send_drop_acked() which calls other
386+
* routines that reach into rds_rdma_free_op()
387+
* where irqs_disabled() warning is asserted!
388+
*/
389+
static void rds_ib_rx(struct rds_ib_connection *ic)
374390
{
375-
struct rds_ib_connection *ic = (struct rds_ib_connection *) data;
376391
struct rds_connection *conn = ic->conn;
377392
struct rds_ib_ack_state ack_state;
378393
struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
@@ -390,22 +405,52 @@ void rds_ib_tasklet_fn_recv(unsigned long data)
390405

391406
if (ack_state.ack_next_valid)
392407
rds_ib_set_ack(ic, ack_state.ack_next, ack_state.ack_required);
393-
394408
if (ack_state.ack_recv_valid && ack_state.ack_recv > ic->i_ack_recv) {
395409
rds_send_drop_acked(conn, ack_state.ack_recv, NULL);
396410
ic->i_ack_recv = ack_state.ack_recv;
397411
}
398-
399412
if (rds_conn_up(conn))
400413
rds_ib_attempt_ack(ic);
401414

402415
if (rds_ib_srq_enabled)
403416
if ((atomic_read(&rds_ibdev->srq->s_num_posted) <
404-
rds_ib_srq_hwm_refill) &&
405-
!test_and_set_bit(0, &rds_ibdev->srq->s_refill_gate))
406-
queue_delayed_work(rds_wq, &rds_ibdev->srq->s_refill_w, 0);
417+
rds_ib_srq_hwm_refill) &&
418+
!test_and_set_bit(0, &rds_ibdev->srq->s_refill_gate))
419+
queue_delayed_work(rds_wq,
420+
&rds_ibdev->srq->s_refill_w, 0);
421+
422+
if (ic->i_rx_poll_cq >= RDS_IB_RX_LIMIT) {
423+
ic->i_rx_w.ic = ic;
424+
/* Delay 10 msecs until the RX worker starts reaping again */
425+
queue_delayed_work(rds_aux_wq, &ic->i_rx_w,
426+
msecs_to_jiffies(10));
427+
ic->i_rx_wait_for_handler = 1;
428+
}
429+
}
430+
431+
void rds_ib_tasklet_fn_recv(unsigned long data)
432+
{
433+
struct rds_ib_connection *ic = (struct rds_ib_connection *) data;
434+
435+
spin_lock_bh(&ic->i_rx_lock);
436+
if (ic->i_rx_wait_for_handler)
437+
goto out;
438+
rds_ib_rx(ic);
439+
out:
440+
spin_unlock_bh(&ic->i_rx_lock);
407441
}
408442

443+
static void rds_ib_rx_handler(struct work_struct *_work)
444+
{
445+
struct rds_ib_rx_work *work =
446+
container_of(_work, struct rds_ib_rx_work, work.work);
447+
struct rds_ib_connection *ic = work->ic;
448+
449+
spin_lock_bh(&ic->i_rx_lock);
450+
ic->i_rx_wait_for_handler = 0;
451+
rds_ib_rx(ic);
452+
spin_unlock_bh(&ic->i_rx_lock);
453+
}
409454

410455
static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
411456
{
@@ -1064,9 +1109,18 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
10641109
}
10651110

10661111
/* quiesce tx and rx completion before tearing down */
1067-
wait_event(rds_ib_ring_empty_wait,
1068-
rds_ib_ring_empty(&ic->i_recv_ring) &&
1069-
(atomic_read(&ic->i_signaled_sends) == 0));
1112+
while (!wait_event_timeout(rds_ib_ring_empty_wait,
1113+
rds_ib_ring_empty(&ic->i_recv_ring) &&
1114+
(atomic_read(&ic->i_signaled_sends) == 0),
1115+
msecs_to_jiffies(5000))) {
1116+
1117+
/* Try to reap pending RX completions every 5 secs */
1118+
if (!rds_ib_ring_empty(&ic->i_recv_ring)) {
1119+
spin_lock_bh(&ic->i_rx_lock);
1120+
rds_ib_rx(ic);
1121+
spin_unlock_bh(&ic->i_rx_lock);
1122+
}
1123+
}
10701124

10711125
tasklet_kill(&ic->i_stasklet);
10721126
tasklet_kill(&ic->i_rtasklet);
@@ -1199,6 +1253,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
11991253
spin_lock_init(&ic->i_ack_lock);
12001254
#endif
12011255
atomic_set(&ic->i_signaled_sends, 0);
1256+
spin_lock_init(&ic->i_rx_lock);
12021257

12031258
/*
12041259
* rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -1213,6 +1268,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
12131268
init_completion(&ic->i_last_wqe_complete);
12141269

12151270
INIT_DELAYED_WORK(&ic->i_migrate_w.work, rds_ib_migrate);
1271+
INIT_DELAYED_WORK(&ic->i_rx_w.work, rds_ib_rx_handler);
12161272

12171273
spin_lock_irqsave(&ib_nodev_conns_lock, flags);
12181274
list_add_tail(&ic->ib_node, &ib_nodev_conns);

0 commit comments

Comments
 (0)