
Commit d3e9f58

Bang Nguyen authored and Mukesh Kacker committed
RDS: Remove cond_resched() in RX tasklet
Re-install the base fix 17829338 and replace
spin_lock_irqsave(rx_lock)/spin_unlock_irqrestore(rx_lock) with
spin_lock_bh(rx_lock)/spin_unlock_bh(rx_lock) to resolve bugs 18413711
and 18461816. rx_lock is used to prevent concurrent reaping between the
RX tasklet and the RX worker.

Orabug: 18801937

Signed-off-by: Bang Nguyen <[email protected]>
Signed-off-by: Chien-Hua Yen <[email protected]>
Tested-by: Arvind Shukla <[email protected]>

(cherry picked from commit 409138bae9be49ee9782eed244a20774d61d6208)
Signed-off-by: Jerry Snitselaar <[email protected]>
(cherry picked from commit cb2cb09)
1 parent a83382c commit d3e9f58

File tree

2 files changed: +76 -16 lines changed

net/rds/ib.h (+5 -1)
net/rds/ib_cm.c (+71 -15)
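The heart of the change is a reaping handoff between the RX tasklet (softirq context) and a delayed worker (process context), serialized on i_rx_lock with spin_lock_bh()/spin_unlock_bh(). The _bh variant only disables bottom halves on the local CPU, so rds_ib_rx() never runs with hard irqs off; that matters because, as the comment in the diff below notes, it can reach rds_rdma_free_op(), which asserts a warning when irqs_disabled(). What follows is a minimal, self-contained sketch of that pattern against the pre-5.9 tasklet API; every demo_* name is a hypothetical stand-in, not code from this commit.

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>

static DEFINE_SPINLOCK(demo_rx_lock);
static bool demo_wait_for_handler;

static void demo_reap(void)
{
	/* stand-in for rds_ib_rx(): reap RX completions, irqs enabled */
}

/* Softirq side: skip reaping while the worker owns it. */
static void demo_tasklet_fn(unsigned long data)
{
	spin_lock_bh(&demo_rx_lock);
	if (!demo_wait_for_handler)
		demo_reap();
	spin_unlock_bh(&demo_rx_lock);
}
static DECLARE_TASKLET(demo_tasklet, demo_tasklet_fn, 0);

/* Process-context side: reap once, then hand ownership back. */
static void demo_work_fn(struct work_struct *work)
{
	spin_lock_bh(&demo_rx_lock);
	demo_wait_for_handler = false;
	demo_reap();
	spin_unlock_bh(&demo_rx_lock);
}
static DECLARE_DELAYED_WORK(demo_work, demo_work_fn);

static int __init demo_init(void)
{
	tasklet_schedule(&demo_tasklet);

	/* mimic the deferral: let the worker reap 10 ms from now */
	spin_lock_bh(&demo_rx_lock);
	demo_wait_for_handler = true;
	spin_unlock_bh(&demo_rx_lock);
	schedule_delayed_work(&demo_work, msecs_to_jiffies(10));
	return 0;
}

static void __exit demo_exit(void)
{
	tasklet_kill(&demo_tasklet);
	cancel_delayed_work_sync(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Because both sides take the lock with _bh, the worker cannot be preempted by the RX tasklet on the same CPU while holding it, yet neither path pays the cost of spin_lock_irqsave(), which is exactly the trade the commit message describes.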

net/rds/ib.h

Lines changed: 5 additions & 1 deletion

@@ -145,7 +145,7 @@ struct rds_ib_migrate_work {
 };
 
 struct rds_ib_rx_work {
-	struct delayed_work dlywork;
+	struct delayed_work work;
 	struct rds_ib_connection *ic;
 };
 
@@ -236,6 +236,10 @@ struct rds_ib_connection {
 	int i_rcq_vector;
 
 	unsigned int i_rx_poll_cq;
+	struct rds_ib_rx_work i_rx_w;
+	spinlock_t i_rx_lock;
+	unsigned int i_rx_wait_for_handler;
+	atomic_t i_worker_has_rx;
 };
 
 /* This assumes that atomic_t is at least 32 bits */

net/rds/ib_cm.c

Lines changed: 71 additions & 15 deletions

@@ -333,11 +333,17 @@ static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq,
 
 	while ((nr = ib_poll_cq(cq, RDS_WC_MAX, wcs)) > 0) {
 		for (i = 0; i < nr; i++) {
-
-			if (rx &&
-			    (++ic->i_rx_poll_cq % RDS_IB_RX_LIMIT) == 0)
-				cond_resched();
-
+			if (rx) {
+				if ((++ic->i_rx_poll_cq % RDS_IB_RX_LIMIT) == 0) {
+					rdsdebug("connection "
+						 "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
+						 "RX poll_cq processed %d\n",
+						 NIPQUAD(ic->conn->c_laddr),
+						 NIPQUAD(ic->conn->c_faddr),
+						 ic->conn->c_tos,
+						 ic->i_rx_poll_cq);
+				}
+			}
 			wc = wcs + i;
 			rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
 				 (unsigned long long)wc->wr_id, wc->status, wc->byte_len,
@@ -348,6 +354,10 @@ static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq,
 			else
 				rds_ib_recv_cqe_handler(ic, wc, ack_state);
 		}
+
+		if (rx && ic->i_rx_poll_cq >= RDS_IB_RX_LIMIT)
+			break;
+
 	}
 }
 
@@ -374,9 +384,14 @@ void rds_ib_tasklet_fn_send(unsigned long data)
 	rds_send_xmit(ic->conn);
 }
 
-void rds_ib_tasklet_fn_recv(unsigned long data)
+/*
+ * Note: rds_ib_rx(): don't call with irqs disabled.
+ * It calls rds_send_drop_acked() which calls other
+ * routines that reach into rds_rdma_free_op()
+ * where irqs_disabled() warning is asserted!
+ */
+static void rds_ib_rx(struct rds_ib_connection *ic)
 {
-	struct rds_ib_connection *ic = (struct rds_ib_connection *) data;
 	struct rds_connection *conn = ic->conn;
 	struct rds_ib_ack_state ack_state;
 	struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
@@ -394,22 +409,52 @@ void rds_ib_tasklet_fn_recv(unsigned long data)
 
 	if (ack_state.ack_next_valid)
 		rds_ib_set_ack(ic, ack_state.ack_next, ack_state.ack_required);
-
 	if (ack_state.ack_recv_valid && ack_state.ack_recv > ic->i_ack_recv) {
 		rds_send_drop_acked(conn, ack_state.ack_recv, NULL);
 		ic->i_ack_recv = ack_state.ack_recv;
 	}
-
 	if (rds_conn_up(conn))
 		rds_ib_attempt_ack(ic);
 
 	if (rds_ib_srq_enabled)
 		if ((atomic_read(&rds_ibdev->srq->s_num_posted) <
-			rds_ib_srq_hwm_refill) &&
-			!test_and_set_bit(0, &rds_ibdev->srq->s_refill_gate))
-			queue_delayed_work(rds_wq, &rds_ibdev->srq->s_refill_w, 0);
+		     rds_ib_srq_hwm_refill) &&
+		    !test_and_set_bit(0, &rds_ibdev->srq->s_refill_gate))
+			queue_delayed_work(rds_wq,
+					   &rds_ibdev->srq->s_refill_w, 0);
+
+	if (ic->i_rx_poll_cq >= RDS_IB_RX_LIMIT) {
+		ic->i_rx_w.ic = ic;
+		/* Delay 10 msecs until the RX worker starts reaping again */
+		queue_delayed_work(rds_aux_wq, &ic->i_rx_w,
+				   msecs_to_jiffies(10));
+		ic->i_rx_wait_for_handler = 1;
+	}
+}
+
+void rds_ib_tasklet_fn_recv(unsigned long data)
+{
+	struct rds_ib_connection *ic = (struct rds_ib_connection *) data;
+
+	spin_lock_bh(&ic->i_rx_lock);
+	if (ic->i_rx_wait_for_handler)
+		goto out;
+	rds_ib_rx(ic);
+out:
+	spin_unlock_bh(&ic->i_rx_lock);
 }
 
+static void rds_ib_rx_handler(struct work_struct *_work)
+{
+	struct rds_ib_rx_work *work =
+		container_of(_work, struct rds_ib_rx_work, work.work);
+	struct rds_ib_connection *ic = work->ic;
+
+	spin_lock_bh(&ic->i_rx_lock);
+	ic->i_rx_wait_for_handler = 0;
+	rds_ib_rx(ic);
+	spin_unlock_bh(&ic->i_rx_lock);
+}
 
 static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
 {
@@ -1083,9 +1128,18 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
 	}
 
 	/* quiesce tx and rx completion before tearing down */
-	wait_event(rds_ib_ring_empty_wait,
-		   rds_ib_ring_empty(&ic->i_recv_ring) &&
-		   (atomic_read(&ic->i_signaled_sends) == 0));
+	while (!wait_event_timeout(rds_ib_ring_empty_wait,
+				   rds_ib_ring_empty(&ic->i_recv_ring) &&
+				   (atomic_read(&ic->i_signaled_sends) == 0),
+				   msecs_to_jiffies(5000))) {
+
+		/* Try to reap pending RX completions every 5 secs */
+		if (!rds_ib_ring_empty(&ic->i_recv_ring)) {
+			spin_lock_bh(&ic->i_rx_lock);
+			rds_ib_rx(ic);
+			spin_unlock_bh(&ic->i_rx_lock);
+		}
+	}
 
 	tasklet_kill(&ic->i_stasklet);
 	tasklet_kill(&ic->i_rtasklet);
@@ -1222,6 +1276,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 	spin_lock_init(&ic->i_ack_lock);
 #endif
 	atomic_set(&ic->i_signaled_sends, 0);
+	spin_lock_init(&ic->i_rx_lock);
 
 	/*
 	 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -1236,6 +1291,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 	init_completion(&ic->i_last_wqe_complete);
 
 	INIT_DELAYED_WORK(&ic->i_migrate_w.work, rds_ib_migrate);
+	INIT_DELAYED_WORK(&ic->i_rx_w.work, rds_ib_rx_handler);
 
 	spin_lock_irqsave(&ib_nodev_conns_lock, flags);
 	list_add_tail(&ic->ib_node, &ib_nodev_conns);
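One more idiom worth calling out: the shutdown hunk above swaps an unbounded wait_event() for a wait_event_timeout() loop that, whenever 5 seconds pass without the rings draining, reaps stragglers itself under the same lock. That way rds_ib_conn_shutdown() cannot block forever if the tasklet has already deferred to a worker that never gets to run. A minimal sketch of the idiom follows, under the same assumption that all demo_* names are hypothetical stand-ins rather than RDS code:

#include <linux/wait.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/jiffies.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_empty_wait);
static DEFINE_SPINLOCK(demo_rx_lock);
static atomic_t demo_pending = ATOMIC_INIT(0);	/* un-reaped completions */

static void demo_reap(void)
{
	/* stand-in for rds_ib_rx() */
}

/*
 * Wait until quiesced. wait_event_timeout() returns 0 on timeout,
 * so each 5-second lapse without progress triggers a manual reap.
 */
static void demo_quiesce(void)
{
	while (!wait_event_timeout(demo_empty_wait,
				   atomic_read(&demo_pending) == 0,
				   msecs_to_jiffies(5000))) {
		spin_lock_bh(&demo_rx_lock);
		demo_reap();
		spin_unlock_bh(&demo_rx_lock);
	}
}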
