Skip to content

Commit f373324

Browse files
josefbacik authored and axboe committed
nbd: handle single path failures gracefully
Currently if we have multiple connections and one of them goes down we will tear down the whole device. However there's no reason we need to do this as we could have other connections that are working fine. Deal with this by keeping track of the state of the different connections, and if we lose one we mark it as dead and send all IO destined for that socket to one of the other healthy sockets. Any outstanding requests that were on the dead socket will timeout and be re-submitted properly. Signed-off-by: Josef Bacik <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 9b1355d commit f373324

File tree

1 file changed

+125
-26
lines changed

1 file changed

+125
-26
lines changed

drivers/block/nbd.c

Lines changed: 125 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ struct nbd_sock {
4949
struct mutex tx_lock;
5050
struct request *pending;
5151
int sent;
52+
bool dead;
53+
int fallback_index;
5254
};
5355

5456
#define NBD_TIMEDOUT 0
@@ -82,6 +84,7 @@ struct nbd_device {
8284

8385
struct nbd_cmd {
8486
struct nbd_device *nbd;
87+
int index;
8588
struct completion send_complete;
8689
};
8790

@@ -124,6 +127,15 @@ static const char *nbdcmd_to_ascii(int cmd)
124127
return "invalid";
125128
}
126129

130+
static void nbd_mark_nsock_dead(struct nbd_sock *nsock)
131+
{
132+
if (!nsock->dead)
133+
kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
134+
nsock->dead = true;
135+
nsock->pending = NULL;
136+
nsock->sent = 0;
137+
}
138+
127139
static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
128140
{
129141
if (bdev->bd_openers <= 1)
@@ -191,7 +203,31 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
191203
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
192204
struct nbd_device *nbd = cmd->nbd;
193205

194-
dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
206+
if (nbd->num_connections > 1) {
207+
dev_err_ratelimited(nbd_to_dev(nbd),
208+
"Connection timed out, retrying\n");
209+
mutex_lock(&nbd->config_lock);
210+
/*
211+
* Hooray we have more connections, requeue this IO, the submit
212+
* path will put it on a real connection.
213+
*/
214+
if (nbd->socks && nbd->num_connections > 1) {
215+
if (cmd->index < nbd->num_connections) {
216+
struct nbd_sock *nsock =
217+
nbd->socks[cmd->index];
218+
mutex_lock(&nsock->tx_lock);
219+
nbd_mark_nsock_dead(nsock);
220+
mutex_unlock(&nsock->tx_lock);
221+
}
222+
mutex_unlock(&nbd->config_lock);
223+
blk_mq_requeue_request(req, true);
224+
return BLK_EH_NOT_HANDLED;
225+
}
226+
mutex_unlock(&nbd->config_lock);
227+
} else {
228+
dev_err_ratelimited(nbd_to_dev(nbd),
229+
"Connection timed out\n");
230+
}
195231
set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
196232
req->errors = -EIO;
197233

@@ -301,6 +337,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
301337
}
302338
iov_iter_advance(&from, sent);
303339
}
340+
cmd->index = index;
304341
request.type = htonl(type);
305342
if (type != NBD_CMD_FLUSH) {
306343
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
@@ -328,7 +365,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
328365
}
329366
dev_err_ratelimited(disk_to_dev(nbd->disk),
330367
"Send control failed (result %d)\n", result);
331-
return -EIO;
368+
return -EAGAIN;
332369
}
333370
send_pages:
334371
if (type != NBD_CMD_WRITE)
@@ -370,7 +407,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
370407
dev_err(disk_to_dev(nbd->disk),
371408
"Send data failed (result %d)\n",
372409
result);
373-
return -EIO;
410+
return -EAGAIN;
374411
}
375412
/*
376413
* The completion might already have come in,
@@ -389,6 +426,12 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
389426
return 0;
390427
}
391428

429+
static int nbd_disconnected(struct nbd_device *nbd)
430+
{
431+
return test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) ||
432+
test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags);
433+
}
434+
392435
/* NULL returned = something went wrong, inform userspace */
393436
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
394437
{
@@ -405,8 +448,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
405448
iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
406449
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
407450
if (result <= 0) {
408-
if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
409-
!test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
451+
if (!nbd_disconnected(nbd))
410452
dev_err(disk_to_dev(nbd->disk),
411453
"Receive control failed (result %d)\n", result);
412454
return ERR_PTR(result);
@@ -449,8 +491,19 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
449491
if (result <= 0) {
450492
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
451493
result);
452-
req->errors = -EIO;
453-
return cmd;
494+
/*
495+
* If we've disconnected or we only have 1
496+
* connection then we need to make sure we
497+
* complete this request, otherwise error out
498+
* and let the timeout stuff handle resubmitting
499+
* this request onto another connection.
500+
*/
501+
if (nbd_disconnected(nbd) ||
502+
nbd->num_connections <= 1) {
503+
req->errors = -EIO;
504+
return cmd;
505+
}
506+
return ERR_PTR(-EIO);
454507
}
455508
dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
456509
cmd, bvec.bv_len);
@@ -495,19 +548,17 @@ static void recv_work(struct work_struct *work)
495548
while (1) {
496549
cmd = nbd_read_stat(nbd, args->index);
497550
if (IS_ERR(cmd)) {
551+
struct nbd_sock *nsock = nbd->socks[args->index];
552+
553+
mutex_lock(&nsock->tx_lock);
554+
nbd_mark_nsock_dead(nsock);
555+
mutex_unlock(&nsock->tx_lock);
498556
ret = PTR_ERR(cmd);
499557
break;
500558
}
501559

502560
nbd_end_request(cmd);
503561
}
504-
505-
/*
506-
* We got an error, shut everybody down if this wasn't the result of a
507-
* disconnect request.
508-
*/
509-
if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
510-
sock_shutdown(nbd);
511562
atomic_dec(&nbd->recv_threads);
512563
wake_up(&nbd->recv_wq);
513564
}
@@ -531,6 +582,47 @@ static void nbd_clear_que(struct nbd_device *nbd)
531582
dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
532583
}
533584

585+
static int find_fallback(struct nbd_device *nbd, int index)
586+
{
587+
int new_index = -1;
588+
struct nbd_sock *nsock = nbd->socks[index];
589+
int fallback = nsock->fallback_index;
590+
591+
if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags))
592+
return new_index;
593+
594+
if (nbd->num_connections <= 1) {
595+
dev_err_ratelimited(disk_to_dev(nbd->disk),
596+
"Attempted send on invalid socket\n");
597+
return new_index;
598+
}
599+
600+
if (fallback >= 0 && fallback < nbd->num_connections &&
601+
!nbd->socks[fallback]->dead)
602+
return fallback;
603+
604+
if (nsock->fallback_index < 0 ||
605+
nsock->fallback_index >= nbd->num_connections ||
606+
nbd->socks[nsock->fallback_index]->dead) {
607+
int i;
608+
for (i = 0; i < nbd->num_connections; i++) {
609+
if (i == index)
610+
continue;
611+
if (!nbd->socks[i]->dead) {
612+
new_index = i;
613+
break;
614+
}
615+
}
616+
nsock->fallback_index = new_index;
617+
if (new_index < 0) {
618+
dev_err_ratelimited(disk_to_dev(nbd->disk),
619+
"Dead connection, failed to find a fallback\n");
620+
return new_index;
621+
}
622+
}
623+
new_index = nsock->fallback_index;
624+
return new_index;
625+
}
534626

535627
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
536628
{
@@ -544,22 +636,16 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
544636
"Attempted send on invalid socket\n");
545637
return -EINVAL;
546638
}
547-
548-
if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
549-
dev_err_ratelimited(disk_to_dev(nbd->disk),
550-
"Attempted send on closed socket\n");
551-
return -EINVAL;
552-
}
553-
554639
req->errors = 0;
555-
640+
again:
556641
nsock = nbd->socks[index];
557642
mutex_lock(&nsock->tx_lock);
558-
if (unlikely(!nsock->sock)) {
643+
if (nsock->dead) {
644+
index = find_fallback(nbd, index);
559645
mutex_unlock(&nsock->tx_lock);
560-
dev_err_ratelimited(disk_to_dev(nbd->disk),
561-
"Attempted send on closed socket\n");
562-
return -EINVAL;
646+
if (index < 0)
647+
return -EIO;
648+
goto again;
563649
}
564650

565651
/* Handle the case that we have a pending request that was partially
@@ -572,7 +658,18 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
572658
ret = 0;
573659
goto out;
574660
}
661+
/*
662+
* Some failures are related to the link going down, so anything that
663+
* returns EAGAIN can be retried on a different socket.
664+
*/
575665
ret = nbd_send_cmd(nbd, cmd, index);
666+
if (ret == -EAGAIN) {
667+
dev_err_ratelimited(disk_to_dev(nbd->disk),
668+
"Request send failed trying another connection\n");
669+
nbd_mark_nsock_dead(nsock);
670+
mutex_unlock(&nsock->tx_lock);
671+
goto again;
672+
}
576673
out:
577674
mutex_unlock(&nsock->tx_lock);
578675
return ret;
@@ -646,6 +743,8 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
646743

647744
nbd->socks = socks;
648745

746+
nsock->fallback_index = -1;
747+
nsock->dead = false;
649748
mutex_init(&nsock->tx_lock);
650749
nsock->sock = sock;
651750
nsock->pending = NULL;

0 commit comments

Comments
 (0)