Skip to content

Commit f9ff0da

Browse files
lgeaxboe
authored and committed
drbd: allow parallel flushes for multi-volume resources
To maintain write-order fidelity accros all volumes in a DRBD resource, the receiver of a P_BARRIER needs to issue flushes to all volumes. We used to do this by calling blkdev_issue_flush(), synchronously, one volume at a time. We now submit all flushes to all volumes in parallel, then wait for all completions, to reduce worst-case latencies on multi-volume resources. Signed-off-by: Philipp Reisner <[email protected]> Signed-off-by: Lars Ellenberg <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 0982368 commit f9ff0da

File tree

1 file changed

+89
-25
lines changed

1 file changed

+89
-25
lines changed

drivers/block/drbd/drbd_receiver.c

Lines changed: 89 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,13 +1204,84 @@ static int drbd_recv_header(struct drbd_connection *connection, struct packet_in
12041204
return err;
12051205
}
12061206

1207-
static void drbd_flush(struct drbd_connection *connection)
1207+
/* This is blkdev_issue_flush, but asynchronous.
 * We want to submit to all component volumes in parallel,
 * then wait for all completions.
 */
/* Shared state for one parallel flush across all volumes of a resource:
 * 'pending' counts in-flight flush bios (the submitter holds one extra
 * reference while issuing), 'error' latches a nonzero bio error if any
 * volume's flush fails, and 'done' is completed when 'pending' drops
 * to zero. */
struct issue_flush_context {
	atomic_t pending;
	int error;
	struct completion done;
};
/* Per-bio cookie linking a single flush bio back to its device and to
 * the shared issue_flush_context; allocated in submit_one_flush() and
 * freed in one_flush_endio(). */
struct one_flush_context {
	struct drbd_device *device;
	struct issue_flush_context *ctx;
};
1220+
1221+
/* Completion callback for one flush bio issued by submit_one_flush().
 * Records any bio error in the shared context, frees the per-bio cookie
 * and the bio, drops the ldev and device references held for this flush,
 * and signals the waiter once the last pending flush has finished. */
void one_flush_endio(struct bio *bio)
{
	struct one_flush_context *octx = bio->bi_private;
	struct drbd_device *device = octx->device;
	struct issue_flush_context *ctx = octx->ctx;

	if (bio->bi_error) {
		/* Plain int store, not serialized: if several volumes fail
		 * concurrently, one of the error values is kept. */
		ctx->error = bio->bi_error;
		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
	}
	kfree(octx);
	bio_put(bio);

	clear_bit(FLUSH_PENDING, &device->flags);
	put_ldev(device);
	kref_put(&device->kref, drbd_destroy_device);

	/* The last completion (or the submitter's own final decrement)
	 * wakes whoever waits on ctx->done. */
	if (atomic_dec_and_test(&ctx->pending))
		complete(&ctx->done);
}
1241+
1242+
/* Submit one asynchronous FLUSH bio to a device's backing block device.
 * On success, bumps ctx->pending; completion and error reporting happen
 * in one_flush_endio(). On allocation failure, records -ENOMEM in the
 * context and drops the ldev/kref references the caller handed us, so
 * reference counting is balanced on every path. */
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 0);
	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
	if (!bio || !octx) {
		drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
		/* FIXME: what else can I do now? disconnecting or detaching
		 * really does not help to improve the state of the world, either.
		 */
		kfree(octx);		/* kfree(NULL) is a no-op */
		if (bio)
			bio_put(bio);

		ctx->error = -ENOMEM;
		put_ldev(device);
		kref_put(&device->kref, drbd_destroy_device);
		return;
	}

	octx->device = device;
	octx->ctx = ctx;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_private = octx;
	bio->bi_end_io = one_flush_endio;
	bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH);

	/* Timestamp of flush issue; presumably used for flush latency /
	 * disk-timeout accounting elsewhere — TODO confirm against users
	 * of flush_jif. */
	device->flush_jif = jiffies;
	set_bit(FLUSH_PENDING, &device->flags);
	/* Count this bio BEFORE submitting, so the endio's dec_and_test
	 * cannot observe pending hitting zero prematurely. */
	atomic_inc(&ctx->pending);
	submit_bio(bio);
}
1273+
1274+
static void drbd_flush(struct drbd_connection *connection)
1275+
{
12131276
if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
1277+
struct drbd_peer_device *peer_device;
1278+
struct issue_flush_context ctx;
1279+
int vnr;
1280+
1281+
atomic_set(&ctx.pending, 1);
1282+
ctx.error = 0;
1283+
init_completion(&ctx.done);
1284+
12141285
rcu_read_lock();
12151286
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
12161287
struct drbd_device *device = peer_device->device;
@@ -1220,31 +1291,24 @@ static void drbd_flush(struct drbd_connection *connection)
12201291
kref_get(&device->kref);
12211292
rcu_read_unlock();
12221293

1223-
/* Right now, we have only this one synchronous code path
1224-
* for flushes between request epochs.
1225-
* We may want to make those asynchronous,
1226-
* or at least parallelize the flushes to the volume devices.
1227-
*/
1228-
device->flush_jif = jiffies;
1229-
set_bit(FLUSH_PENDING, &device->flags);
1230-
rv = blkdev_issue_flush(device->ldev->backing_bdev,
1231-
GFP_NOIO, NULL);
1232-
clear_bit(FLUSH_PENDING, &device->flags);
1233-
if (rv) {
1234-
drbd_info(device, "local disk flush failed with status %d\n", rv);
1235-
/* would rather check on EOPNOTSUPP, but that is not reliable.
1236-
* don't try again for ANY return value != 0
1237-
* if (rv == -EOPNOTSUPP) */
1238-
drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
1239-
}
1240-
put_ldev(device);
1241-
kref_put(&device->kref, drbd_destroy_device);
1294+
submit_one_flush(device, &ctx);
12421295

12431296
rcu_read_lock();
1244-
if (rv)
1245-
break;
12461297
}
12471298
rcu_read_unlock();
1299+
1300+
/* Do we want to add a timeout,
1301+
* if disk-timeout is set? */
1302+
if (!atomic_dec_and_test(&ctx.pending))
1303+
wait_for_completion(&ctx.done);
1304+
1305+
if (ctx.error) {
1306+
/* would rather check on EOPNOTSUPP, but that is not reliable.
1307+
* don't try again for ANY return value != 0
1308+
* if (rv == -EOPNOTSUPP) */
1309+
/* Any error is already reported by bio_endio callback. */
1310+
drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
1311+
}
12481312
}
12491313
}
12501314

0 commit comments

Comments
 (0)