
Commit c140e1c

dm thin: use per thin device deferred bio lists
The thin-pool previously had only a single deferred_bios list that collected bios for all thin devices in the pool. Split this per-pool deferred_bios list out into a per-thin deferred_bios_list -- doing so enables increased parallelism when processing deferred bios. And now that each thin device has its own deferred_bios_list we can sort all bios in the list using logical sector. The requeue code in the error-handling path is also cleaner as a side-effect.

Signed-off-by: Mike Snitzer <[email protected]>
Acked-by: Joe Thornber <[email protected]>
1 parent 760fe67 commit c140e1c
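Before the diff, a rough picture of the change in miniature: instead of one pool-wide deferred list guarded by the pool lock, every thin device carries its own deferred list and its own lock, and the worker drains each device's list independently. The user-space C sketch below models only that locking pattern; all names (pool, thin_dev, defer_bio, drain_device) and the pthread locking are invented for illustration and are not the dm-thin code.

/*
 * Minimal user-space model of the per-device deferred list: names invented,
 * locking simplified to pthread mutexes -- this is not the dm-thin code.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct bio_item {
	long sector;
	struct bio_item *next;
};

struct thin_dev {
	pthread_mutex_t lock;      /* guards only this device's deferred list */
	struct bio_item *deferred; /* per-device deferred "bios" */
	struct thin_dev *next;     /* link on the pool's active-device list */
};

struct pool {
	pthread_mutex_t lock;      /* now covers pool-wide state only */
	struct thin_dev *active;   /* active thin devices */
};

/* Defer a bio: only the owning device's lock is taken, so devices don't contend. */
static void defer_bio(struct thin_dev *td, long sector)
{
	struct bio_item *b = malloc(sizeof(*b));

	if (!b)
		return;
	b->sector = sector;
	pthread_mutex_lock(&td->lock);
	b->next = td->deferred;
	td->deferred = b;
	pthread_mutex_unlock(&td->lock);
}

/* Worker side: detach one device's whole list under its lock, process it unlocked. */
static void drain_device(struct thin_dev *td)
{
	struct bio_item *list;

	pthread_mutex_lock(&td->lock);
	list = td->deferred;
	td->deferred = NULL;
	pthread_mutex_unlock(&td->lock);

	while (list) {
		struct bio_item *b = list;

		list = list->next;
		printf("process bio at sector %ld\n", b->sector);
		free(b);
	}
}

/* The pool worker walks the active devices and drains each one independently. */
static void process_deferred(struct pool *p)
{
	struct thin_dev *td;

	for (td = p->active; td; td = td->next)
		drain_device(td);
}

int main(void)
{
	static struct thin_dev td = { PTHREAD_MUTEX_INITIALIZER, NULL, NULL };
	static struct pool p = { PTHREAD_MUTEX_INITIALIZER, NULL };

	p.active = &td;
	defer_bio(&td, 2048);
	defer_bio(&td, 64);
	process_deferred(&p);
	return 0;
}

Because each device only ever takes its own lock on the deferral and drain paths, contention on the pool lock no longer scales with the number of thin devices.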

1 file changed: 104 additions, 61 deletions

drivers/md/dm-thin.c

Lines changed: 104 additions & 61 deletions
@@ -12,6 +12,7 @@
 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -178,12 +179,10 @@ struct pool {
 	unsigned ref_count;
 
 	spinlock_t lock;
-	struct bio_list deferred_bios;
 	struct bio_list deferred_flush_bios;
 	struct list_head prepared_mappings;
 	struct list_head prepared_discards;
-
-	struct bio_list retry_on_resume_list;
+	struct list_head active_thins;
 
 	struct dm_deferred_set *shared_read_ds;
 	struct dm_deferred_set *all_io_ds;
@@ -220,13 +219,17 @@ struct pool_c {
  * Target context for a thin.
  */
 struct thin_c {
+	struct list_head list;
 	struct dm_dev *pool_dev;
 	struct dm_dev *origin_dev;
 	dm_thin_id dev_id;
 
 	struct pool *pool;
 	struct dm_thin_device *td;
 	bool requeue_mode:1;
+	spinlock_t lock;
+	struct bio_list deferred_bio_list;
+	struct bio_list retry_on_resume_list;
 };
 
 /*----------------------------------------------------------------*/
@@ -287,9 +290,9 @@ static void cell_defer_no_holder_no_free(struct thin_c *tc,
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	dm_cell_release_no_holder(pool->prison, cell, &tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	wake_worker(pool);
 }
@@ -378,46 +381,48 @@ static void requeue_bio_list(struct thin_c *tc, struct bio_list *master)
 
 	bio_list_init(&bios);
 
-	spin_lock_irqsave(&tc->pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
 	bio_list_merge(&bios, master);
 	bio_list_init(master);
-	spin_unlock_irqrestore(&tc->pool->lock, flags);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
-	while ((bio = bio_list_pop(&bios))) {
-		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-
-		if (h->tc == tc)
-			bio_endio(bio, DM_ENDIO_REQUEUE);
-		else
-			bio_list_add(master, bio);
-	}
+	while ((bio = bio_list_pop(&bios)))
+		bio_endio(bio, DM_ENDIO_REQUEUE);
 }
 
 static void requeue_io(struct thin_c *tc)
 {
-	struct pool *pool = tc->pool;
-
-	requeue_bio_list(tc, &pool->deferred_bios);
-	requeue_bio_list(tc, &pool->retry_on_resume_list);
+	requeue_bio_list(tc, &tc->deferred_bio_list);
+	requeue_bio_list(tc, &tc->retry_on_resume_list);
 }
 
-static void error_retry_list(struct pool *pool)
+static void error_thin_retry_list(struct thin_c *tc)
 {
 	struct bio *bio;
 	unsigned long flags;
 	struct bio_list bios;
 
 	bio_list_init(&bios);
 
-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_merge(&bios, &pool->retry_on_resume_list);
-	bio_list_init(&pool->retry_on_resume_list);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_merge(&bios, &tc->retry_on_resume_list);
+	bio_list_init(&tc->retry_on_resume_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	while ((bio = bio_list_pop(&bios)))
 		bio_io_error(bio);
 }
 
+static void error_retry_list(struct pool *pool)
+{
+	struct thin_c *tc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tc, &pool->active_thins, list)
+		error_thin_retry_list(tc);
+	rcu_read_unlock();
+}
+
 /*
  * This section of code contains the logic for processing a thin device's IO.
  * Much of the code depends on pool object resources (lists, workqueues, etc)
@@ -608,9 +613,9 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	cell_release(pool, cell, &pool->deferred_bios);
-	spin_unlock_irqrestore(&tc->pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	cell_release(pool, cell, &tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	wake_worker(pool);
 }
@@ -623,9 +628,9 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	cell_release_no_holder(pool, cell, &pool->deferred_bios);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	wake_worker(pool);
 }
@@ -1001,12 +1006,11 @@ static void retry_on_resume(struct bio *bio)
 {
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
 	struct thin_c *tc = h->tc;
-	struct pool *pool = tc->pool;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_add(&pool->retry_on_resume_list, bio);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_add(&tc->retry_on_resume_list, bio);
+	spin_unlock_irqrestore(&tc->lock, flags);
 }
 
 static bool should_error_unserviceable_bio(struct pool *pool)
@@ -1363,38 +1367,36 @@ static int need_commit_due_to_time(struct pool *pool)
 	       jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
 }
 
-static void process_deferred_bios(struct pool *pool)
+static void process_thin_deferred_bios(struct thin_c *tc)
 {
+	struct pool *pool = tc->pool;
 	unsigned long flags;
 	struct bio *bio;
 	struct bio_list bios;
 
+	if (tc->requeue_mode) {
+		requeue_bio_list(tc, &tc->deferred_bio_list);
+		return;
+	}
+
 	bio_list_init(&bios);
 
-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_merge(&bios, &pool->deferred_bios);
-	bio_list_init(&pool->deferred_bios);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_merge(&bios, &tc->deferred_bio_list);
+	bio_list_init(&tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	while ((bio = bio_list_pop(&bios))) {
-		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-		struct thin_c *tc = h->tc;
-
-		if (tc->requeue_mode) {
-			bio_endio(bio, DM_ENDIO_REQUEUE);
-			continue;
-		}
-
 		/*
 		 * If we've got no free new_mapping structs, and processing
 		 * this bio might require one, we pause until there are some
 		 * prepared mappings to process.
		 */
 		if (ensure_next_mapping(pool)) {
-			spin_lock_irqsave(&pool->lock, flags);
-			bio_list_add(&pool->deferred_bios, bio);
-			bio_list_merge(&pool->deferred_bios, &bios);
-			spin_unlock_irqrestore(&pool->lock, flags);
+			spin_lock_irqsave(&tc->lock, flags);
+			bio_list_add(&tc->deferred_bio_list, bio);
+			bio_list_merge(&tc->deferred_bio_list, &bios);
+			spin_unlock_irqrestore(&tc->lock, flags);
 			break;
 		}
 
@@ -1403,6 +1405,19 @@ static void process_deferred_bios(struct pool *pool)
 		else
 			pool->process_bio(tc, bio);
 	}
+}
+
+static void process_deferred_bios(struct pool *pool)
+{
+	unsigned long flags;
+	struct bio *bio;
+	struct bio_list bios;
+	struct thin_c *tc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tc, &pool->active_thins, list)
+		process_thin_deferred_bios(tc);
+	rcu_read_unlock();
 
 	/*
 	 * If there are any deferred flush bios, we must commit
@@ -1634,9 +1649,9 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
 	unsigned long flags;
 	struct pool *pool = tc->pool;
 
-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_add(&pool->deferred_bios, bio);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_add(&tc->deferred_bio_list, bio);
+	spin_unlock_irqrestore(&tc->lock, flags);
 
 	wake_worker(pool);
 }
@@ -1767,10 +1782,19 @@ static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
 	return bdi_congested(&q->backing_dev_info, bdi_bits);
 }
 
-static void __requeue_bios(struct pool *pool)
+static void requeue_bios(struct pool *pool)
 {
-	bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list);
-	bio_list_init(&pool->retry_on_resume_list);
+	unsigned long flags;
+	struct thin_c *tc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tc, &pool->active_thins, list) {
+		spin_lock_irqsave(&tc->lock, flags);
+		bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list);
+		bio_list_init(&tc->retry_on_resume_list);
+		spin_unlock_irqrestore(&tc->lock, flags);
+	}
+	rcu_read_unlock();
 }
 
 /*----------------------------------------------------------------
@@ -1951,12 +1975,11 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 	INIT_WORK(&pool->worker, do_worker);
 	INIT_DELAYED_WORK(&pool->waker, do_waker);
 	spin_lock_init(&pool->lock);
-	bio_list_init(&pool->deferred_bios);
 	bio_list_init(&pool->deferred_flush_bios);
 	INIT_LIST_HEAD(&pool->prepared_mappings);
 	INIT_LIST_HEAD(&pool->prepared_discards);
+	INIT_LIST_HEAD(&pool->active_thins);
 	pool->low_water_triggered = false;
-	bio_list_init(&pool->retry_on_resume_list);
 
 	pool->shared_read_ds = dm_deferred_set_create();
 	if (!pool->shared_read_ds) {
@@ -2501,8 +2524,8 @@ static void pool_resume(struct dm_target *ti)
 
 	spin_lock_irqsave(&pool->lock, flags);
 	pool->low_water_triggered = false;
-	__requeue_bios(pool);
 	spin_unlock_irqrestore(&pool->lock, flags);
+	requeue_bios(pool);
 
 	do_waker(&pool->waker.work);
 }
@@ -2962,6 +2985,12 @@ static struct target_type pool_target = {
 static void thin_dtr(struct dm_target *ti)
 {
 	struct thin_c *tc = ti->private;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tc->pool->lock, flags);
+	list_del_rcu(&tc->list);
+	spin_unlock_irqrestore(&tc->pool->lock, flags);
+	synchronize_rcu();
 
 	mutex_lock(&dm_thin_pool_table.mutex);
 
@@ -3008,6 +3037,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		r = -ENOMEM;
 		goto out_unlock;
 	}
+	spin_lock_init(&tc->lock);
+	bio_list_init(&tc->deferred_bio_list);
+	bio_list_init(&tc->retry_on_resume_list);
 
 	if (argc == 3) {
 		r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
@@ -3079,6 +3111,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
 	mutex_unlock(&dm_thin_pool_table.mutex);
 
+	spin_lock(&tc->pool->lock);
+	list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
+	spin_unlock(&tc->pool->lock);
+	/*
	 * This synchronize_rcu() call is needed here otherwise we risk a
	 * wake_worker() call finding no bios to process (because the newly
	 * added tc isn't yet visible).  So this reduces latency since we
	 * aren't then dependent on the periodic commit to wake_worker().
	 */
+	synchronize_rcu();
+
 	return 0;
 
 bad_target_max_io_len:
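The active_thins list added by this patch is a standard RCU-protected list: the thin constructor and destructor modify it under pool->lock with list_add_tail_rcu()/list_del_rcu(), while the worker-side readers (error_retry_list(), process_deferred_bios(), requeue_bios()) walk it with list_for_each_entry_rcu() under rcu_read_lock(), so the lookups never contend with writers. A stripped-down sketch of that general pattern follows; the types my_pool and my_thin are placeholders for illustration, not the dm-thin structures, and the writer locking is simplified relative to the patch (which uses the irq-saving lock variant in the destructor path).

/*
 * General shape of an RCU-protected device list, with placeholder names
 * (my_pool, my_thin); a sketch of the pattern, not the dm-thin code.
 */
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct my_pool {
	spinlock_t lock;		/* serializes writers to the list */
	struct list_head active;	/* read-mostly list of devices */
};

struct my_thin {
	struct list_head list;
};

/* Writer: add a device; the pool lock orders concurrent adds and removes. */
static void my_thin_add(struct my_pool *pool, struct my_thin *tc)
{
	spin_lock(&pool->lock);
	list_add_tail_rcu(&tc->list, &pool->active);
	spin_unlock(&pool->lock);
}

/* Writer: remove a device and wait for all readers before freeing it. */
static void my_thin_del(struct my_pool *pool, struct my_thin *tc)
{
	spin_lock(&pool->lock);
	list_del_rcu(&tc->list);
	spin_unlock(&pool->lock);
	synchronize_rcu();	/* no reader can still see tc after this */
	kfree(tc);
}

/* Reader (e.g. the worker): walk the list without taking the pool lock. */
static void my_pool_for_each(struct my_pool *pool, void (*fn)(struct my_thin *))
{
	struct my_thin *tc;

	rcu_read_lock();
	list_for_each_entry_rcu(tc, &pool->active, list)
		fn(tc);
	rcu_read_unlock();
}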
