 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -178,12 +179,10 @@ struct pool {
 	unsigned ref_count;

 	spinlock_t lock;
-	struct bio_list deferred_bios;
 	struct bio_list deferred_flush_bios;
 	struct list_head prepared_mappings;
 	struct list_head prepared_discards;
-
-	struct bio_list retry_on_resume_list;
+	struct list_head active_thins;

 	struct dm_deferred_set *shared_read_ds;
 	struct dm_deferred_set *all_io_ds;
@@ -220,13 +219,17 @@ struct pool_c {
  * Target context for a thin.
  */
 struct thin_c {
+	struct list_head list;
 	struct dm_dev *pool_dev;
 	struct dm_dev *origin_dev;
 	dm_thin_id dev_id;

 	struct pool *pool;
 	struct dm_thin_device *td;
 	bool requeue_mode:1;
+	spinlock_t lock;
+	struct bio_list deferred_bio_list;
+	struct bio_list retry_on_resume_list;
 };

 /*----------------------------------------------------------------*/
@@ -287,9 +290,9 @@ static void cell_defer_no_holder_no_free(struct thin_c *tc,
 	struct pool *pool = tc->pool;
 	unsigned long flags;

-	spin_lock_irqsave(&pool->lock, flags);
-	dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	dm_cell_release_no_holder(pool->prison, cell, &tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);

 	wake_worker(pool);
 }
@@ -378,46 +381,48 @@ static void requeue_bio_list(struct thin_c *tc, struct bio_list *master)

 	bio_list_init(&bios);

-	spin_lock_irqsave(&tc->pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
 	bio_list_merge(&bios, master);
 	bio_list_init(master);
-	spin_unlock_irqrestore(&tc->pool->lock, flags);
+	spin_unlock_irqrestore(&tc->lock, flags);

-	while ((bio = bio_list_pop(&bios))) {
-		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-
-		if (h->tc == tc)
-			bio_endio(bio, DM_ENDIO_REQUEUE);
-		else
-			bio_list_add(master, bio);
-	}
+	while ((bio = bio_list_pop(&bios)))
+		bio_endio(bio, DM_ENDIO_REQUEUE);
 }

 static void requeue_io(struct thin_c *tc)
 {
-	struct pool *pool = tc->pool;
-
-	requeue_bio_list(tc, &pool->deferred_bios);
-	requeue_bio_list(tc, &pool->retry_on_resume_list);
+	requeue_bio_list(tc, &tc->deferred_bio_list);
+	requeue_bio_list(tc, &tc->retry_on_resume_list);
 }

-static void error_retry_list(struct pool *pool)
+static void error_thin_retry_list(struct thin_c *tc)
 {
 	struct bio *bio;
 	unsigned long flags;
 	struct bio_list bios;

 	bio_list_init(&bios);

-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_merge(&bios, &pool->retry_on_resume_list);
-	bio_list_init(&pool->retry_on_resume_list);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_merge(&bios, &tc->retry_on_resume_list);
+	bio_list_init(&tc->retry_on_resume_list);
+	spin_unlock_irqrestore(&tc->lock, flags);

 	while ((bio = bio_list_pop(&bios)))
 		bio_io_error(bio);
 }

+static void error_retry_list(struct pool *pool)
+{
+	struct thin_c *tc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tc, &pool->active_thins, list)
+		error_thin_retry_list(tc);
+	rcu_read_unlock();
+}
+
 /*
  * This section of code contains the logic for processing a thin device's IO.
  * Much of the code depends on pool object resources (lists, workqueues, etc)
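For reference, the RCU scheme that the new error_retry_list() (and the later per-thin worker loops) relies on reduces to the standalone sketch below. It is only a sketch: my_pool, my_thin and walk_active are hypothetical names, not dm-thin code; the rculist/rcupdate/spinlock calls are the real kernel APIs the hunk uses.

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct my_thin {
	struct list_head list;		/* linked into my_pool::active */
	spinlock_t lock;		/* protects this device's own state */
};

struct my_pool {
	spinlock_t lock;		/* serialises writers of 'active' */
	struct list_head active;	/* RCU-protected list of my_thin */
};

/* Reader: may run concurrently with add/remove and never takes the pool lock. */
static void walk_active(struct my_pool *p)
{
	struct my_thin *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &p->active, list) {
		unsigned long flags;

		spin_lock_irqsave(&t->lock, flags);
		/* work on one device's state at a time */
		spin_unlock_irqrestore(&t->lock, flags);
	}
	rcu_read_unlock();
}

The point of the pattern is visible in the hunk above: the worker-side walkers only ever hold one thin device's spinlock, so two thin devices no longer contend on a single pool-wide lock for their bio lists.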
@@ -608,9 +613,9 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 	struct pool *pool = tc->pool;
 	unsigned long flags;

-	spin_lock_irqsave(&pool->lock, flags);
-	cell_release(pool, cell, &pool->deferred_bios);
-	spin_unlock_irqrestore(&tc->pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	cell_release(pool, cell, &tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);

 	wake_worker(pool);
 }
@@ -623,9 +628,9 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c
 	struct pool *pool = tc->pool;
 	unsigned long flags;

-	spin_lock_irqsave(&pool->lock, flags);
-	cell_release_no_holder(pool, cell, &pool->deferred_bios);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);

 	wake_worker(pool);
 }
@@ -1001,12 +1006,11 @@ static void retry_on_resume(struct bio *bio)
 {
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
 	struct thin_c *tc = h->tc;
-	struct pool *pool = tc->pool;
 	unsigned long flags;

-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_add(&pool->retry_on_resume_list, bio);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_add(&tc->retry_on_resume_list, bio);
+	spin_unlock_irqrestore(&tc->lock, flags);
 }

 static bool should_error_unserviceable_bio(struct pool *pool)
@@ -1363,38 +1367,36 @@ static int need_commit_due_to_time(struct pool *pool)
 	       jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
 }

-static void process_deferred_bios(struct pool *pool)
+static void process_thin_deferred_bios(struct thin_c *tc)
 {
+	struct pool *pool = tc->pool;
 	unsigned long flags;
 	struct bio *bio;
 	struct bio_list bios;

+	if (tc->requeue_mode) {
+		requeue_bio_list(tc, &tc->deferred_bio_list);
+		return;
+	}
+
 	bio_list_init(&bios);

-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_merge(&bios, &pool->deferred_bios);
-	bio_list_init(&pool->deferred_bios);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_merge(&bios, &tc->deferred_bio_list);
+	bio_list_init(&tc->deferred_bio_list);
+	spin_unlock_irqrestore(&tc->lock, flags);

 	while ((bio = bio_list_pop(&bios))) {
-		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-		struct thin_c *tc = h->tc;
-
-		if (tc->requeue_mode) {
-			bio_endio(bio, DM_ENDIO_REQUEUE);
-			continue;
-		}
-
 		/*
 		 * If we've got no free new_mapping structs, and processing
 		 * this bio might require one, we pause until there are some
 		 * prepared mappings to process.
 		 */
 		if (ensure_next_mapping(pool)) {
-			spin_lock_irqsave(&pool->lock, flags);
-			bio_list_add(&pool->deferred_bios, bio);
-			bio_list_merge(&pool->deferred_bios, &bios);
-			spin_unlock_irqrestore(&pool->lock, flags);
+			spin_lock_irqsave(&tc->lock, flags);
+			bio_list_add(&tc->deferred_bio_list, bio);
+			bio_list_merge(&tc->deferred_bio_list, &bios);
+			spin_unlock_irqrestore(&tc->lock, flags);
 			break;
 		}

@@ -1403,6 +1405,19 @@ static void process_deferred_bios(struct pool *pool)
 		else
 			pool->process_bio(tc, bio);
 	}
+}
+
+static void process_deferred_bios(struct pool *pool)
+{
+	unsigned long flags;
+	struct bio *bio;
+	struct bio_list bios;
+	struct thin_c *tc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tc, &pool->active_thins, list)
+		process_thin_deferred_bios(tc);
+	rcu_read_unlock();

 	/*
 	 * If there are any deferred flush bios, we must commit
@@ -1634,9 +1649,9 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
 	unsigned long flags;
 	struct pool *pool = tc->pool;

-	spin_lock_irqsave(&pool->lock, flags);
-	bio_list_add(&pool->deferred_bios, bio);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	spin_lock_irqsave(&tc->lock, flags);
+	bio_list_add(&tc->deferred_bio_list, bio);
+	spin_unlock_irqrestore(&tc->lock, flags);

 	wake_worker(pool);
 }
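Taken with the process_thin_deferred_bios() hunk above, the deferral scheme is a plain produce/drain pair per thin device. A minimal sketch, assuming a hypothetical struct my_thin with the same two fields the patch adds to struct thin_c:

#include <linux/bio.h>
#include <linux/spinlock.h>

struct my_thin {
	spinlock_t lock;
	struct bio_list deferred;	/* bios queued for the worker */
};

/* Producer: called from the map path, mirrors thin_defer_bio(). */
static void defer_bio(struct my_thin *t, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&t->lock, flags);
	bio_list_add(&t->deferred, bio);
	spin_unlock_irqrestore(&t->lock, flags);
}

/*
 * Consumer: the worker steals the whole list under the lock, then
 * processes the bios with no lock held, mirroring process_thin_deferred_bios().
 */
static void drain_bios(struct my_thin *t)
{
	struct bio_list bios;
	struct bio *bio;
	unsigned long flags;

	bio_list_init(&bios);

	spin_lock_irqsave(&t->lock, flags);
	bio_list_merge(&bios, &t->deferred);
	bio_list_init(&t->deferred);
	spin_unlock_irqrestore(&t->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		; /* handle one bio */
}

Because each list now only ever holds bios for its own device, the old per-bio check of the endio hook's tc (dropped in the requeue_bio_list() hunk) is no longer needed.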
@@ -1767,10 +1782,19 @@ static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
 	return bdi_congested(&q->backing_dev_info, bdi_bits);
 }

-static void __requeue_bios(struct pool *pool)
+static void requeue_bios(struct pool *pool)
 {
-	bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list);
-	bio_list_init(&pool->retry_on_resume_list);
+	unsigned long flags;
+	struct thin_c *tc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tc, &pool->active_thins, list) {
+		spin_lock_irqsave(&tc->lock, flags);
+		bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list);
+		bio_list_init(&tc->retry_on_resume_list);
+		spin_unlock_irqrestore(&tc->lock, flags);
+	}
+	rcu_read_unlock();
 }

 /*----------------------------------------------------------------
@@ -1951,12 +1975,11 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 	INIT_WORK(&pool->worker, do_worker);
 	INIT_DELAYED_WORK(&pool->waker, do_waker);
 	spin_lock_init(&pool->lock);
-	bio_list_init(&pool->deferred_bios);
 	bio_list_init(&pool->deferred_flush_bios);
 	INIT_LIST_HEAD(&pool->prepared_mappings);
 	INIT_LIST_HEAD(&pool->prepared_discards);
+	INIT_LIST_HEAD(&pool->active_thins);
 	pool->low_water_triggered = false;
-	bio_list_init(&pool->retry_on_resume_list);

 	pool->shared_read_ds = dm_deferred_set_create();
 	if (!pool->shared_read_ds) {
@@ -2501,8 +2524,8 @@ static void pool_resume(struct dm_target *ti)

 	spin_lock_irqsave(&pool->lock, flags);
 	pool->low_water_triggered = false;
-	__requeue_bios(pool);
 	spin_unlock_irqrestore(&pool->lock, flags);
+	requeue_bios(pool);

 	do_waker(&pool->waker.work);
 }
@@ -2962,6 +2985,12 @@ static struct target_type pool_target = {
 static void thin_dtr(struct dm_target *ti)
 {
 	struct thin_c *tc = ti->private;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tc->pool->lock, flags);
+	list_del_rcu(&tc->list);
+	spin_unlock_irqrestore(&tc->pool->lock, flags);
+	synchronize_rcu();

 	mutex_lock(&dm_thin_pool_table.mutex);

@@ -3008,6 +3037,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		r = -ENOMEM;
 		goto out_unlock;
 	}
+	spin_lock_init(&tc->lock);
+	bio_list_init(&tc->deferred_bio_list);
+	bio_list_init(&tc->retry_on_resume_list);

 	if (argc == 3) {
 		r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
@@ -3079,6 +3111,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)

 	mutex_unlock(&dm_thin_pool_table.mutex);

+	spin_lock(&tc->pool->lock);
+	list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
+	spin_unlock(&tc->pool->lock);
+	/*
+	 * This synchronize_rcu() call is needed here otherwise we risk a
+	 * wake_worker() call finding no bios to process (because the newly
+	 * added tc isn't yet visible).  So this reduces latency since we
+	 * aren't then dependent on the periodic commit to wake_worker().
+	 */
+	synchronize_rcu();
+
 	return 0;

 bad_target_max_io_len:
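The constructor and destructor hunks are the writer side of the same RCU list. A rough sketch of the ordering they enforce (thin_register/thin_unregister and the struct names are hypothetical helpers, not dm-thin code):

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_thin { struct list_head list; };
struct my_pool { spinlock_t lock; struct list_head active; };

/* thin_ctr() side: make the new device visible to RCU walkers of 'active'. */
static void thin_register(struct my_pool *p, struct my_thin *t)
{
	spin_lock(&p->lock);
	list_add_tail_rcu(&t->list, &p->active);
	spin_unlock(&p->lock);
	synchronize_rcu();	/* matches the thin_ctr() hunk; see its comment above */
}

/* thin_dtr() side: unlink, then wait out every walker that may still hold 't'. */
static void thin_unregister(struct my_pool *p, struct my_thin *t)
{
	unsigned long flags;

	spin_lock_irqsave(&p->lock, flags);
	list_del_rcu(&t->list);
	spin_unlock_irqrestore(&p->lock, flags);

	synchronize_rcu();	/* all pre-existing rcu_read_lock() sections are done */
	kfree(t);		/* only now is it safe to free */
}

The synchronize_rcu() before freeing is what makes it safe for process_deferred_bios(), requeue_bios() and error_retry_list() to walk active_thins without taking a reference on each thin_c.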