Skip to content

Commit b10ebd3

Browse files
jthornbersnitm
authored andcommitted
dm thin: fix rcu_read_lock being held in code that can sleep
Commit c140e1c ("dm thin: use per thin device deferred bio lists") introduced the use of an rculist for all active thin devices. The use of rcu_read_lock() in process_deferred_bios() can result in a BUG if a dm_bio_prison_cell must be allocated as a side-effect of bio_detain(): BUG: sleeping function called from invalid context at mm/mempool.c:203 in_atomic(): 1, irqs_disabled(): 0, pid: 6, name: kworker/u8:0 3 locks held by kworker/u8:0/6: #0: ("dm-" "thin"){.+.+..}, at: [<ffffffff8106be42>] process_one_work+0x192/0x550 #1: ((&pool->worker)){+.+...}, at: [<ffffffff8106be42>] process_one_work+0x192/0x550 #2: (rcu_read_lock){.+.+..}, at: [<ffffffff816360b5>] do_worker+0x5/0x4d0 We can't process deferred bios with the rcu lock held, since dm_bio_prison_cell allocation may block if the bio-prison's cell mempool is exhausted. To fix: - Introduce a refcount and completion field to each thin_c - Add thin_get/put methods for adjusting the refcount. If the refcount hits zero then the completion is triggered. - Initialise refcount to 1 when creating thin_c - When iterating the active_thins list we thin_get() whilst the rcu lock is held. - After the rcu lock is dropped we process the deferred bios for that thin. - When destroying a thin_c we thin_put() and then wait for the completion -- to avoid a race between the worker thread iterating from that thin_c and destroying the thin_c. Signed-off-by: Joe Thornber <[email protected]> Signed-off-by: Mike Snitzer <[email protected]>
1 parent 5e3283e commit b10ebd3

File tree

1 file changed

+67
-3
lines changed

1 file changed

+67
-3
lines changed

drivers/md/dm-thin.c

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,13 @@ struct thin_c {
232232
struct bio_list deferred_bio_list;
233233
struct bio_list retry_on_resume_list;
234234
struct rb_root sort_bio_list; /* sorted list of deferred bios */
235+
236+
/*
237+
* Ensures the thin is not destroyed until the worker has finished
238+
* iterating the active_thins list.
239+
*/
240+
atomic_t refcount;
241+
struct completion can_destroy;
235242
};
236243

237244
/*----------------------------------------------------------------*/
@@ -1486,17 +1493,57 @@ static void process_thin_deferred_bios(struct thin_c *tc)
14861493
blk_finish_plug(&plug);
14871494
}
14881495

1496+
static void thin_get(struct thin_c *tc);
1497+
static void thin_put(struct thin_c *tc);
1498+
1499+
/*
1500+
* We can't hold rcu_read_lock() around code that can block. So we
1501+
* find a thin with the rcu lock held; bump a refcount; then drop
1502+
* the lock.
1503+
*/
1504+
static struct thin_c *get_first_thin(struct pool *pool)
1505+
{
1506+
struct thin_c *tc = NULL;
1507+
1508+
rcu_read_lock();
1509+
if (!list_empty(&pool->active_thins)) {
1510+
tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
1511+
thin_get(tc);
1512+
}
1513+
rcu_read_unlock();
1514+
1515+
return tc;
1516+
}
1517+
1518+
static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
1519+
{
1520+
struct thin_c *old_tc = tc;
1521+
1522+
rcu_read_lock();
1523+
list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
1524+
thin_get(tc);
1525+
thin_put(old_tc);
1526+
rcu_read_unlock();
1527+
return tc;
1528+
}
1529+
thin_put(old_tc);
1530+
rcu_read_unlock();
1531+
1532+
return NULL;
1533+
}
1534+
14891535
static void process_deferred_bios(struct pool *pool)
14901536
{
14911537
unsigned long flags;
14921538
struct bio *bio;
14931539
struct bio_list bios;
14941540
struct thin_c *tc;
14951541

1496-
rcu_read_lock();
1497-
list_for_each_entry_rcu(tc, &pool->active_thins, list)
1542+
tc = get_first_thin(pool);
1543+
while (tc) {
14981544
process_thin_deferred_bios(tc);
1499-
rcu_read_unlock();
1545+
tc = get_next_thin(pool, tc);
1546+
}
15001547

15011548
/*
15021549
* If there are any deferred flush bios, we must commit
@@ -3061,11 +3108,25 @@ static struct target_type pool_target = {
30613108
/*----------------------------------------------------------------
30623109
* Thin target methods
30633110
*--------------------------------------------------------------*/
3111+
static void thin_get(struct thin_c *tc)
3112+
{
3113+
atomic_inc(&tc->refcount);
3114+
}
3115+
3116+
static void thin_put(struct thin_c *tc)
3117+
{
3118+
if (atomic_dec_and_test(&tc->refcount))
3119+
complete(&tc->can_destroy);
3120+
}
3121+
30643122
static void thin_dtr(struct dm_target *ti)
30653123
{
30663124
struct thin_c *tc = ti->private;
30673125
unsigned long flags;
30683126

3127+
thin_put(tc);
3128+
wait_for_completion(&tc->can_destroy);
3129+
30693130
spin_lock_irqsave(&tc->pool->lock, flags);
30703131
list_del_rcu(&tc->list);
30713132
spin_unlock_irqrestore(&tc->pool->lock, flags);
@@ -3192,6 +3253,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
31923253

31933254
mutex_unlock(&dm_thin_pool_table.mutex);
31943255

3256+
atomic_set(&tc->refcount, 1);
3257+
init_completion(&tc->can_destroy);
3258+
31953259
spin_lock_irqsave(&tc->pool->lock, flags);
31963260
list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
31973261
spin_unlock_irqrestore(&tc->pool->lock, flags);

0 commit comments

Comments
 (0)