Skip to content

Commit c4db59d

Browse files
Christoph Hellwig authored and axboe committed
fs: don't reassign dirty inodes to default_backing_dev_info
If we have dirty inodes we need to call the filesystem for it, even if the device has been removed and the filesystem will error out early. The current code does that by reassigning all dirty inodes to the default backing_dev_info when a bdi is unlinked, but that's pretty pointless given that the bdi must always outlive the super block.

Instead of stopping writeback at unregister time and moving inodes to the default bdi, just keep the current bdi alive until it is destroyed. The containing objects of the bdi ensure this doesn't happen until all writeback has finished by erroring out.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Tejun Heo <[email protected]>
Reviewed-by: Jan Kara <[email protected]>

Killed the redundant WARN_ON(), as noticed by Jan.

Signed-off-by: Jens Axboe <[email protected]>
1 parent 7b14a21 commit c4db59d

File tree

1 file changed

+23
-67
lines changed

1 file changed

+23
-67
lines changed

mm/backing-dev.c

Lines changed: 23 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -37,17 +37,6 @@ LIST_HEAD(bdi_list);
3737
/* bdi_wq serves all asynchronous writeback tasks */
3838
struct workqueue_struct *bdi_wq;
3939

40-
static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
41-
{
42-
if (wb1 < wb2) {
43-
spin_lock(&wb1->list_lock);
44-
spin_lock_nested(&wb2->list_lock, 1);
45-
} else {
46-
spin_lock(&wb2->list_lock);
47-
spin_lock_nested(&wb1->list_lock, 1);
48-
}
49-
}
50-
5140
#ifdef CONFIG_DEBUG_FS
5241
#include <linux/debugfs.h>
5342
#include <linux/seq_file.h>
@@ -352,57 +341,42 @@ EXPORT_SYMBOL(bdi_register_dev);
352341
*/
353342
static void bdi_wb_shutdown(struct backing_dev_info *bdi)
354343
{
355-
if (!bdi_cap_writeback_dirty(bdi))
344+
/* Make sure nobody queues further work */
345+
spin_lock_bh(&bdi->wb_lock);
346+
if (!test_and_clear_bit(BDI_registered, &bdi->state)) {
347+
spin_unlock_bh(&bdi->wb_lock);
356348
return;
349+
}
350+
spin_unlock_bh(&bdi->wb_lock);
357351

358352
/*
359353
* Make sure nobody finds us on the bdi_list anymore
360354
*/
361355
bdi_remove_from_list(bdi);
362356

363-
/* Make sure nobody queues further work */
364-
spin_lock_bh(&bdi->wb_lock);
365-
clear_bit(BDI_registered, &bdi->state);
366-
spin_unlock_bh(&bdi->wb_lock);
367-
368357
/*
369358
* Drain work list and shutdown the delayed_work. At this point,
370359
* @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi
371360
* is dying and its work_list needs to be drained no matter what.
372361
*/
373362
mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
374363
flush_delayed_work(&bdi->wb.dwork);
375-
WARN_ON(!list_empty(&bdi->work_list));
376-
WARN_ON(delayed_work_pending(&bdi->wb.dwork));
377364
}
378365

379366
/*
380-
* This bdi is going away now, make sure that no super_blocks point to it
367+
* Called when the device behind @bdi has been removed or ejected.
368+
*
369+
* We can't really do much here except for reducing the dirty ratio at
370+
* the moment. In the future we should be able to set a flag so that
371+
* the filesystem can handle errors at mark_inode_dirty time instead
372+
* of only at writeback time.
381373
*/
382-
static void bdi_prune_sb(struct backing_dev_info *bdi)
383-
{
384-
struct super_block *sb;
385-
386-
spin_lock(&sb_lock);
387-
list_for_each_entry(sb, &super_blocks, s_list) {
388-
if (sb->s_bdi == bdi)
389-
sb->s_bdi = &default_backing_dev_info;
390-
}
391-
spin_unlock(&sb_lock);
392-
}
393-
394374
void bdi_unregister(struct backing_dev_info *bdi)
395375
{
396-
if (bdi->dev) {
397-
bdi_set_min_ratio(bdi, 0);
398-
trace_writeback_bdi_unregister(bdi);
399-
bdi_prune_sb(bdi);
376+
if (WARN_ON_ONCE(!bdi->dev))
377+
return;
400378

401-
bdi_wb_shutdown(bdi);
402-
bdi_debug_unregister(bdi);
403-
device_unregister(bdi->dev);
404-
bdi->dev = NULL;
405-
}
379+
bdi_set_min_ratio(bdi, 0);
406380
}
407381
EXPORT_SYMBOL(bdi_unregister);
408382

@@ -471,37 +445,19 @@ void bdi_destroy(struct backing_dev_info *bdi)
471445
{
472446
int i;
473447

474-
/*
475-
* Splice our entries to the default_backing_dev_info. This
476-
* condition shouldn't happen. @wb must be empty at this point and
477-
* dirty inodes on it might cause other issues. This workaround is
478-
* added by ce5f8e779519 ("writeback: splice dirty inode entries to
479-
* default bdi on bdi_destroy()") without root-causing the issue.
480-
*
481-
* http://lkml.kernel.org/g/[email protected]
482-
* http://thread.gmane.org/gmane.linux.file-systems/35341/focus=35350
483-
*
484-
* We should probably add WARN_ON() to find out whether it still
485-
* happens and track it down if so.
486-
*/
487-
if (bdi_has_dirty_io(bdi)) {
488-
struct bdi_writeback *dst = &default_backing_dev_info.wb;
489-
490-
bdi_lock_two(&bdi->wb, dst);
491-
list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
492-
list_splice(&bdi->wb.b_io, &dst->b_io);
493-
list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
494-
spin_unlock(&bdi->wb.list_lock);
495-
spin_unlock(&dst->list_lock);
496-
}
497-
498-
bdi_unregister(bdi);
448+
bdi_wb_shutdown(bdi);
499449

450+
WARN_ON(!list_empty(&bdi->work_list));
500451
WARN_ON(delayed_work_pending(&bdi->wb.dwork));
501452

453+
if (bdi->dev) {
454+
bdi_debug_unregister(bdi);
455+
device_unregister(bdi->dev);
456+
bdi->dev = NULL;
457+
}
458+
502459
for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
503460
percpu_counter_destroy(&bdi->bdi_stat[i]);
504-
505461
fprop_local_destroy_percpu(&bdi->completions);
506462
}
507463
EXPORT_SYMBOL(bdi_destroy);

0 commit comments

Comments
 (0)