Skip to content

Commit a13f35e

Browse files
htejunaxboe
authored andcommitted
writeback: don't embed root bdi_writeback_congested in bdi_writeback
52ebea7 ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") made bdi (backing_dev_info) host per-cgroup wb's (bdi_writeback's). As the congested state needs to be per-wb and referenced from blkcg side and multiple wbs, the patch made all non-root cong's (bdi_writeback_congested's) reference counted and indexed on bdi. When a bdi is destroyed, cgwb_bdi_destroy() tries to drain all non-root cong's; however, this can hang indefinitely because wb's can also be referenced from blkcg_gq's which are destroyed after bdi destruction is complete. To fix the bug, bdi destruction will be updated to not wait for cong's to drain, which naturally means that cong's may outlive the associated bdi. This is fine for non-root cong's but is problematic for the root cong's which are embedded in their bdi's as they may end up getting dereferenced after the containing bdi's are freed. This patch makes root cong's behave the same as non-root cong's. They are no longer embedded in their bdi's but allocated separately during bdi initialization, indexed and reference counted the same way. * As cong handling is the same for all wb's, wb->congested initialization is moved into wb_init(). * When !CONFIG_CGROUP_WRITEBACK, there was no indexing or refcnting. bdi->wb_congested is now a pointer pointing to the root cong allocated during bdi init and minimal refcnting operations are implemented. * The above makes root wb init paths diverge depending on CONFIG_CGROUP_WRITEBACK. root wb init is moved to cgwb_bdi_init(). This patch in itself shouldn't cause any consequential behavior differences but prepares for the actual fix. Signed-off-by: Tejun Heo <[email protected]> Reported-by: Jon Christopherson <[email protected]> Link: https://bugzilla.kernel.org/show_bug.cgi?id=100681 Tested-by: Jon Christopherson <[email protected]> Added <linux/slab.h> include to backing-dev.h for kfree() definition. Signed-off-by: Jens Axboe <[email protected]>
1 parent 4da3064 commit a13f35e

File tree

3 files changed

+54
-44
lines changed

3 files changed

+54
-44
lines changed

include/linux/backing-dev-defs.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ enum wb_stat_item {
5050
*/
5151
struct bdi_writeback_congested {
5252
unsigned long state; /* WB_[a]sync_congested flags */
53+
atomic_t refcnt; /* nr of attached wb's and blkg */
5354

5455
#ifdef CONFIG_CGROUP_WRITEBACK
5556
struct backing_dev_info *bdi; /* the associated bdi */
56-
atomic_t refcnt; /* nr of attached wb's and blkg */
5757
int blkcg_id; /* ID of the associated blkcg */
5858
struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */
5959
#endif
@@ -150,11 +150,12 @@ struct backing_dev_info {
150150
atomic_long_t tot_write_bandwidth;
151151

152152
struct bdi_writeback wb; /* the root writeback info for this bdi */
153-
struct bdi_writeback_congested wb_congested; /* its congested state */
154153
#ifdef CONFIG_CGROUP_WRITEBACK
155154
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
156155
struct rb_root cgwb_congested_tree; /* their congested states */
157156
atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */
157+
#else
158+
struct bdi_writeback_congested *wb_congested;
158159
#endif
159160
wait_queue_head_t wb_waitq;
160161

include/linux/backing-dev.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <linux/writeback.h>
1616
#include <linux/blk-cgroup.h>
1717
#include <linux/backing-dev-defs.h>
18+
#include <linux/slab.h>
1819

1920
int __must_check bdi_init(struct backing_dev_info *bdi);
2021
void bdi_destroy(struct backing_dev_info *bdi);
@@ -465,11 +466,14 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
465466
static inline struct bdi_writeback_congested *
466467
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
467468
{
468-
return bdi->wb.congested;
469+
atomic_inc(&bdi->wb_congested->refcnt);
470+
return bdi->wb_congested;
469471
}
470472

471473
static inline void wb_congested_put(struct bdi_writeback_congested *congested)
472474
{
475+
if (atomic_dec_and_test(&congested->refcnt))
476+
kfree(congested);
473477
}
474478

475479
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)

mm/backing-dev.c

Lines changed: 46 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
287287
#define INIT_BW (100 << (20 - PAGE_SHIFT))
288288

289289
static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
290-
gfp_t gfp)
290+
int blkcg_id, gfp_t gfp)
291291
{
292292
int i, err;
293293

@@ -311,21 +311,29 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
311311
INIT_LIST_HEAD(&wb->work_list);
312312
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
313313

314+
wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
315+
if (!wb->congested)
316+
return -ENOMEM;
317+
314318
err = fprop_local_init_percpu(&wb->completions, gfp);
315319
if (err)
316-
return err;
320+
goto out_put_cong;
317321

318322
for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
319323
err = percpu_counter_init(&wb->stat[i], 0, gfp);
320-
if (err) {
321-
while (--i)
322-
percpu_counter_destroy(&wb->stat[i]);
323-
fprop_local_destroy_percpu(&wb->completions);
324-
return err;
325-
}
324+
if (err)
325+
goto out_destroy_stat;
326326
}
327327

328328
return 0;
329+
330+
out_destroy_stat:
331+
while (--i)
332+
percpu_counter_destroy(&wb->stat[i]);
333+
fprop_local_destroy_percpu(&wb->completions);
334+
out_put_cong:
335+
wb_congested_put(wb->congested);
336+
return err;
329337
}
330338

331339
/*
@@ -361,6 +369,7 @@ static void wb_exit(struct bdi_writeback *wb)
361369
percpu_counter_destroy(&wb->stat[i]);
362370

363371
fprop_local_destroy_percpu(&wb->completions);
372+
wb_congested_put(wb->congested);
364373
}
365374

366375
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -392,9 +401,6 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
392401
struct bdi_writeback_congested *new_congested = NULL, *congested;
393402
struct rb_node **node, *parent;
394403
unsigned long flags;
395-
396-
if (blkcg_id == 1)
397-
return &bdi->wb_congested;
398404
retry:
399405
spin_lock_irqsave(&cgwb_lock, flags);
400406

@@ -453,9 +459,6 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
453459
struct backing_dev_info *bdi = congested->bdi;
454460
unsigned long flags;
455461

456-
if (congested->blkcg_id == 1)
457-
return;
458-
459462
local_irq_save(flags);
460463
if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
461464
local_irq_restore(flags);
@@ -480,7 +483,6 @@ static void cgwb_release_workfn(struct work_struct *work)
480483

481484
css_put(wb->memcg_css);
482485
css_put(wb->blkcg_css);
483-
wb_congested_put(wb->congested);
484486

485487
fprop_local_destroy_percpu(&wb->memcg_completions);
486488
percpu_ref_exit(&wb->refcnt);
@@ -541,7 +543,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
541543
if (!wb)
542544
return -ENOMEM;
543545

544-
ret = wb_init(wb, bdi, gfp);
546+
ret = wb_init(wb, bdi, blkcg_css->id, gfp);
545547
if (ret)
546548
goto err_free;
547549

@@ -553,12 +555,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
553555
if (ret)
554556
goto err_ref_exit;
555557

556-
wb->congested = wb_congested_get_create(bdi, blkcg_css->id, gfp);
557-
if (!wb->congested) {
558-
ret = -ENOMEM;
559-
goto err_fprop_exit;
560-
}
561-
562558
wb->memcg_css = memcg_css;
563559
wb->blkcg_css = blkcg_css;
564560
INIT_WORK(&wb->release_work, cgwb_release_workfn);
@@ -588,12 +584,10 @@ static int cgwb_create(struct backing_dev_info *bdi,
588584
if (ret) {
589585
if (ret == -EEXIST)
590586
ret = 0;
591-
goto err_put_congested;
587+
goto err_fprop_exit;
592588
}
593589
goto out_put;
594590

595-
err_put_congested:
596-
wb_congested_put(wb->congested);
597591
err_fprop_exit:
598592
fprop_local_destroy_percpu(&wb->memcg_completions);
599593
err_ref_exit:
@@ -662,14 +656,20 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
662656
return wb;
663657
}
664658

665-
static void cgwb_bdi_init(struct backing_dev_info *bdi)
659+
static int cgwb_bdi_init(struct backing_dev_info *bdi)
666660
{
667-
bdi->wb.memcg_css = mem_cgroup_root_css;
668-
bdi->wb.blkcg_css = blkcg_root_css;
669-
bdi->wb_congested.blkcg_id = 1;
661+
int ret;
662+
670663
INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
671664
bdi->cgwb_congested_tree = RB_ROOT;
672665
atomic_set(&bdi->usage_cnt, 1);
666+
667+
ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
668+
if (!ret) {
669+
bdi->wb.memcg_css = mem_cgroup_root_css;
670+
bdi->wb.blkcg_css = blkcg_root_css;
671+
}
672+
return ret;
673673
}
674674

675675
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
@@ -732,15 +732,28 @@ void wb_blkcg_offline(struct blkcg *blkcg)
732732

733733
#else /* CONFIG_CGROUP_WRITEBACK */
734734

735-
static void cgwb_bdi_init(struct backing_dev_info *bdi) { }
735+
static int cgwb_bdi_init(struct backing_dev_info *bdi)
736+
{
737+
int err;
738+
739+
bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
740+
if (!bdi->wb_congested)
741+
return -ENOMEM;
742+
743+
err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
744+
if (err) {
745+
kfree(bdi->wb_congested);
746+
return err;
747+
}
748+
return 0;
749+
}
750+
736751
static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
737752

738753
#endif /* CONFIG_CGROUP_WRITEBACK */
739754

740755
int bdi_init(struct backing_dev_info *bdi)
741756
{
742-
int err;
743-
744757
bdi->dev = NULL;
745758

746759
bdi->min_ratio = 0;
@@ -749,15 +762,7 @@ int bdi_init(struct backing_dev_info *bdi)
749762
INIT_LIST_HEAD(&bdi->bdi_list);
750763
init_waitqueue_head(&bdi->wb_waitq);
751764

752-
err = wb_init(&bdi->wb, bdi, GFP_KERNEL);
753-
if (err)
754-
return err;
755-
756-
bdi->wb_congested.state = 0;
757-
bdi->wb.congested = &bdi->wb_congested;
758-
759-
cgwb_bdi_init(bdi);
760-
return 0;
765+
return cgwb_bdi_init(bdi);
761766
}
762767
EXPORT_SYMBOL(bdi_init);
763768

0 commit comments

Comments
 (0)