Skip to content

Commit ca16eb3

Browse files
committed
Merge tag 'for-linus-20180906' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "Small collection of fixes that should go into this release. This contains:

   - Small series that fixes a race between blkcg teardown and writeback
     (Dennis Zhou)

   - Fix disallowing invalid block size settings from the nbd ioctl (me)

   - BFQ fix for a use-after-free on last release of a bfqg (Konstantin
     Khlebnikov)

   - Fix for the "don't warn for flush" fix (Mikulas)"

* tag 'for-linus-20180906' of git://git.kernel.dk/linux-block:
  block: bfq: swap puts in bfqg_and_blkg_put
  block: don't warn when doing fsync on read-only devices
  nbd: don't allow invalid blocksize settings
  blkcg: use tryget logic when associating a blkg with a bio
  blkcg: delay blkg destruction until after writeback has finished
  Revert "blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()"
2 parents db44bf4 + d5274b3 commit ca16eb3

File tree

8 files changed

+111
-64
lines changed

8 files changed

+111
-64
lines changed

block/bfq-cgroup.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,9 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg)
275275

276276
void bfqg_and_blkg_put(struct bfq_group *bfqg)
277277
{
278-
bfqg_put(bfqg);
279-
280278
blkg_put(bfqg_to_blkg(bfqg));
279+
280+
bfqg_put(bfqg);
281281
}
282282

283283
/* @stats = 0 */

block/bio.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2015,7 +2015,8 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
20152015
{
20162016
if (unlikely(bio->bi_blkg))
20172017
return -EBUSY;
2018-
blkg_get(blkg);
2018+
if (!blkg_try_get(blkg))
2019+
return -ENODEV;
20192020
bio->bi_blkg = blkg;
20202021
return 0;
20212022
}

block/blk-cgroup.c

Lines changed: 48 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -310,28 +310,11 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
310310
}
311311
}
312312

313-
static void blkg_pd_offline(struct blkcg_gq *blkg)
314-
{
315-
int i;
316-
317-
lockdep_assert_held(blkg->q->queue_lock);
318-
lockdep_assert_held(&blkg->blkcg->lock);
319-
320-
for (i = 0; i < BLKCG_MAX_POLS; i++) {
321-
struct blkcg_policy *pol = blkcg_policy[i];
322-
323-
if (blkg->pd[i] && !blkg->pd[i]->offline &&
324-
pol->pd_offline_fn) {
325-
pol->pd_offline_fn(blkg->pd[i]);
326-
blkg->pd[i]->offline = true;
327-
}
328-
}
329-
}
330-
331313
static void blkg_destroy(struct blkcg_gq *blkg)
332314
{
333315
struct blkcg *blkcg = blkg->blkcg;
334316
struct blkcg_gq *parent = blkg->parent;
317+
int i;
335318

336319
lockdep_assert_held(blkg->q->queue_lock);
337320
lockdep_assert_held(&blkcg->lock);
@@ -340,6 +323,13 @@ static void blkg_destroy(struct blkcg_gq *blkg)
340323
WARN_ON_ONCE(list_empty(&blkg->q_node));
341324
WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
342325

326+
for (i = 0; i < BLKCG_MAX_POLS; i++) {
327+
struct blkcg_policy *pol = blkcg_policy[i];
328+
329+
if (blkg->pd[i] && pol->pd_offline_fn)
330+
pol->pd_offline_fn(blkg->pd[i]);
331+
}
332+
343333
if (parent) {
344334
blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
345335
blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
@@ -382,7 +372,6 @@ static void blkg_destroy_all(struct request_queue *q)
382372
struct blkcg *blkcg = blkg->blkcg;
383373

384374
spin_lock(&blkcg->lock);
385-
blkg_pd_offline(blkg);
386375
blkg_destroy(blkg);
387376
spin_unlock(&blkcg->lock);
388377
}
@@ -1053,59 +1042,64 @@ static struct cftype blkcg_legacy_files[] = {
10531042
{ } /* terminate */
10541043
};
10551044

1045+
/*
1046+
* blkcg destruction is a three-stage process.
1047+
*
1048+
* 1. Destruction starts. The blkcg_css_offline() callback is invoked
1049+
* which offlines writeback. Here we tie the next stage of blkg destruction
1050+
* to the completion of writeback associated with the blkcg. This lets us
1051+
* avoid punting potentially large amounts of outstanding writeback to root
1052+
* while maintaining any ongoing policies. The next stage is triggered when
1053+
* the nr_cgwbs count goes to zero.
1054+
*
1055+
* 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
1056+
* and handles the destruction of blkgs. Here the css reference held by
1057+
* the blkg is put back eventually allowing blkcg_css_free() to be called.
1058+
* This work may occur in cgwb_release_workfn() on the cgwb_release
1059+
* workqueue. Any submitted ios that fail to get the blkg ref will be
1060+
* punted to the root_blkg.
1061+
*
1062+
* 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
1063+
* This finally frees the blkcg.
1064+
*/
1065+
10561066
/**
10571067
* blkcg_css_offline - cgroup css_offline callback
10581068
* @css: css of interest
10591069
*
1060-
* This function is called when @css is about to go away and responsible
1061-
* for offlining all blkgs pd and killing all wbs associated with @css.
1062-
* blkgs pd offline should be done while holding both q and blkcg locks.
1063-
* As blkcg lock is nested inside q lock, this function performs reverse
1064-
* double lock dancing.
1065-
*
1066-
* This is the blkcg counterpart of ioc_release_fn().
1070+
* This function is called when @css is about to go away. Here the cgwbs are
1071+
* offlined first and only once writeback associated with the blkcg has
1072+
* finished do we start step 2 (see above).
10671073
*/
10681074
static void blkcg_css_offline(struct cgroup_subsys_state *css)
10691075
{
10701076
struct blkcg *blkcg = css_to_blkcg(css);
1071-
struct blkcg_gq *blkg;
1072-
1073-
spin_lock_irq(&blkcg->lock);
1074-
1075-
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
1076-
struct request_queue *q = blkg->q;
1077-
1078-
if (spin_trylock(q->queue_lock)) {
1079-
blkg_pd_offline(blkg);
1080-
spin_unlock(q->queue_lock);
1081-
} else {
1082-
spin_unlock_irq(&blkcg->lock);
1083-
cpu_relax();
1084-
spin_lock_irq(&blkcg->lock);
1085-
}
1086-
}
1087-
1088-
spin_unlock_irq(&blkcg->lock);
10891077

1078+
/* this prevents anyone from attaching or migrating to this blkcg */
10901079
wb_blkcg_offline(blkcg);
1080+
1081+
/* put the base cgwb reference allowing step 2 to be triggered */
1082+
blkcg_cgwb_put(blkcg);
10911083
}
10921084

10931085
/**
1094-
* blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
1086+
* blkcg_destroy_blkgs - responsible for shooting down blkgs
10951087
* @blkcg: blkcg of interest
10961088
*
1097-
* This function is called when blkcg css is about to free and responsible for
1098-
* destroying all blkgs associated with @blkcg.
1099-
* blkgs should be removed while holding both q and blkcg locks. As blkcg lock
1089+
* blkgs should be removed while holding both q and blkcg locks. As blkcg lock
11001090
* is nested inside q lock, this function performs reverse double lock dancing.
1091+
* Destroying the blkgs releases the reference held on the blkcg's css allowing
1092+
* blkcg_css_free to eventually be called.
1093+
*
1094+
* This is the blkcg counterpart of ioc_release_fn().
11011095
*/
1102-
static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
1096+
void blkcg_destroy_blkgs(struct blkcg *blkcg)
11031097
{
11041098
spin_lock_irq(&blkcg->lock);
1099+
11051100
while (!hlist_empty(&blkcg->blkg_list)) {
11061101
struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
1107-
struct blkcg_gq,
1108-
blkcg_node);
1102+
struct blkcg_gq, blkcg_node);
11091103
struct request_queue *q = blkg->q;
11101104

11111105
if (spin_trylock(q->queue_lock)) {
@@ -1117,6 +1111,7 @@ static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
11171111
spin_lock_irq(&blkcg->lock);
11181112
}
11191113
}
1114+
11201115
spin_unlock_irq(&blkcg->lock);
11211116
}
11221117

@@ -1125,8 +1120,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
11251120
struct blkcg *blkcg = css_to_blkcg(css);
11261121
int i;
11271122

1128-
blkcg_destroy_all_blkgs(blkcg);
1129-
11301123
mutex_lock(&blkcg_pol_mutex);
11311124

11321125
list_del(&blkcg->all_blkcgs_node);
@@ -1189,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
11891182
INIT_HLIST_HEAD(&blkcg->blkg_list);
11901183
#ifdef CONFIG_CGROUP_WRITEBACK
11911184
INIT_LIST_HEAD(&blkcg->cgwb_list);
1185+
refcount_set(&blkcg->cgwb_refcnt, 1);
11921186
#endif
11931187
list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
11941188

@@ -1480,11 +1474,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
14801474

14811475
list_for_each_entry(blkg, &q->blkg_list, q_node) {
14821476
if (blkg->pd[pol->plid]) {
1483-
if (!blkg->pd[pol->plid]->offline &&
1484-
pol->pd_offline_fn) {
1477+
if (pol->pd_offline_fn)
14851478
pol->pd_offline_fn(blkg->pd[pol->plid]);
1486-
blkg->pd[pol->plid]->offline = true;
1487-
}
14881479
pol->pd_free_fn(blkg->pd[pol->plid]);
14891480
blkg->pd[pol->plid] = NULL;
14901481
}

block/blk-core.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2163,9 +2163,12 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
21632163
{
21642164
const int op = bio_op(bio);
21652165

2166-
if (part->policy && (op_is_write(op) && !op_is_flush(op))) {
2166+
if (part->policy && op_is_write(op)) {
21672167
char b[BDEVNAME_SIZE];
21682168

2169+
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
2170+
return false;
2171+
21692172
WARN_ONCE(1,
21702173
"generic_make_request: Trying to write "
21712174
"to read-only block-device %s (partno %d)\n",

block/blk-throttle.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2129,8 +2129,9 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
21292129
static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
21302130
{
21312131
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
2132-
if (bio->bi_css)
2133-
bio_associate_blkg(bio, tg_to_blkg(tg));
2132+
/* fallback to root_blkg if we fail to get a blkg ref */
2133+
if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
2134+
bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
21342135
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
21352136
#endif
21362137
}

drivers/block/nbd.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
12391239
case NBD_SET_SOCK:
12401240
return nbd_add_socket(nbd, arg, false);
12411241
case NBD_SET_BLKSIZE:
1242+
if (!arg || !is_power_of_2(arg) || arg < 512 ||
1243+
arg > PAGE_SIZE)
1244+
return -EINVAL;
12421245
nbd_size_set(nbd, arg,
12431246
div_s64(config->bytesize, arg));
12441247
return 0;

include/linux/blk-cgroup.h

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ struct blkcg {
5656
struct list_head all_blkcgs_node;
5757
#ifdef CONFIG_CGROUP_WRITEBACK
5858
struct list_head cgwb_list;
59+
refcount_t cgwb_refcnt;
5960
#endif
6061
};
6162

@@ -89,7 +90,6 @@ struct blkg_policy_data {
8990
/* the blkg and policy id this per-policy data belongs to */
9091
struct blkcg_gq *blkg;
9192
int plid;
92-
bool offline;
9393
};
9494

9595
/*
@@ -387,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
387387
return cpd ? cpd->blkcg : NULL;
388388
}
389389

390+
extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
391+
392+
#ifdef CONFIG_CGROUP_WRITEBACK
393+
394+
/**
395+
* blkcg_cgwb_get - get a reference for blkcg->cgwb_list
396+
* @blkcg: blkcg of interest
397+
*
398+
* This is used to track the number of active wb's related to a blkcg.
399+
*/
400+
static inline void blkcg_cgwb_get(struct blkcg *blkcg)
401+
{
402+
refcount_inc(&blkcg->cgwb_refcnt);
403+
}
404+
405+
/**
406+
* blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
407+
* @blkcg: blkcg of interest
408+
*
409+
* This is used to track the number of active wb's related to a blkcg.
410+
* When this count goes to zero, all active wb has finished so the
411+
* blkcg can continue destruction by calling blkcg_destroy_blkgs().
412+
* This work may occur in cgwb_release_workfn() on the cgwb_release
413+
* workqueue.
414+
*/
415+
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
416+
{
417+
if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
418+
blkcg_destroy_blkgs(blkcg);
419+
}
420+
421+
#else
422+
423+
static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
424+
425+
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
426+
{
427+
/* wb isn't being accounted, so trigger destruction right away */
428+
blkcg_destroy_blkgs(blkcg);
429+
}
430+
431+
#endif
432+
390433
/**
391434
* blkg_path - format cgroup path of blkg
392435
* @blkg: blkg of interest

mm/backing-dev.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work)
491491
{
492492
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
493493
release_work);
494+
struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
494495

495496
mutex_lock(&wb->bdi->cgwb_release_mutex);
496497
wb_shutdown(wb);
@@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work)
499500
css_put(wb->blkcg_css);
500501
mutex_unlock(&wb->bdi->cgwb_release_mutex);
501502

503+
/* triggers blkg destruction if cgwb_refcnt becomes zero */
504+
blkcg_cgwb_put(blkcg);
505+
502506
fprop_local_destroy_percpu(&wb->memcg_completions);
503507
percpu_ref_exit(&wb->refcnt);
504508
wb_exit(wb);
@@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
597601
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
598602
list_add(&wb->memcg_node, memcg_cgwb_list);
599603
list_add(&wb->blkcg_node, blkcg_cgwb_list);
604+
blkcg_cgwb_get(blkcg);
600605
css_get(memcg_css);
601606
css_get(blkcg_css);
602607
}

0 commit comments

Comments
 (0)