@@ -310,28 +310,11 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 	}
 }
 
-static void blkg_pd_offline(struct blkcg_gq *blkg)
-{
-	int i;
-
-	lockdep_assert_held(blkg->q->queue_lock);
-	lockdep_assert_held(&blkg->blkcg->lock);
-
-	for (i = 0; i < BLKCG_MAX_POLS; i++) {
-		struct blkcg_policy *pol = blkcg_policy[i];
-
-		if (blkg->pd[i] && !blkg->pd[i]->offline &&
-		    pol->pd_offline_fn) {
-			pol->pd_offline_fn(blkg->pd[i]);
-			blkg->pd[i]->offline = true;
-		}
-	}
-}
-
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
 	struct blkcg *blkcg = blkg->blkcg;
 	struct blkcg_gq *parent = blkg->parent;
+	int i;
 
 	lockdep_assert_held(blkg->q->queue_lock);
 	lockdep_assert_held(&blkcg->lock);
@@ -340,6 +323,13 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	WARN_ON_ONCE(list_empty(&blkg->q_node));
 	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
 
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_offline_fn)
+			pol->pd_offline_fn(blkg->pd[i]);
+	}
+
 	if (parent) {
 		blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
 		blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
@@ -382,7 +372,6 @@ static void blkg_destroy_all(struct request_queue *q)
 		struct blkcg *blkcg = blkg->blkcg;
 
 		spin_lock(&blkcg->lock);
-		blkg_pd_offline(blkg);
 		blkg_destroy(blkg);
 		spin_unlock(&blkcg->lock);
 	}
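
With blkg_pd_offline() gone, a pd is offlined in exactly one place right before its teardown: blkg_destroy() above (or blkcg_deactivate_policy(), last hunk), always with both q->queue_lock and blkcg->lock held, so policies no longer need the pd->offline guard flag. As a hedged sketch of what that buys a policy implementation (example_grp and example_grp_quiesce are hypothetical names, not from this patch):

/*
 * Sketch only: a policy's pd_offline_fn under the new single-caller rule.
 * It may assume it runs at most once per pd, under both locks, and is
 * immediately followed by teardown, so it keeps no offline state itself.
 */
static void example_pd_offline(struct blkg_policy_data *pd)
{
	struct example_grp *eg = container_of(pd, struct example_grp, pd);

	/* stop issuing new IO for this group; stat transfer to the
	 * parent happens in blkg_destroy() via blkg_rwstat_add_aux() */
	example_grp_quiesce(eg);
}
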
@@ -1053,59 +1042,64 @@ static struct cftype blkcg_legacy_files[] = {
 	{ }	/* terminate */
 };
 
+/*
+ * blkcg destruction is a three-stage process.
+ *
+ * 1. Destruction starts.  The blkcg_css_offline() callback is invoked
+ *    which offlines writeback.  Here we tie the next stage of blkg destruction
+ *    to the completion of writeback associated with the blkcg.  This lets us
+ *    avoid punting potentially large amounts of outstanding writeback to root
+ *    while maintaining any ongoing policies.  The next stage is triggered when
+ *    the nr_cgwbs count goes to zero.
+ *
+ * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
+ *    and handles the destruction of blkgs.  Here the css reference held by
+ *    the blkg is put back eventually allowing blkcg_css_free() to be called.
+ *    This work may occur in cgwb_release_workfn() on the cgwb_release
+ *    workqueue.  Any submitted ios that fail to get the blkg ref will be
+ *    punted to the root_blkg.
+ *
+ * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
+ *    This finally frees the blkcg.
+ */
+
 /**
  * blkcg_css_offline - cgroup css_offline callback
  * @css: css of interest
  *
- * This function is called when @css is about to go away and responsible
- * for offlining all blkgs pd and killing all wbs associated with @css.
- * blkgs pd offline should be done while holding both q and blkcg locks.
- * As blkcg lock is nested inside q lock, this function performs reverse
- * double lock dancing.
- *
- * This is the blkcg counterpart of ioc_release_fn().
+ * This function is called when @css is about to go away.  Here the cgwbs are
+ * offlined first and only once writeback associated with the blkcg has
+ * finished do we start step 2 (see above).
  */
 static void blkcg_css_offline(struct cgroup_subsys_state *css)
 {
 	struct blkcg *blkcg = css_to_blkcg(css);
-	struct blkcg_gq *blkg;
-
-	spin_lock_irq(&blkcg->lock);
-
-	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
-		struct request_queue *q = blkg->q;
-
-		if (spin_trylock(q->queue_lock)) {
-			blkg_pd_offline(blkg);
-			spin_unlock(q->queue_lock);
-		} else {
-			spin_unlock_irq(&blkcg->lock);
-			cpu_relax();
-			spin_lock_irq(&blkcg->lock);
-		}
-	}
-
-	spin_unlock_irq(&blkcg->lock);
 
+	/* this prevents anyone from attaching or migrating to this blkcg */
 	wb_blkcg_offline(blkcg);
+
+	/* put the base cgwb reference allowing step 2 to be triggered */
+	blkcg_cgwb_put(blkcg);
 }
 
 /**
- * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
+ * blkcg_destroy_blkgs - responsible for shooting down blkgs
  * @blkcg: blkcg of interest
  *
- * This function is called when blkcg css is about to free and responsible for
- * destroying all blkgs associated with @blkcg.
- * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
+ * blkgs should be removed while holding both q and blkcg locks.  As blkcg lock
  * is nested inside q lock, this function performs reverse double lock dancing.
+ * Destroying the blkgs releases the reference held on the blkcg's css allowing
+ * blkcg_css_free to eventually be called.
+ *
+ * This is the blkcg counterpart of ioc_release_fn().
  */
-static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
+void blkcg_destroy_blkgs(struct blkcg *blkcg)
 {
 	spin_lock_irq(&blkcg->lock);
+
 	while (!hlist_empty(&blkcg->blkg_list)) {
 		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
-						    struct blkcg_gq,
-						    blkcg_node);
+						struct blkcg_gq, blkcg_node);
 		struct request_queue *q = blkg->q;
 
 		if (spin_trylock(q->queue_lock)) {
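
blkcg_css_offline() now only drops the base cgwb reference; blkg destruction fires when the count reaches zero. The refcount helpers themselves are not part of this file's diff (they would live in include/linux/blk-cgroup.h), so the following is a minimal sketch of their plausible shape, an assumption consistent with the hunks here rather than a quote of the patch:

static inline void blkcg_cgwb_get(struct blkcg *blkcg)
{
	refcount_inc(&blkcg->cgwb_refcnt);
}

static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
	/* the last put kicks off step 2 of destruction */
	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
		blkcg_destroy_blkgs(blkcg);
}
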
@@ -1117,6 +1111,7 @@ static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
 			spin_lock_irq(&blkcg->lock);
 		}
 	}
+
 	spin_unlock_irq(&blkcg->lock);
 }
 
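The "reverse double lock dancing" noted in the kernel-doc is worth unpacking: the normal ordering takes q->queue_lock before blkcg->lock, but blkcg_destroy_blkgs() already holds the inner lock, so it can only trylock the outer one and must back off completely on failure to avoid deadlock. A self-contained userspace analogue of the pattern, with pthread spinlocks standing in for the kernel's (a sketch, not kernel code):

#include <pthread.h>
#include <sched.h>

/* q_lock nests outside blkcg_lock in the normal locking order */
static pthread_spinlock_t q_lock, blkcg_lock;
static int nr_blkgs = 3;	/* stand-in for blkcg->blkg_list */

static void destroy_all_under_both_locks(void)
{
	pthread_spin_lock(&blkcg_lock);
	while (nr_blkgs > 0) {
		if (pthread_spin_trylock(&q_lock) == 0) {
			nr_blkgs--;	/* both locks held: destroy one blkg */
			pthread_spin_unlock(&q_lock);
		} else {
			/* can't take the outer lock while holding the inner
			 * one: back off fully and retry, mirroring the
			 * cpu_relax() loop in blkcg_destroy_blkgs() */
			pthread_spin_unlock(&blkcg_lock);
			sched_yield();
			pthread_spin_lock(&blkcg_lock);
		}
	}
	pthread_spin_unlock(&blkcg_lock);
}

int main(void)
{
	pthread_spin_init(&q_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&blkcg_lock, PTHREAD_PROCESS_PRIVATE);
	destroy_all_under_both_locks();
	return 0;
}
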
@@ -1125,8 +1120,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
 	struct blkcg *blkcg = css_to_blkcg(css);
 	int i;
 
-	blkcg_destroy_all_blkgs(blkcg);
-
 	mutex_lock(&blkcg_pol_mutex);
 
 	list_del(&blkcg->all_blkcgs_node);
@@ -1189,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 	INIT_HLIST_HEAD(&blkcg->blkg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&blkcg->cgwb_list);
+	refcount_set(&blkcg->cgwb_refcnt, 1);
 #endif
 
 	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
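
The count starts at 1: that base reference stands for "css not yet offline", each cgwb created against the blkcg takes an extra reference, and only when both the css has gone offline and every cgwb has been released does blkg destruction run. A toy userspace model of the protocol (hypothetical names; C11 atomics standing in for the kernel's refcount_t) that compiles and traces the sequence:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int cgwb_refcnt;

static void destroy_blkgs(void) { puts("step 2: blkcg_destroy_blkgs()"); }

static void cgwb_get(void) { atomic_fetch_add(&cgwb_refcnt, 1); }

static void cgwb_put(void)
{
	/* the last put triggers blkg destruction, as blkcg_cgwb_put() would */
	if (atomic_fetch_sub(&cgwb_refcnt, 1) == 1)
		destroy_blkgs();
}

int main(void)
{
	atomic_init(&cgwb_refcnt, 1);	/* base ref from blkcg_css_alloc() */
	cgwb_get();			/* a cgwb is created -> 2 */
	cgwb_put();			/* css goes offline: drop base ref -> 1 */
	cgwb_put();			/* last cgwb released -> 0, destroy */
	return 0;
}
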
@@ -1480,11 +1474,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
 
 	list_for_each_entry(blkg, &q->blkg_list, q_node) {
 		if (blkg->pd[pol->plid]) {
-			if (!blkg->pd[pol->plid]->offline &&
-			    pol->pd_offline_fn) {
+			if (pol->pd_offline_fn)
 				pol->pd_offline_fn(blkg->pd[pol->plid]);
-				blkg->pd[pol->plid]->offline = true;
-			}
 			pol->pd_free_fn(blkg->pd[pol->plid]);
 			blkg->pd[pol->plid] = NULL;
 		}