Skip to content

Commit 4717687

Browse files
Muchun Songjfvogel
authored andcommitted
mm: memcontrol: make all the callers of {folio,page}_memcg() safe
When we use objcg APIs to charge the LRU pages, the page will not hold a reference to the memcg associated with the page. So the caller of the {folio,page}_memcg() should hold an rcu read lock or obtain a reference to the memcg associated with the page to protect memcg from being released. So introduce get_mem_cgroup_from_{page,folio}() to obtain a reference to the memory cgroup associated with the page. In this patch, make all the callers hold an rcu read lock or obtain a reference to the memcg to protect memcg from being released when the LRU pages reparented. We do not need to adjust the callers of {folio,page}_memcg() during the whole process of mem_cgroup_move_task(). Because the cgroup migration and memory cgroup offlining are serialized by @cgroup_mutex. In this routine, the LRU pages cannot be reparented to its parent memory cgroup. So {folio,page}_memcg() is stable and cannot be released. This is a preparation for reparenting the LRU pages. Signed-off-by: Muchun Song <[email protected]> Acked-by: Roman Gushchin <[email protected]> Link: https://lore.kernel.org/all/[email protected]/ Orabug: 37405594 Conflicts: fs/buffer.c fs/fs-writeback.c include/linux/memcontrol.h mm/memcontrol.c mm/page_io.c (Due to presence of following commits in UEK-8: i. 'commits c71124a buffer: add folio_alloc_buffers() helper' ii. 'commit 75376c6 mm: convert mem_cgroup_css_from_page() to mem_cgroup_css_from_folio()' iii. 'commit 98630cf mm/page_io: convert bio_associate_blkg_from_page() to take in a folio' Signed-off-by: Imran Khan <[email protected]> Reviewed-by: Kamalesh Babulal <[email protected]>
1 parent a801752 commit 4717687

File tree

7 files changed

+116
-54
lines changed

7 files changed

+116
-54
lines changed

fs/buffer.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -918,8 +918,7 @@ struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
918918
long offset;
919919
struct mem_cgroup *memcg, *old_memcg;
920920

921-
/* The folio lock pins the memcg */
922-
memcg = folio_memcg(folio);
921+
memcg = get_mem_cgroup_from_folio(folio);
923922
old_memcg = set_active_memcg(memcg);
924923

925924
head = NULL;
@@ -939,6 +938,7 @@ struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
939938
folio_set_bh(bh, folio, offset);
940939
}
941940
out:
941+
mem_cgroup_put(memcg);
942942
set_active_memcg(old_memcg);
943943
return head;
944944
/*

fs/fs-writeback.c

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -269,15 +269,13 @@ void __inode_attach_wb(struct inode *inode, struct folio *folio)
269269
if (inode_cgwb_enabled(inode)) {
270270
struct cgroup_subsys_state *memcg_css;
271271

272-
if (folio) {
273-
memcg_css = mem_cgroup_css_from_folio(folio);
274-
wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
275-
} else {
276-
/* must pin memcg_css, see wb_get_create() */
272+
/* must pin memcg_css, see wb_get_create() */
273+
if (folio)
274+
memcg_css = get_mem_cgroup_css_from_folio(folio);
275+
else
277276
memcg_css = task_get_css(current, memory_cgrp_id);
278-
wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
279-
css_put(memcg_css);
280-
}
277+
wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
278+
css_put(memcg_css);
281279
}
282280

283281
if (!wb)
@@ -912,16 +910,16 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio
912910
if (!wbc->wb || wbc->no_cgroup_owner)
913911
return;
914912

915-
css = mem_cgroup_css_from_folio(folio);
913+
css = get_mem_cgroup_css_from_folio(folio);
916914
/* dead cgroups shouldn't contribute to inode ownership arbitration */
917915
if (!(css->flags & CSS_ONLINE))
918-
return;
916+
goto out;
919917

920918
id = css->id;
921919

922920
if (id == wbc->wb_id) {
923921
wbc->wb_bytes += bytes;
924-
return;
922+
goto out;
925923
}
926924

927925
if (id == wbc->wb_lcand_id)
@@ -934,6 +932,9 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio
934932
wbc->wb_tcand_bytes += bytes;
935933
else
936934
wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
935+
936+
out:
937+
css_put(css);
937938
}
938939
EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
939940

include/linux/memcontrol.h

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,7 @@ static inline bool folio_memcg_kmem(struct folio *folio);
382382
* a valid memcg, but can be atomically swapped to the parent memcg.
383383
*
384384
* The caller must ensure that the returned memcg won't be released.
385+
* e.g. acquire the rcu_read_lock or objcg_lock or cgroup_mutex.
385386
*/
386387
static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
387388
{
@@ -449,8 +450,8 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
449450
* - exclusive reference
450451
* - mem_cgroup_trylock_pages()
451452
*
452-
* For a kmem folio a caller should hold an rcu read lock to protect memcg
453-
* associated with a kmem folio from being released.
453+
* Note: The caller should hold an rcu read lock to protect memcg associated
454+
* with a folio from being released.
454455
*/
455456
static inline struct mem_cgroup *folio_memcg(struct folio *folio)
456457
{
@@ -472,7 +473,37 @@ static inline bool folio_memcg_charged(struct folio *folio)
472473
return __folio_memcg(folio) != NULL;
473474
}
474475

475-
/**
476+
/*
477+
* get_mem_cgroup_from_folio - Obtain a reference on the memory cgroup
478+
* associated with a folio.
479+
* @folio: Pointer to the folio.
480+
*
481+
* Returns a pointer to the memory cgroup (and obtain a reference on it)
482+
* associated with the folio, or NULL. This function assumes that the
483+
* folio is known to have a proper memory cgroup pointer. It's not safe
484+
* to call this function against some type of pages, e.g. slab pages or
485+
* ex-slab pages.
486+
*/
487+
static inline struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio)
488+
{
489+
struct mem_cgroup *memcg;
490+
491+
rcu_read_lock();
492+
retry:
493+
memcg = folio_memcg(folio);
494+
if (unlikely(memcg && !css_tryget(&memcg->css)))
495+
goto retry;
496+
rcu_read_unlock();
497+
498+
return memcg;
499+
}
500+
501+
static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
502+
{
503+
return get_mem_cgroup_from_folio(page_folio(page));
504+
}
505+
506+
/*
476507
* folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
477508
* @folio: Pointer to the folio.
478509
*
@@ -926,7 +957,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm,
926957
return match;
927958
}
928959

929-
struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio);
960+
struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio);
930961
ino_t page_cgroup_ino(struct page *page);
931962

932963
static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
@@ -1030,10 +1061,13 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
10301061
static inline void count_memcg_folio_events(struct folio *folio,
10311062
enum vm_event_item idx, unsigned long nr)
10321063
{
1033-
struct mem_cgroup *memcg = folio_memcg(folio);
1064+
struct mem_cgroup *memcg;
10341065

1066+
rcu_read_lock();
1067+
memcg = folio_memcg(folio);
10351068
if (memcg)
10361069
count_memcg_events(memcg, idx, nr);
1070+
rcu_read_unlock();
10371071
}
10381072

10391073
static inline void count_memcg_events_mm(struct mm_struct *mm,
@@ -1108,6 +1142,16 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
11081142
return NULL;
11091143
}
11101144

1145+
static inline struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio)
1146+
{
1147+
return NULL;
1148+
}
1149+
1150+
static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
1151+
{
1152+
return NULL;
1153+
}
1154+
11111155
static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
11121156
{
11131157
WARN_ON_ONCE(!rcu_read_lock_held());

include/trace/events/writeback.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,11 @@ TRACE_EVENT(track_foreign_dirty,
266266
__entry->ino = inode ? inode->i_ino : 0;
267267
__entry->memcg_id = wb->memcg_css->id;
268268
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
269+
/*
270+
* TP_fast_assign() is under preemption disabled which can
271+
* serve as an RCU read-side critical section so that the
272+
* memcg returned by folio_memcg() cannot be freed.
273+
*/
269274
__entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup);
270275
),
271276

mm/memcontrol.c

Lines changed: 41 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key);
230230
EXPORT_SYMBOL(memcg_bpf_enabled_key);
231231

232232
/**
233-
* mem_cgroup_css_from_folio - css of the memcg associated with a folio
233+
* get_mem_cgroup_css_from_folio - css of the memcg associated with a folio
234234
* @folio: folio of interest
235235
*
236236
* If memcg is bound to the default hierarchy, css of the memcg associated
@@ -240,11 +240,15 @@ EXPORT_SYMBOL(memcg_bpf_enabled_key);
240240
* If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup
241241
* is returned.
242242
*/
243-
struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio)
243+
struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio)
244244
{
245-
struct mem_cgroup *memcg = folio_memcg(folio);
245+
struct mem_cgroup *memcg;
246+
247+
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
248+
return &root_mem_cgroup->css;
246249

247-
if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
250+
memcg = get_mem_cgroup_from_folio(folio);
251+
if (!memcg)
248252
memcg = root_mem_cgroup;
249253

250254
return &memcg->css;
@@ -963,24 +967,6 @@ struct mem_cgroup *get_mem_cgroup_from_current(void)
963967
return memcg;
964968
}
965969

966-
/**
967-
* get_mem_cgroup_from_folio - Obtain a reference on a given folio's memcg.
968-
* @folio: folio from which memcg should be extracted.
969-
*/
970-
struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio)
971-
{
972-
struct mem_cgroup *memcg = folio_memcg(folio);
973-
974-
if (mem_cgroup_disabled())
975-
return NULL;
976-
977-
rcu_read_lock();
978-
if (!memcg || WARN_ON_ONCE(!css_tryget(&memcg->css)))
979-
memcg = root_mem_cgroup;
980-
rcu_read_unlock();
981-
return memcg;
982-
}
983-
984970
/**
985971
* mem_cgroup_iter - iterate over memory cgroup hierarchy
986972
* @root: hierarchy root
@@ -3054,6 +3040,7 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
30543040
void split_page_memcg(struct page *head, int old_order, int new_order)
30553041
{
30563042
struct folio *folio = page_folio(head);
3043+
struct mem_cgroup *memcg = get_mem_cgroup_from_folio(folio);
30573044
int i;
30583045
unsigned int old_nr = 1 << old_order;
30593046
unsigned int new_nr = 1 << new_order;
@@ -3067,7 +3054,9 @@ void split_page_memcg(struct page *head, int old_order, int new_order)
30673054
if (folio_memcg_kmem(folio))
30683055
obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1);
30693056
else
3070-
css_get_many(&folio_memcg(folio)->css, old_nr / new_nr - 1);
3057+
css_get_many(&memcg->css, old_nr / new_nr - 1);
3058+
3059+
css_put(&memcg->css);
30713060
}
30723061

30733062
unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
@@ -3237,7 +3226,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
32373226
void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
32383227
struct bdi_writeback *wb)
32393228
{
3240-
struct mem_cgroup *memcg = folio_memcg(folio);
3229+
struct mem_cgroup *memcg = get_mem_cgroup_from_folio(folio);
32413230
struct memcg_cgwb_frn *frn;
32423231
u64 now = get_jiffies_64();
32433232
u64 oldest_at = now;
@@ -3284,6 +3273,7 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
32843273
frn->memcg_id = wb->memcg_css->id;
32853274
frn->at = now;
32863275
}
3276+
css_put(&memcg->css);
32873277
}
32883278

32893279
/* issue foreign writeback flushes for recorded foreign dirtying events */
@@ -4759,7 +4749,7 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
47594749
if (folio_memcg_charged(new))
47604750
return;
47614751

4762-
memcg = folio_memcg(old);
4752+
memcg = get_mem_cgroup_from_folio(old);
47634753
VM_WARN_ON_ONCE_FOLIO(!memcg, old);
47644754
if (!memcg)
47654755
return;
@@ -4774,6 +4764,7 @@ void mem_cgroup_replace_folio(struct folio *old, struct folio *new)
47744764
css_get(&memcg->css);
47754765
commit_charge(new, memcg);
47764766
memcg1_commit_charge(new, memcg);
4767+
css_put(&memcg->css); //for get_mem_cgroup_from_folio
47774768
}
47784769

47794770
/**
@@ -4800,7 +4791,7 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
48004791
if (mem_cgroup_disabled())
48014792
return;
48024793

4803-
memcg = folio_memcg(old);
4794+
memcg = get_mem_cgroup_from_folio(old);
48044795
/*
48054796
* Note that it is normal to see !memcg for a hugetlb folio.
48064797
* For e.g, itt could have been allocated when memory_hugetlb_accounting
@@ -4816,6 +4807,7 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new)
48164807
/* Warning should never happen, so don't worry about refcount non-0 */
48174808
WARN_ON_ONCE(folio_unqueue_deferred_split(old));
48184809
old->memcg_data = 0;
4810+
css_put(&memcg->css);
48194811
}
48204812

48214813
DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
@@ -4978,6 +4970,10 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
49784970
if (!do_memsw_account())
49794971
return;
49804972

4973+
/*
4974+
* Interrupts should be disabled by the caller (see the comments below),
4975+
* which can serve as RCU read-side critical sections.
4976+
*/
49814977
memcg = folio_memcg(folio);
49824978

49834979
VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
@@ -5030,19 +5026,21 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
50305026
struct page_counter *counter;
50315027
struct mem_cgroup *memcg;
50325028
unsigned short oldid;
5029+
int ret = 0;
50335030

50345031
if (do_memsw_account())
50355032
return 0;
50365033

5034+
rcu_read_lock();
50375035
memcg = folio_memcg(folio);
50385036

50395037
VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
50405038
if (!memcg)
5041-
return 0;
5039+
goto out;
50425040

50435041
if (!entry.val) {
50445042
memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
5045-
return 0;
5043+
goto out;
50465044
}
50475045

50485046
memcg = mem_cgroup_id_get_online(memcg);
@@ -5052,7 +5050,8 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
50525050
memcg_memory_event(memcg, MEMCG_SWAP_MAX);
50535051
memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
50545052
mem_cgroup_id_put(memcg);
5055-
return -ENOMEM;
5053+
ret = -ENOMEM;
5054+
goto out;
50565055
}
50575056

50585057
/* Get references for the tail pages, too */
@@ -5061,8 +5060,10 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
50615060
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages);
50625061
VM_BUG_ON_FOLIO(oldid, folio);
50635062
mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
5063+
out:
5064+
rcu_read_unlock();
50645065

5065-
return 0;
5066+
return ret;
50665067
}
50675068

50685069
/**
@@ -5107,6 +5108,7 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
51075108
bool mem_cgroup_swap_full(struct folio *folio)
51085109
{
51095110
struct mem_cgroup *memcg;
5111+
bool ret = false;
51105112

51115113
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
51125114

@@ -5115,19 +5117,24 @@ bool mem_cgroup_swap_full(struct folio *folio)
51155117
if (do_memsw_account())
51165118
return false;
51175119

5120+
rcu_read_lock();
51185121
memcg = folio_memcg(folio);
51195122
if (!memcg)
5120-
return false;
5123+
goto out;
51215124

51225125
for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
51235126
unsigned long usage = page_counter_read(&memcg->swap);
51245127

51255128
if (usage * 2 >= READ_ONCE(memcg->swap.high) ||
5126-
usage * 2 >= READ_ONCE(memcg->swap.max))
5127-
return true;
5129+
usage * 2 >= READ_ONCE(memcg->swap.max)) {
5130+
ret = true;
5131+
goto out;
5132+
}
51285133
}
5134+
out:
5135+
rcu_read_unlock();
51295136

5130-
return false;
5137+
return ret;
51315138
}
51325139

51335140
static int __init setup_swap_account(char *s)

0 commit comments

Comments
 (0)