Skip to content

Commit 391655f

Browse files
yuzhaogoogle authored and akpm00 committed
mm: multi-gen LRU: rename lru_gen_struct to lru_gen_folio
Patch series "mm: multi-gen LRU: memcg LRU", v3.

Overview
========

An memcg LRU is a per-node LRU of memcgs. It is also an LRU of LRUs, since each node and memcg combination has an LRU of folios (see mem_cgroup_lruvec()).

Its goal is to improve the scalability of global reclaim, which is critical to system-wide memory overcommit in data centers. Note that memcg reclaim is currently out of scope.

Its memory bloat is a pointer to each lruvec and negligible to each pglist_data. In terms of traversing memcgs during global reclaim, it improves the best-case complexity from O(n) to O(1) and does not affect the worst-case complexity O(n). Therefore, on average, it has a sublinear complexity in contrast to the current linear complexity.

The basic structure of an memcg LRU can be understood by an analogy to the active/inactive LRU (of folios):
1. It has the young and the old (generations), i.e., the counterparts to the active and the inactive;
2. The increment of max_seq triggers promotion, i.e., the counterpart to activation;
3. Other events trigger similar operations, e.g., offlining an memcg triggers demotion, i.e., the counterpart to deactivation.

In terms of global reclaim, it has two distinct features:
1. Sharding, which allows each thread to start at a random memcg (in the old generation) and improves parallelism;
2. Eventual fairness, which allows direct reclaim to bail out at will and reduces latency without affecting fairness over some time.

The commit message in patch 6 details the workflow:
https://lore.kernel.org/r/[email protected]/

The following is a simple test to quickly verify its effectiveness.

  Test design:
  1. Create multiple memcgs.
  2. Each memcg contains a job (fio).
  3. All jobs access the same amount of memory randomly.
  4. The system does not experience global memory pressure.
  5. Periodically write to the root memory.reclaim.

  Desired outcome:
  1. All memcgs have similar pgsteal counts, i.e., stddev(pgsteal) over mean(pgsteal) is close to 0%.
  2. The total pgsteal is close to the total requested through memory.reclaim, i.e., sum(pgsteal) over sum(requested) is close to 100%.

  Actual outcome [1]:
                                     MGLRU off    MGLRU on
  stddev(pgsteal) / mean(pgsteal)    75%          20%
  sum(pgsteal) / sum(requested)      425%         95%

  ####################################################################
  MEMCGS=128

  for ((memcg = 0; memcg < $MEMCGS; memcg++)); do
      mkdir /sys/fs/cgroup/memcg$memcg
  done

  start() {
      echo $BASHPID > /sys/fs/cgroup/memcg$memcg/cgroup.procs

      fio -name=memcg$memcg --numjobs=1 --ioengine=mmap \
          --filename=/dev/zero --size=1920M --rw=randrw \
          --rate=64m,64m --random_distribution=random \
          --fadvise_hint=0 --time_based --runtime=10h \
          --group_reporting --minimal
  }

  for ((memcg = 0; memcg < $MEMCGS; memcg++)); do
      start &
  done

  sleep 600

  for ((i = 0; i < 600; i++)); do
      echo 256m >/sys/fs/cgroup/memory.reclaim
      sleep 6
  done

  for ((memcg = 0; memcg < $MEMCGS; memcg++)); do
      grep "pgsteal " /sys/fs/cgroup/memcg$memcg/memory.stat
  done
  ####################################################################

[1]: This was obtained from running the above script (touches less than 256GB memory) on an EPYC 7B13 with 512GB DRAM for over an hour.

This patch (of 8):

The new name lru_gen_folio will be more distinct from the coming lru_gen_memcg.

Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Yu Zhao <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Michael Larabel <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 1468761 commit 391655f

File tree

4 files changed

+24
-24
lines changed

4 files changed

+24
-24
lines changed

include/linux/mm_inline.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,7 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli
178178
int zone = folio_zonenum(folio);
179179
int delta = folio_nr_pages(folio);
180180
enum lru_list lru = type * LRU_INACTIVE_FILE;
181-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
181+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
182182

183183
VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
184184
VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
@@ -224,7 +224,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
224224
int gen = folio_lru_gen(folio);
225225
int type = folio_is_file_lru(folio);
226226
int zone = folio_zonenum(folio);
227-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
227+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
228228

229229
VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
230230

include/linux/mmzone.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -404,7 +404,7 @@ enum {
404404
* The number of pages in each generation is eventually consistent and therefore
405405
* can be transiently negative when reset_batch_size() is pending.
406406
*/
407-
struct lru_gen_struct {
407+
struct lru_gen_folio {
408408
/* the aging increments the youngest generation number */
409409
unsigned long max_seq;
410410
/* the eviction increments the oldest generation numbers */
@@ -461,7 +461,7 @@ struct lru_gen_mm_state {
461461
struct lru_gen_mm_walk {
462462
/* the lruvec under reclaim */
463463
struct lruvec *lruvec;
464-
/* unstable max_seq from lru_gen_struct */
464+
/* unstable max_seq from lru_gen_folio */
465465
unsigned long max_seq;
466466
/* the next address within an mm to scan */
467467
unsigned long next_addr;
@@ -524,7 +524,7 @@ struct lruvec {
524524
unsigned long flags;
525525
#ifdef CONFIG_LRU_GEN
526526
/* evictable pages divided into generations */
527-
struct lru_gen_struct lrugen;
527+
struct lru_gen_folio lrugen;
528528
/* to concurrently iterate lru_gen_mm_list */
529529
struct lru_gen_mm_state mm_state;
530530
#endif

mm/vmscan.c

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -3215,7 +3215,7 @@ static int get_nr_gens(struct lruvec *lruvec, int type)
32153215

32163216
static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
32173217
{
3218-
/* see the comment on lru_gen_struct */
3218+
/* see the comment on lru_gen_folio */
32193219
return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS &&
32203220
get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
32213221
get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
@@ -3612,7 +3612,7 @@ struct ctrl_pos {
36123612
static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
36133613
struct ctrl_pos *pos)
36143614
{
3615-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
3615+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
36163616
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
36173617

36183618
pos->refaulted = lrugen->avg_refaulted[type][tier] +
@@ -3627,7 +3627,7 @@ static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
36273627
static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
36283628
{
36293629
int hist, tier;
3630-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
3630+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
36313631
bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
36323632
unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1;
36333633

@@ -3704,7 +3704,7 @@ static int folio_update_gen(struct folio *folio, int gen)
37043704
static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
37053705
{
37063706
int type = folio_is_file_lru(folio);
3707-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
3707+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
37083708
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
37093709
unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
37103710

@@ -3749,7 +3749,7 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio,
37493749
static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
37503750
{
37513751
int gen, type, zone;
3752-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
3752+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
37533753

37543754
walk->batched = 0;
37553755

@@ -4263,7 +4263,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
42634263
{
42644264
int zone;
42654265
int remaining = MAX_LRU_BATCH;
4266-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
4266+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
42674267
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
42684268

42694269
if (type == LRU_GEN_ANON && !can_swap)
@@ -4299,7 +4299,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
42994299
{
43004300
int gen, type, zone;
43014301
bool success = false;
4302-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
4302+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
43034303
DEFINE_MIN_SEQ(lruvec);
43044304

43054305
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
@@ -4320,7 +4320,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
43204320
;
43214321
}
43224322

4323-
/* see the comment on lru_gen_struct */
4323+
/* see the comment on lru_gen_folio */
43244324
if (can_swap) {
43254325
min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
43264326
min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
@@ -4342,7 +4342,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
43424342
{
43434343
int prev, next;
43444344
int type, zone;
4345-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
4345+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
43464346

43474347
spin_lock_irq(&lruvec->lru_lock);
43484348

@@ -4400,7 +4400,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
44004400
bool success;
44014401
struct lru_gen_mm_walk *walk;
44024402
struct mm_struct *mm = NULL;
4403-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
4403+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
44044404

44054405
VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
44064406

@@ -4465,7 +4465,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
44654465
unsigned long old = 0;
44664466
unsigned long young = 0;
44674467
unsigned long total = 0;
4468-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
4468+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
44694469
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
44704470

44714471
for (type = !can_swap; type < ANON_AND_FILE; type++) {
@@ -4750,7 +4750,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
47504750
int delta = folio_nr_pages(folio);
47514751
int refs = folio_lru_refs(folio);
47524752
int tier = lru_tier_from_refs(refs);
4753-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
4753+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
47544754

47554755
VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
47564756

@@ -4850,7 +4850,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
48504850
int scanned = 0;
48514851
int isolated = 0;
48524852
int remaining = MAX_LRU_BATCH;
4853-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
4853+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
48544854
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
48554855

48564856
VM_WARN_ON_ONCE(!list_empty(list));
@@ -5251,7 +5251,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
52515251

52525252
static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
52535253
{
5254-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
5254+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
52555255

52565256
if (lrugen->enabled) {
52575257
enum lru_list lru;
@@ -5530,7 +5530,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
55305530
int i;
55315531
int type, tier;
55325532
int hist = lru_hist_from_seq(seq);
5533-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
5533+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
55345534

55355535
for (tier = 0; tier < MAX_NR_TIERS; tier++) {
55365536
seq_printf(m, " %10d", tier);
@@ -5580,7 +5580,7 @@ static int lru_gen_seq_show(struct seq_file *m, void *v)
55805580
unsigned long seq;
55815581
bool full = !debugfs_real_fops(m->file)->write;
55825582
struct lruvec *lruvec = v;
5583-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
5583+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
55845584
int nid = lruvec_pgdat(lruvec)->node_id;
55855585
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
55865586
DEFINE_MAX_SEQ(lruvec);
@@ -5834,7 +5834,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
58345834
{
58355835
int i;
58365836
int gen, type, zone;
5837-
struct lru_gen_struct *lrugen = &lruvec->lrugen;
5837+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
58385838

58395839
lrugen->max_seq = MIN_NR_GENS + 1;
58405840
lrugen->enabled = lru_gen_enabled();

mm/workingset.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,7 @@ static void *lru_gen_eviction(struct folio *folio)
223223
unsigned long token;
224224
unsigned long min_seq;
225225
struct lruvec *lruvec;
226-
struct lru_gen_struct *lrugen;
226+
struct lru_gen_folio *lrugen;
227227
int type = folio_is_file_lru(folio);
228228
int delta = folio_nr_pages(folio);
229229
int refs = folio_lru_refs(folio);
@@ -252,7 +252,7 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
252252
unsigned long token;
253253
unsigned long min_seq;
254254
struct lruvec *lruvec;
255-
struct lru_gen_struct *lrugen;
255+
struct lru_gen_folio *lrugen;
256256
struct mem_cgroup *memcg;
257257
struct pglist_data *pgdat;
258258
int type = folio_is_file_lru(folio);

0 commit comments

Comments
 (0)