Skip to content

Commit 7348cc9

Browse files
yuzhaogoogle authored and akpm00 committed
mm: multi-gen LRU: remove aging fairness safeguard
Recall that the aging produces the youngest generation: first it scans for accessed folios and updates their gen counters; then it increments lrugen->max_seq.

The current aging fairness safeguard for kswapd uses two passes to ensure the fairness to multiple eligible memcgs. On the first pass, which is shared with the eviction, it checks whether all eligible memcgs are low on cold folios. If so, it requires a second pass, on which it ages all those memcgs at the same time.

With memcg LRU, the aging, while ensuring eventual fairness, will run when necessary. Therefore the current aging fairness safeguard for kswapd will not be needed.

Note that memcg LRU only applies to global reclaim. For memcg reclaim, the aging can be unfair to different memcgs, i.e., their lrugen->max_seq can be incremented at different paces.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Yu Zhao <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Michael Larabel <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent a579086 commit 7348cc9

File tree

1 file changed

+59
-67
lines changed

1 file changed

+59
-67
lines changed

mm/vmscan.c

Lines changed: 59 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,6 @@ struct scan_control {
137137

138138
#ifdef CONFIG_LRU_GEN
139139
/* help kswapd make better choices among multiple memcgs */
140-
unsigned int memcgs_need_aging:1;
141140
unsigned long last_reclaimed;
142141
#endif
143142

@@ -4468,7 +4467,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
44684467
return true;
44694468
}
44704469

4471-
static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
4470+
static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
44724471
struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
44734472
{
44744473
int gen, type, zone;
@@ -4477,6 +4476,13 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
44774476
unsigned long total = 0;
44784477
struct lru_gen_folio *lrugen = &lruvec->lrugen;
44794478
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
4479+
DEFINE_MIN_SEQ(lruvec);
4480+
4481+
/* whether this lruvec is completely out of cold folios */
4482+
if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
4483+
*nr_to_scan = 0;
4484+
return true;
4485+
}
44804486

44814487
for (type = !can_swap; type < ANON_AND_FILE; type++) {
44824488
unsigned long seq;
@@ -4505,8 +4511,6 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
45054511
* stalls when the number of generations reaches MIN_NR_GENS. Hence, the
45064512
* ideal number of generations is MIN_NR_GENS+1.
45074513
*/
4508-
if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
4509-
return true;
45104514
if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
45114515
return false;
45124516

@@ -4525,40 +4529,54 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
45254529
return false;
45264530
}
45274531

4528-
static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
4532+
static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
45294533
{
4530-
bool need_aging;
4531-
unsigned long nr_to_scan;
4532-
int swappiness = get_swappiness(lruvec, sc);
4534+
int gen, type, zone;
4535+
unsigned long total = 0;
4536+
bool can_swap = get_swappiness(lruvec, sc);
4537+
struct lru_gen_folio *lrugen = &lruvec->lrugen;
45334538
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
45344539
DEFINE_MAX_SEQ(lruvec);
45354540
DEFINE_MIN_SEQ(lruvec);
45364541

4537-
VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
4542+
for (type = !can_swap; type < ANON_AND_FILE; type++) {
4543+
unsigned long seq;
45384544

4539-
mem_cgroup_calculate_protection(NULL, memcg);
4545+
for (seq = min_seq[type]; seq <= max_seq; seq++) {
4546+
gen = lru_gen_from_seq(seq);
45404547

4541-
if (mem_cgroup_below_min(NULL, memcg))
4542-
return false;
4548+
for (zone = 0; zone < MAX_NR_ZONES; zone++)
4549+
total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
4550+
}
4551+
}
45434552

4544-
need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
4553+
/* whether the size is big enough to be helpful */
4554+
return mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
4555+
}
45454556

4546-
if (min_ttl) {
4547-
int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
4548-
unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
4557+
static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc,
4558+
unsigned long min_ttl)
4559+
{
4560+
int gen;
4561+
unsigned long birth;
4562+
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
4563+
DEFINE_MIN_SEQ(lruvec);
45494564

4550-
if (time_is_after_jiffies(birth + min_ttl))
4551-
return false;
4565+
VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
45524566

4553-
/* the size is likely too small to be helpful */
4554-
if (!nr_to_scan && sc->priority != DEF_PRIORITY)
4555-
return false;
4556-
}
4567+
/* see the comment on lru_gen_folio */
4568+
gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
4569+
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
45574570

4558-
if (need_aging)
4559-
try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
4571+
if (time_is_after_jiffies(birth + min_ttl))
4572+
return false;
45604573

4561-
return true;
4574+
if (!lruvec_is_sizable(lruvec, sc))
4575+
return false;
4576+
4577+
mem_cgroup_calculate_protection(NULL, memcg);
4578+
4579+
return !mem_cgroup_below_min(NULL, memcg);
45624580
}
45634581

45644582
/* to protect the working set of the last N jiffies */
@@ -4567,46 +4585,32 @@ static unsigned long lru_gen_min_ttl __read_mostly;
45674585
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
45684586
{
45694587
struct mem_cgroup *memcg;
4570-
bool success = false;
45714588
unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
45724589

45734590
VM_WARN_ON_ONCE(!current_is_kswapd());
45744591

45754592
sc->last_reclaimed = sc->nr_reclaimed;
45764593

4577-
/*
4578-
* To reduce the chance of going into the aging path, which can be
4579-
* costly, optimistically skip it if the flag below was cleared in the
4580-
* eviction path. This improves the overall performance when multiple
4581-
* memcgs are available.
4582-
*/
4583-
if (!sc->memcgs_need_aging) {
4584-
sc->memcgs_need_aging = true;
4594+
/* check the order to exclude compaction-induced reclaim */
4595+
if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
45854596
return;
4586-
}
4587-
4588-
set_mm_walk(pgdat);
45894597

45904598
memcg = mem_cgroup_iter(NULL, NULL, NULL);
45914599
do {
45924600
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
45934601

4594-
if (age_lruvec(lruvec, sc, min_ttl))
4595-
success = true;
4602+
if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
4603+
mem_cgroup_iter_break(NULL, memcg);
4604+
return;
4605+
}
45964606

45974607
cond_resched();
45984608
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
45994609

4600-
clear_mm_walk();
4601-
4602-
/* check the order to exclude compaction-induced reclaim */
4603-
if (success || !min_ttl || sc->order)
4604-
return;
4605-
46064610
/*
46074611
* The main goal is to OOM kill if every generation from all memcgs is
46084612
* younger than min_ttl. However, another possibility is all memcgs are
4609-
* either below min or empty.
4613+
* either too small or below min.
46104614
*/
46114615
if (mutex_trylock(&oom_lock)) {
46124616
struct oom_control oc = {
@@ -5114,34 +5118,28 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
51145118
* reclaim.
51155119
*/
51165120
static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
5117-
bool can_swap, bool *need_aging)
5121+
bool can_swap)
51185122
{
51195123
unsigned long nr_to_scan;
51205124
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
51215125
DEFINE_MAX_SEQ(lruvec);
5122-
DEFINE_MIN_SEQ(lruvec);
51235126

51245127
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg) ||
51255128
(mem_cgroup_below_low(sc->target_mem_cgroup, memcg) &&
51265129
!sc->memcg_low_reclaim))
51275130
return 0;
51285131

5129-
*need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
5130-
if (!*need_aging)
5132+
if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
51315133
return nr_to_scan;
51325134

51335135
/* skip the aging path at the default priority */
51345136
if (sc->priority == DEF_PRIORITY)
5135-
goto done;
5137+
return nr_to_scan;
51365138

5137-
/* leave the work to lru_gen_age_node() */
5138-
if (current_is_kswapd())
5139-
return 0;
5139+
try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
51405140

5141-
if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
5142-
return nr_to_scan;
5143-
done:
5144-
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
5141+
/* skip this lruvec as it's low on cold folios */
5142+
return 0;
51455143
}
51465144

51475145
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
@@ -5160,9 +5158,7 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
51605158
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
51615159
{
51625160
struct blk_plug plug;
5163-
bool need_aging = false;
51645161
unsigned long scanned = 0;
5165-
unsigned long reclaimed = sc->nr_reclaimed;
51665162
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
51675163

51685164
lru_add_drain();
@@ -5183,13 +5179,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
51835179
else
51845180
swappiness = 0;
51855181

5186-
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
5182+
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
51875183
if (!nr_to_scan)
5188-
goto done;
5184+
break;
51895185

51905186
delta = evict_folios(lruvec, sc, swappiness);
51915187
if (!delta)
5192-
goto done;
5188+
break;
51935189

51945190
scanned += delta;
51955191
if (scanned >= nr_to_scan)
@@ -5201,10 +5197,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
52015197
cond_resched();
52025198
}
52035199

5204-
/* see the comment in lru_gen_age_node() */
5205-
if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
5206-
sc->memcgs_need_aging = false;
5207-
done:
52085200
clear_mm_walk();
52095201

52105202
blk_finish_plug(&plug);

0 commit comments

Comments
 (0)