Skip to content

Commit f8d1a31

Browse files
gormanm authored and torvalds committed
mm: consider whether to deactivate based on eligible zones inactive ratio
Minchan Kim reported that with per-zone lru state it was possible to identify that a normal zone with 8^M anonymous pages could trigger OOM with non-atomic order-0 allocations as all pages in the zone were in the active list. gfp_mask=0x26004c0(GFP_KERNEL|__GFP_REPEAT|__GFP_NOTRACK), order=0 Call Trace: __alloc_pages_nodemask+0xe52/0xe60 ? new_slab+0x39c/0x3b0 new_slab+0x39c/0x3b0 ___slab_alloc.constprop.87+0x6da/0x840 ? __alloc_skb+0x3c/0x260 ? enqueue_task_fair+0x73/0xbf0 ? poll_select_copy_remaining+0x140/0x140 __slab_alloc.isra.81.constprop.86+0x40/0x6d ? __alloc_skb+0x3c/0x260 kmem_cache_alloc+0x22c/0x260 ? __alloc_skb+0x3c/0x260 __alloc_skb+0x3c/0x260 alloc_skb_with_frags+0x4e/0x1a0 sock_alloc_send_pskb+0x16a/0x1b0 ? wait_for_unix_gc+0x31/0x90 unix_stream_sendmsg+0x28d/0x340 sock_sendmsg+0x2d/0x40 sock_write_iter+0x6c/0xc0 __vfs_write+0xc0/0x120 vfs_write+0x9b/0x1a0 ? __might_fault+0x49/0xa0 SyS_write+0x44/0x90 do_fast_syscall_32+0xa6/0x1e0 Mem-Info: active_anon:101103 inactive_anon:102219 isolated_anon:0 active_file:503 inactive_file:544 isolated_file:0 unevictable:0 dirty:0 writeback:34 unstable:0 slab_reclaimable:6298 slab_unreclaimable:74669 mapped:863 shmem:0 pagetables:100998 bounce:0 free:23573 free_pcp:1861 free_cma:0 Node 0 active_anon:404412kB inactive_anon:409040kB active_file:2012kB inactive_file:2176kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:3452kB dirty:0kB writeback:136kB shmem:0kB writeback_tmp:0kB unstable:0kB pages_scanned:1320845 all_unreclaimable? 
yes DMA free:3296kB min:68kB low:84kB high:100kB active_anon:5540kB inactive_anon:0kB active_file:0kB inactive_file:0kB present:15992kB managed:15916kB mlocked:0kB slab_reclaimable:248kB slab_unreclaimable:2628kB kernel_stack:792kB pagetables:2316kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB lowmem_reserve[]: 0 809 1965 1965 Normal free:3600kB min:3604kB low:4504kB high:5404kB active_anon:86304kB inactive_anon:0kB active_file:160kB inactive_file:376kB present:897016kB managed:858524kB mlocked:0kB slab_reclaimable:24944kB slab_unreclaimable:296048kB kernel_stack:163832kB pagetables:35892kB bounce:0kB free_pcp:3076kB local_pcp:656kB free_cma:0kB lowmem_reserve[]: 0 0 9247 9247 HighMem free:86156kB min:512kB low:1796kB high:3080kB active_anon:312852kB inactive_anon:410024kB active_file:1924kB inactive_file:2012kB present:1183736kB managed:1183736kB mlocked:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:365784kB bounce:0kB free_pcp:3868kB local_pcp:720kB free_cma:0kB lowmem_reserve[]: 0 0 0 0 DMA: 8*4kB (UM) 8*8kB (UM) 4*16kB (M) 2*32kB (UM) 2*64kB (UM) 1*128kB (M) 3*256kB (UME) 2*512kB (UE) 1*1024kB (E) 0*2048kB 0*4096kB = 3296kB Normal: 240*4kB (UME) 160*8kB (UME) 23*16kB (ME) 3*32kB (UE) 3*64kB (UME) 2*128kB (ME) 1*256kB (U) 0*512kB 0*1024kB 0*2048kB 0*4096kB = 3408kB HighMem: 10942*4kB (UM) 3102*8kB (UM) 866*16kB (UM) 76*32kB (UM) 11*64kB (UM) 4*128kB (UM) 1*256kB (M) 0*512kB 0*1024kB 0*2048kB 0*4096kB = 86344kB Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB 54409 total pagecache pages 53215 pages in swap cache Swap cache stats: add 300982, delete 247765, find 157978/226539 Free swap = 3803244kB Total swap = 4192252kB 524186 pages RAM 295934 pages HighMem/MovableOnly 9642 pages reserved 0 pages cma reserved The problem is due to the active deactivation logic in inactive_list_is_low: Node 0 active_anon:404412kB inactive_anon:409040kB IOW, (inactive_anon of node * inactive_ratio > active_anon of 
node) due to highmem anonymous stat so VM never deactivates normal zone's anonymous pages. This patch is a modified version of Minchan's original solution but based upon it. The problem with Minchan's patch is that any low zone with an imbalanced list could force a rotation. In this patch, a zone-constrained global reclaim will rotate the list if the inactive/active ratio of all eligible zones needs to be corrected. It is possible that higher zone pages will be initially rotated prematurely but this is the safer choice to maintain overall LRU age. Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Minchan Kim <[email protected]> Signed-off-by: Mel Gorman <[email protected]> Acked-by: Johannes Weiner <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Vlastimil Babka <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 5a1c84b commit f8d1a31

File tree

1 file changed

+29
-5
lines changed

1 file changed

+29
-5
lines changed

mm/vmscan.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1964,12 +1964,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
19641964
* 1TB 101 10GB
19651965
* 10TB 320 32GB
19661966
*/
1967-
static bool inactive_list_is_low(struct lruvec *lruvec, bool file)
1967+
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
1968+
struct scan_control *sc)
19681969
{
19691970
unsigned long inactive_ratio;
19701971
unsigned long inactive;
19711972
unsigned long active;
19721973
unsigned long gb;
1974+
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
1975+
int zid;
19731976

19741977
/*
19751978
* If we don't have swap space, anonymous page deactivation
@@ -1981,6 +1984,27 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file)
19811984
inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
19821985
active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
19831986

1987+
/*
1988+
* For zone-constrained allocations, it is necessary to check if
1989+
* deactivations are required for lowmem to be reclaimed. This
1990+
* calculates the inactive/active pages available in eligible zones.
1991+
*/
1992+
for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) {
1993+
struct zone *zone = &pgdat->node_zones[zid];
1994+
unsigned long inactive_zone, active_zone;
1995+
1996+
if (!populated_zone(zone))
1997+
continue;
1998+
1999+
inactive_zone = zone_page_state(zone,
2000+
NR_ZONE_LRU_BASE + (file * LRU_FILE));
2001+
active_zone = zone_page_state(zone,
2002+
NR_ZONE_LRU_BASE + (file * LRU_FILE) + LRU_ACTIVE);
2003+
2004+
inactive -= min(inactive, inactive_zone);
2005+
active -= min(active, active_zone);
2006+
}
2007+
19842008
gb = (inactive + active) >> (30 - PAGE_SHIFT);
19852009
if (gb)
19862010
inactive_ratio = int_sqrt(10 * gb);
@@ -1994,7 +2018,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
19942018
struct lruvec *lruvec, struct scan_control *sc)
19952019
{
19962020
if (is_active_lru(lru)) {
1997-
if (inactive_list_is_low(lruvec, is_file_lru(lru)))
2021+
if (inactive_list_is_low(lruvec, is_file_lru(lru), sc))
19982022
shrink_active_list(nr_to_scan, lruvec, sc, lru);
19992023
return 0;
20002024
}
@@ -2125,7 +2149,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
21252149
* lruvec even if it has plenty of old anonymous pages unless the
21262150
* system is under heavy pressure.
21272151
*/
2128-
if (!inactive_list_is_low(lruvec, true) &&
2152+
if (!inactive_list_is_low(lruvec, true, sc) &&
21292153
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
21302154
scan_balance = SCAN_FILE;
21312155
goto out;
@@ -2367,7 +2391,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
23672391
* Even if we did not try to evict anon pages at all, we want to
23682392
* rebalance the anon lru active/inactive ratio.
23692393
*/
2370-
if (inactive_list_is_low(lruvec, false))
2394+
if (inactive_list_is_low(lruvec, false, sc))
23712395
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
23722396
sc, LRU_ACTIVE_ANON);
23732397

@@ -3020,7 +3044,7 @@ static void age_active_anon(struct pglist_data *pgdat,
30203044
do {
30213045
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
30223046

3023-
if (inactive_list_is_low(lruvec, false))
3047+
if (inactive_list_is_low(lruvec, false, sc))
30243048
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
30253049
sc, LRU_ACTIVE_ANON);
30263050

0 commit comments

Comments
 (0)