Skip to content

Commit 2864f3d

Browse files
Barry Song authored and akpm00 committed
mm: madvise: pageout: ignore references rather than clearing young
While doing MADV_PAGEOUT, the current code clears the PTE young bit so that vmscan won't see the young flags, allowing the reclamation of madvised folios to go ahead. It seems we can achieve the same thing by directly ignoring references, and thus we can remove the TLB flush in madvise and the rmap overhead in vmscan.

Regarding the side effect: in the original code, if a parallel thread accesses the madvised memory while another thread is doing the madvise, the folios get a chance to be re-activated by vmscan (though the time gap is actually quite small, since checking the PTEs is done immediately after clearing the PTEs' young bits). With this patch, they will still be reclaimed. But doing PAGEOUT and accessing the same memory at the same time is quite silly, much like a DoS, so we probably don't need to care. Arguably, ignoring the new access during that quite small time gap is even better.

For DAMON's DAMOS_PAGEOUT, which is based on physical address regions, we keep the behaviour as is, since a physical address might be mapped by multiple processes. MADV_PAGEOUT, which is based on virtual addresses, is actually much more aggressive about reclamation. To leave paddr's DAMOS_PAGEOUT untouched, we simply pass ignore_references as false in reclaim_pages().

The microbenchmark below shows a 6% reduction in the latency of MADV_PAGEOUT:

 #define PGSIZE 4096
 main()
 {
 	int i;
 #define SIZE 512*1024*1024
 	volatile long *p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
 			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

 	for (i = 0; i < SIZE/sizeof(long); i += PGSIZE / sizeof(long))
 		p[i] = 0x11;

 	madvise(p, SIZE, MADV_PAGEOUT);
 }

 w/o patch                    w/ patch
 root@10:~# time ./a.out      root@10:~# time ./a.out
 real    0m49.634s            real    0m46.334s
 user    0m0.637s             user    0m0.648s
 sys     0m47.434s            sys     0m44.265s

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Barry Song <[email protected]>
Acked-by: Minchan Kim <[email protected]>
Cc: SeongJae Park <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Johannes Weiner <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 94c18d5 commit 2864f3d

File tree

4 files changed

+13
-11
lines changed

4 files changed

+13
-11
lines changed

mm/damon/paddr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s)
249249
put_folio:
250250
folio_put(folio);
251251
}
252-
applied = reclaim_pages(&folio_list);
252+
applied = reclaim_pages(&folio_list, false);
253253
cond_resched();
254254
return applied * PAGE_SIZE;
255255
}

mm/internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,7 @@ extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long,
869869
unsigned long, unsigned long);
870870

871871
extern void set_pageblock_order(void);
872-
unsigned long reclaim_pages(struct list_head *folio_list);
872+
unsigned long reclaim_pages(struct list_head *folio_list, bool ignore_references);
873873
unsigned int reclaim_clean_pages_from_list(struct zone *zone,
874874
struct list_head *folio_list);
875875
/* The ALLOC_WMARK bits are used as an index to zone->watermark */

mm/madvise.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
386386
return 0;
387387
}
388388

389-
if (pmd_young(orig_pmd)) {
389+
if (!pageout && pmd_young(orig_pmd)) {
390390
pmdp_invalidate(vma, addr, pmd);
391391
orig_pmd = pmd_mkold(orig_pmd);
392392

@@ -410,7 +410,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
410410
huge_unlock:
411411
spin_unlock(ptl);
412412
if (pageout)
413-
reclaim_pages(&folio_list);
413+
reclaim_pages(&folio_list, true);
414414
return 0;
415415
}
416416

@@ -490,7 +490,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
490490

491491
VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
492492

493-
if (pte_young(ptent)) {
493+
if (!pageout && pte_young(ptent)) {
494494
ptent = ptep_get_and_clear_full(mm, addr, pte,
495495
tlb->fullmm);
496496
ptent = pte_mkold(ptent);
@@ -524,7 +524,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
524524
pte_unmap_unlock(start_pte, ptl);
525525
}
526526
if (pageout)
527-
reclaim_pages(&folio_list);
527+
reclaim_pages(&folio_list, true);
528528
cond_resched();
529529

530530
return 0;

mm/vmscan.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2085,7 +2085,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
20852085
}
20862086

20872087
static unsigned int reclaim_folio_list(struct list_head *folio_list,
2088-
struct pglist_data *pgdat)
2088+
struct pglist_data *pgdat,
2089+
bool ignore_references)
20892090
{
20902091
struct reclaim_stat dummy_stat;
20912092
unsigned int nr_reclaimed;
@@ -2098,7 +2099,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
20982099
.no_demotion = 1,
20992100
};
21002101

2101-
nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, false);
2102+
nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, ignore_references);
21022103
while (!list_empty(folio_list)) {
21032104
folio = lru_to_folio(folio_list);
21042105
list_del(&folio->lru);
@@ -2108,7 +2109,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
21082109
return nr_reclaimed;
21092110
}
21102111

2111-
unsigned long reclaim_pages(struct list_head *folio_list)
2112+
unsigned long reclaim_pages(struct list_head *folio_list, bool ignore_references)
21122113
{
21132114
int nid;
21142115
unsigned int nr_reclaimed = 0;
@@ -2130,11 +2131,12 @@ unsigned long reclaim_pages(struct list_head *folio_list)
21302131
continue;
21312132
}
21322133

2133-
nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid));
2134+
nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid),
2135+
ignore_references);
21342136
nid = folio_nid(lru_to_folio(folio_list));
21352137
} while (!list_empty(folio_list));
21362138

2137-
nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid));
2139+
nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid), ignore_references);
21382140

21392141
memalloc_noreclaim_restore(noreclaim_flag);
21402142

0 commit comments

Comments
 (0)