Skip to content

Commit 574823b

Browse files
committed
Change mincore() to count "mapped" pages rather than "cached" pages
The semantics of what "in core" means for the mincore() system call are somewhat unclear, but Linux has always (since 2.3.52, which is when mincore() was initially done) treated it as "page is available in page cache" rather than "page is mapped in the mapping". The problem with that traditional semantic is that it exposes a lot of system cache state that it really probably shouldn't, and that users shouldn't really even care about. So let's try to avoid that information leak by simply changing the semantics to be that mincore() counts actual mapped pages, not pages that might be cheaply mapped if they were faulted (note the "might be" part of the old semantics: being in the cache doesn't actually guarantee that you can access them without IO anyway, since things like network filesystems may have to revalidate the cache before use). In many ways the old semantics were somewhat insane even aside from the information leak issue. From the very beginning (and that beginning is a long time ago: 2.3.52 was released in March 2000, I think), the code had a comment saying: 'Later we can get more picky about what "in core" means precisely.' And this is that "later". Admittedly it is much later than is really comfortable. NOTE! This is a real semantic change, and it is for example known to change the output of "fincore", since that program literally does an mmap without populating it, and then does "mincore()" on that mapping that doesn't actually have any pages in it. I'm hoping that nobody actually has any workflow that cares, and the info leak is real. We may have to do something different if it turns out that people have valid reasons to want the old semantics, and if we can limit the information leak sanely.
Cc: Kevin Easton <[email protected]> Cc: Jiri Kosina <[email protected]> Cc: Masatake YAMATO <[email protected]> Cc: Andrew Morton <[email protected]> Cc: Greg KH <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Michal Hocko <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 94bd8a0 commit 574823b

File tree

1 file changed

+13
-81
lines changed

1 file changed

+13
-81
lines changed

mm/mincore.c

Lines changed: 13 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -42,72 +42,14 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
4242
return 0;
4343
}
4444

45-
/*
46-
* Later we can get more picky about what "in core" means precisely.
47-
* For now, simply check to see if the page is in the page cache,
48-
* and is up to date; i.e. that no page-in operation would be required
49-
* at this time if an application were to map and access this page.
50-
*/
51-
static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
52-
{
53-
unsigned char present = 0;
54-
struct page *page;
55-
56-
/*
57-
* When tmpfs swaps out a page from a file, any process mapping that
58-
* file will not get a swp_entry_t in its pte, but rather it is like
59-
* any other file mapping (ie. marked !present and faulted in with
60-
* tmpfs's .fault). So swapped out tmpfs mappings are tested here.
61-
*/
62-
#ifdef CONFIG_SWAP
63-
if (shmem_mapping(mapping)) {
64-
page = find_get_entry(mapping, pgoff);
65-
/*
66-
* shmem/tmpfs may return swap: account for swapcache
67-
* page too.
68-
*/
69-
if (xa_is_value(page)) {
70-
swp_entry_t swp = radix_to_swp_entry(page);
71-
page = find_get_page(swap_address_space(swp),
72-
swp_offset(swp));
73-
}
74-
} else
75-
page = find_get_page(mapping, pgoff);
76-
#else
77-
page = find_get_page(mapping, pgoff);
78-
#endif
79-
if (page) {
80-
present = PageUptodate(page);
81-
put_page(page);
82-
}
83-
84-
return present;
85-
}
86-
87-
static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
88-
struct vm_area_struct *vma, unsigned char *vec)
89-
{
90-
unsigned long nr = (end - addr) >> PAGE_SHIFT;
91-
int i;
92-
93-
if (vma->vm_file) {
94-
pgoff_t pgoff;
95-
96-
pgoff = linear_page_index(vma, addr);
97-
for (i = 0; i < nr; i++, pgoff++)
98-
vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
99-
} else {
100-
for (i = 0; i < nr; i++)
101-
vec[i] = 0;
102-
}
103-
return nr;
104-
}
105-
10645
static int mincore_unmapped_range(unsigned long addr, unsigned long end,
10746
struct mm_walk *walk)
10847
{
109-
walk->private += __mincore_unmapped_range(addr, end,
110-
walk->vma, walk->private);
48+
unsigned char *vec = walk->private;
49+
unsigned long nr = (end - addr) >> PAGE_SHIFT;
50+
51+
memset(vec, 0, nr);
52+
walk->private += nr;
11153
return 0;
11254
}
11355

@@ -127,8 +69,9 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
12769
goto out;
12870
}
12971

72+
/* We'll consider a THP page under construction to be there */
13073
if (pmd_trans_unstable(pmd)) {
131-
__mincore_unmapped_range(addr, end, vma, vec);
74+
memset(vec, 1, nr);
13275
goto out;
13376
}
13477

@@ -137,28 +80,17 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
13780
pte_t pte = *ptep;
13881

13982
if (pte_none(pte))
140-
__mincore_unmapped_range(addr, addr + PAGE_SIZE,
141-
vma, vec);
83+
*vec = 0;
14284
else if (pte_present(pte))
14385
*vec = 1;
14486
else { /* pte is a swap entry */
14587
swp_entry_t entry = pte_to_swp_entry(pte);
14688

147-
if (non_swap_entry(entry)) {
148-
/*
149-
* migration or hwpoison entries are always
150-
* uptodate
151-
*/
152-
*vec = 1;
153-
} else {
154-
#ifdef CONFIG_SWAP
155-
*vec = mincore_page(swap_address_space(entry),
156-
swp_offset(entry));
157-
#else
158-
WARN_ON(1);
159-
*vec = 1;
160-
#endif
161-
}
89+
/*
90+
* migration or hwpoison entries are always
91+
* uptodate
92+
*/
93+
*vec = !!non_swap_entry(entry);
16294
}
16395
vec++;
16496
}

0 commit comments

Comments
 (0)