
Commit 0d665e7

kiryl authored and torvalds committed
mm, page_vma_mapped: Drop faulty pointer arithmetics in check_pte()
Tetsuo reported random crashes under memory pressure on a 32-bit x86 system and tracked them down to the change that introduced page_vma_mapped_walk().

The root cause of the issue is the faulty pointer math in check_pte(). As ->pte may point to an arbitrary page, we have to check that the pages belong to the same section before doing the math; otherwise it may lead to weird results.

It wasn't noticed until now because mem_map[] is virtually contiguous on flatmem and vmemmap sparsemem: pointer arithmetic just works for any 'struct page' pointers. With classic sparsemem it doesn't, because each section's memmap is allocated separately, so consecutive pfns crossing two sections can have struct pages at completely unrelated addresses.

Let's restructure the code a bit and replace the pointer arithmetic with operations on pfns.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Reported-and-tested-by: Tetsuo Handa <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Fixes: ace71a1 ("mm: introduce page_vma_mapped_walk()")
Cc: [email protected]
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 0c5b9b5 commit 0d665e7
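
The crux of the fix is easy to miss in the diff: under classic sparsemem, each section's memmap is a separate allocation, so subtracting two struct page pointers is only meaningful when both land in the same section. A minimal sketch of the safe pattern the fix adopts, in pfn space (pfn_in_hpage is a hypothetical helper name, not part of this commit; page_to_pfn() and hpage_nr_pages() are the kernel functions the diff itself uses):

    /*
     * Containment test done in pfn space rather than struct page
     * pointer space: pfns are globally ordered no matter where a
     * section's memmap was allocated; struct page pointers are not.
     */
    static bool pfn_in_hpage(struct page *hpage, unsigned long pfn)
    {
            unsigned long hpage_pfn = page_to_pfn(hpage);

            /* THP can be referenced by any subpage */
            return pfn >= hpage_pfn && pfn - hpage_pfn < hpage_nr_pages(hpage);
    }

The buggy form, pte_page(*pvmw->pte) - pvmw->page, computed the same offset by pointer subtraction, which silently yields garbage once the two pages live in different sections' memmaps.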

File tree

2 files changed: +59 -25 lines


include/linux/swapops.h

Lines changed: 21 additions & 0 deletions
@@ -124,6 +124,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
 	return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
 }
 
+static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
+{
+	return swp_offset(entry);
+}
+
 static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 {
 	return pfn_to_page(swp_offset(entry));
@@ -154,6 +159,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
 	return false;
 }
 
+static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
+{
+	return 0;
+}
+
 static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 {
 	return NULL;
@@ -189,6 +199,11 @@ static inline int is_write_migration_entry(swp_entry_t entry)
 	return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
 }
 
+static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
+{
+	return swp_offset(entry);
+}
+
 static inline struct page *migration_entry_to_page(swp_entry_t entry)
 {
 	struct page *p = pfn_to_page(swp_offset(entry));
@@ -218,6 +233,12 @@ static inline int is_migration_entry(swp_entry_t swp)
 {
 	return 0;
 }
+
+static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
+{
+	return 0;
+}
+
 static inline struct page *migration_entry_to_page(swp_entry_t entry)
 {
 	return NULL;
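
These helpers mirror the existing *_entry_to_page() accessors: both migration and device-private entries store the target pfn in their swap offset, so the pfn is recovered with swp_offset() directly, and the not-configured stubs return 0 so callers still compile. A hedged round-trip sketch, assuming CONFIG_MIGRATION=y and a mapped struct page *page (make_migration_entry() is the existing constructor in this header):

    /* A migration entry encodes the pfn it points at in its swap
     * offset, so the new helper recovers exactly what
     * make_migration_entry() stored. */
    swp_entry_t entry = make_migration_entry(page, 1 /* write */);

    WARN_ON(!is_migration_entry(entry));
    WARN_ON(migration_entry_to_pfn(entry) != page_to_pfn(page));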

mm/page_vma_mapped.c

Lines changed: 38 additions & 25 deletions
@@ -30,48 +30,61 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
 	return true;
 }
 
+/**
+ * check_pte - check if @pvmw->page is mapped at the @pvmw->pte
+ *
+ * page_vma_mapped_walk() found a place where @pvmw->page is *potentially*
+ * mapped. check_pte() has to validate this.
+ *
+ * @pvmw->pte may point to empty PTE, swap PTE or PTE pointing to arbitrary
+ * page.
+ *
+ * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration
+ * entry that points to @pvmw->page or any subpage in case of THP.
+ *
+ * If PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to
+ * @pvmw->page or any subpage in case of THP.
+ *
+ * Otherwise, return false.
+ *
+ */
 static bool check_pte(struct page_vma_mapped_walk *pvmw)
 {
+	unsigned long pfn;
+
 	if (pvmw->flags & PVMW_MIGRATION) {
-#ifdef CONFIG_MIGRATION
 		swp_entry_t entry;
 		if (!is_swap_pte(*pvmw->pte))
 			return false;
 		entry = pte_to_swp_entry(*pvmw->pte);
 
 		if (!is_migration_entry(entry))
 			return false;
-		if (migration_entry_to_page(entry) - pvmw->page >=
-				hpage_nr_pages(pvmw->page)) {
-			return false;
-		}
-		if (migration_entry_to_page(entry) < pvmw->page)
-			return false;
-#else
-		WARN_ON_ONCE(1);
-#endif
-	} else {
-		if (is_swap_pte(*pvmw->pte)) {
-			swp_entry_t entry;
 
-			entry = pte_to_swp_entry(*pvmw->pte);
-			if (is_device_private_entry(entry) &&
-			    device_private_entry_to_page(entry) == pvmw->page)
-				return true;
-		}
+		pfn = migration_entry_to_pfn(entry);
+	} else if (is_swap_pte(*pvmw->pte)) {
+		swp_entry_t entry;
 
-		if (!pte_present(*pvmw->pte))
+		/* Handle un-addressable ZONE_DEVICE memory */
+		entry = pte_to_swp_entry(*pvmw->pte);
+		if (!is_device_private_entry(entry))
 			return false;
 
-		/* THP can be referenced by any subpage */
-		if (pte_page(*pvmw->pte) - pvmw->page >=
-				hpage_nr_pages(pvmw->page)) {
-			return false;
-		}
-		if (pte_page(*pvmw->pte) < pvmw->page)
+		pfn = device_private_entry_to_pfn(entry);
+	} else {
+		if (!pte_present(*pvmw->pte))
 			return false;
+
+		pfn = pte_pfn(*pvmw->pte);
 	}
 
+	if (pfn < page_to_pfn(pvmw->page))
+		return false;
+
+	/* THP can be referenced by any subpage */
+	if (pfn - page_to_pfn(pvmw->page) >= hpage_nr_pages(pvmw->page))
+		return false;
+
 	return true;
 }
 
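
After the change, the migration path, the device-private path, and the present-PTE path all funnel into one pfn-range check at the bottom of check_pte(). A standalone restatement with a worked example (constants are illustrative: 512 subpages corresponds to a 2MB THP with 4KB base pages; check_pfn is a hypothetical name, standing in for the tail of check_pte()):

    #include <stdbool.h>

    /* Illustrative stand-ins for page_to_pfn(pvmw->page) and
     * hpage_nr_pages(pvmw->page). */
    static const unsigned long head_pfn = 0x1000;
    static const unsigned long nr_pages = 512;

    static bool check_pfn(unsigned long pfn)
    {
            if (pfn < head_pfn)             /* pfn below the head page */
                    return false;
            /* THP can be referenced by any subpage */
            return pfn - head_pfn < nr_pages;
    }

    /* check_pfn(0x0fff) == false   (below the head page)
     * check_pfn(0x1000) == true    (the head page itself)
     * check_pfn(0x11ff) == true    (last subpage, offset 511)
     * check_pfn(0x1200) == false   (one past the THP) */

The explicit underflow test makes the intent clear, even though with unsigned pfns a wrapped subtraction would fail the range check anyway.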
