
Commit 0fe6e20

Naoya Horiguchi authored and Andi Kleen committed
hugetlb, rmap: add reverse mapping for hugepage
This patch adds reverse mapping feature for hugepage by introducing
mapcount for shared/private-mapped hugepage and anon_vma for
private-mapped hugepage.

While hugepage is not currently swappable, reverse mapping can be useful
for memory error handler.

Without this patch, memory error handler cannot identify processes
using the bad hugepage nor unmap it from them. That is:
- for shared hugepage:
  we can collect processes using a hugepage through pagecache,
  but can not unmap the hugepage because of the lack of mapcount.
- for privately mapped hugepage:
  we can neither collect processes nor unmap the hugepage.

This patch solves these problems.

This patch includes the bug fix given by commit 23be746, so reverts it.

Dependency: "hugetlb: move definition of is_vm_hugetlb_page() to hugepage_inline.h"

ChangeLog since May 24:
- create hugetlb_inline.h and move is_vm_hugetlb_index() in it.
- move functions setting up anon_vma for hugepage into mm/rmap.c.

ChangeLog since May 13:
- rebased to 2.6.34
- fix logic error (in case that private mapping and shared mapping coexist)
- move is_vm_hugetlb_page() into include/linux/mm.h to use this function
  from linear_page_index()
- define and use linear_hugepage_index() instead of compound_order()
- use page_move_anon_rmap() in hugetlb_cow()
- copy exclusive switch of __set_page_anon_rmap() into hugepage counterpart.
- revert commit 24be7468 completely

Signed-off-by: Naoya Horiguchi <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Larry Woodman <[email protected]>
Cc: Lee Schermerhorn <[email protected]>
Acked-by: Fengguang Wu <[email protected]>
Acked-by: Mel Gorman <[email protected]>
Signed-off-by: Andi Kleen <[email protected]>
1 parent 8edf344 commit 0fe6e20
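
The core of the change is bookkeeping: every hugepage gets a real mapcount, and privately mapped hugepages additionally get an anon_vma back-pointer, so a memory error handler holding only the bad page can walk back to everything that maps it. As a rough illustration of that idea (a userspace toy model, not kernel code; the struct names and the pid-based mapper list are invented for the example), the sketch below keeps a per-page count plus a reverse list and walks it on "failure":

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-ins for page->_mapcount and the anon_vma/pagecache reverse map. */
struct mapper {
	int pid;
	struct mapper *next;
};

struct hugepage {
	int mapcount;		/* how many page tables reference the page */
	struct mapper *rmap;	/* reverse mapping: who references it */
};

/* Roughly what page_dup_rmap()/hugepage_add_*_anon_rmap() record on map. */
static void map_hugepage(struct hugepage *hp, int pid)
{
	struct mapper *m = malloc(sizeof(*m));
	m->pid = pid;
	m->next = hp->rmap;
	hp->rmap = m;
	hp->mapcount++;
}

/* What a memory error handler can now do: find (and unmap) every user. */
static void handle_bad_hugepage(struct hugepage *hp)
{
	for (struct mapper *m = hp->rmap; m; m = m->next)
		printf("unmap bad hugepage from pid %d\n", m->pid);
}

int main(void)
{
	struct hugepage hp = { 0, NULL };

	map_hugepage(&hp, 100);	/* e.g. a shared mapping */
	map_hugepage(&hp, 200);	/* e.g. a private mapping */
	handle_bad_hugepage(&hp);
	printf("mapcount = %d\n", hp.mapcount);
	return 0;
}

Before this commit only the shared/pagecache half of that picture existed, and without a mapcount, which is why the handler could find but not unmap users of a shared hugepage.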

File tree

include/linux/hugetlb.h
include/linux/pagemap.h
include/linux/poison.h
include/linux/rmap.h
mm/hugetlb.c
mm/rmap.c

6 files changed: +114 -12 lines changed

include/linux/hugetlb.h

Lines changed: 1 addition & 0 deletions
@@ -99,6 +99,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
+#define huge_pte_offset(mm, address)	0
 
 #define hugetlb_change_protection(vma, address, end, newprot)

include/linux/pagemap.h

Lines changed: 7 additions & 1 deletion
@@ -282,10 +282,16 @@ static inline loff_t page_offset(struct page *page)
 	return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
 }
 
+extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
+					unsigned long address);
+
 static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 					unsigned long address)
 {
-	pgoff_t pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
+	pgoff_t pgoff;
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		return linear_hugepage_index(vma, address);
+	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
 	pgoff += vma->vm_pgoff;
 	return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 }
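
To see what changes, assume 4 KiB base pages and 2 MiB hugepages (huge_page_order == 9; figures chosen only for the example) and a fault 6 MiB into a hugetlb VMA with vm_pgoff == 0: the old expression would yield base-page index 1536, whereas linear_hugepage_index(), via vma_hugecache_offset() in mm/hugetlb.c below, yields hugepage index 3, the unit in which the hugetlb page cache and the rmap code index hugepages.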

include/linux/poison.h

Lines changed: 0 additions & 9 deletions
@@ -48,15 +48,6 @@
 #define POISON_FREE	0x6b	/* for use-after-free poisoning */
 #define POISON_END	0xa5	/* end-byte of poisoning */
 
-/********** mm/hugetlb.c **********/
-/*
- * Private mappings of hugetlb pages use this poisoned value for
- * page->mapping. The core VM should not be doing anything with this mapping
- * but futex requires the existence of some page->mapping value even though it
- * is unused if PAGE_MAPPING_ANON is set.
- */
-#define HUGETLB_POISON	((void *)(0x00300300 + POISON_POINTER_DELTA + PAGE_MAPPING_ANON))
-
 /********** arch/$ARCH/mm/init.c **********/
 #define POISON_FREE_INITMEM	0xcc

include/linux/rmap.h

Lines changed: 5 additions & 0 deletions
@@ -140,6 +140,11 @@ void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned lon
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);
 
+void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
+		unsigned long);
+void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+		unsigned long);
+
 static inline void page_dup_rmap(struct page *page)
 {
 	atomic_inc(&page->_mapcount);

mm/hugetlb.c

Lines changed: 42 additions & 2 deletions
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/rmap.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -220,6 +221,12 @@ static pgoff_t vma_hugecache_offset(struct hstate *h,
 			(vma->vm_pgoff >> huge_page_order(h));
 }
 
+pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
+				     unsigned long address)
+{
+	return vma_hugecache_offset(hstate_vma(vma), vma, address);
+}
+
 /*
  * Return the size of the pages allocated when backing a VMA. In the majority
  * cases this will be same size as used by the page table entries.
@@ -552,6 +559,7 @@ static void free_huge_page(struct page *page)
 	set_page_private(page, 0);
 	page->mapping = NULL;
 	BUG_ON(page_count(page));
+	BUG_ON(page_mapcount(page));
 	INIT_LIST_HEAD(&page->lru);
 
 	spin_lock(&hugetlb_lock);
@@ -2129,6 +2137,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			entry = huge_ptep_get(src_pte);
 			ptepage = pte_page(entry);
 			get_page(ptepage);
+			page_dup_rmap(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
 		}
 	spin_unlock(&src->page_table_lock);
@@ -2207,6 +2216,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	flush_tlb_range(vma, start, end);
 	mmu_notifier_invalidate_range_end(mm, start, end);
 	list_for_each_entry_safe(page, tmp, &page_list, lru) {
+		page_remove_rmap(page);
 		list_del(&page->lru);
 		put_page(page);
 	}
@@ -2272,6 +2282,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	return 1;
 }
 
+/*
+ * Hugetlb_cow() should be called with page lock of the original hugepage held.
+ */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
 			struct page *pagecache_page)
@@ -2286,8 +2299,11 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 retry_avoidcopy:
 	/* If no-one else is actually using this page, avoid the copy
 	 * and just make the page writable */
-	avoidcopy = (page_count(old_page) == 1);
+	avoidcopy = (page_mapcount(old_page) == 1);
 	if (avoidcopy) {
+		if (!trylock_page(old_page))
+			if (PageAnon(old_page))
+				page_move_anon_rmap(old_page, vma, address);
 		set_huge_ptep_writable(vma, address, ptep);
 		return 0;
 	}
@@ -2338,6 +2354,13 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 		return -PTR_ERR(new_page);
 	}
 
+	/*
+	 * When the original hugepage is shared one, it does not have
+	 * anon_vma prepared.
+	 */
+	if (unlikely(anon_vma_prepare(vma)))
+		return VM_FAULT_OOM;
+
 	copy_huge_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
@@ -2352,6 +2375,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 		huge_ptep_clear_flush(vma, address, ptep);
 		set_huge_pte_at(mm, address, ptep,
 				make_huge_pte(vma, new_page, 1));
+		page_remove_rmap(old_page);
+		hugepage_add_anon_rmap(new_page, vma, address);
 		/* Make the old page be freed below */
 		new_page = old_page;
 	}
@@ -2452,10 +2477,17 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += blocks_per_huge_page(h);
 			spin_unlock(&inode->i_lock);
+			page_dup_rmap(page);
 		} else {
 			lock_page(page);
-			page->mapping = HUGETLB_POISON;
+			if (unlikely(anon_vma_prepare(vma))) {
+				ret = VM_FAULT_OOM;
+				goto backout_unlocked;
+			}
+			hugepage_add_new_anon_rmap(page, vma, address);
 		}
+	} else {
+		page_dup_rmap(page);
 	}
 
 	/*
@@ -2507,6 +2539,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t *ptep;
 	pte_t entry;
 	int ret;
+	struct page *page = NULL;
 	struct page *pagecache_page = NULL;
 	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
 	struct hstate *h = hstate_vma(vma);
@@ -2548,6 +2581,11 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 								vma, address);
 	}
 
+	if (!pagecache_page) {
+		page = pte_page(entry);
+		lock_page(page);
+	}
+
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
@@ -2573,6 +2611,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (pagecache_page) {
 		unlock_page(pagecache_page);
 		put_page(pagecache_page);
+	} else {
+		unlock_page(page);
 	}
 
 out_mutex:

mm/rmap.c

Lines changed: 59 additions & 0 deletions
@@ -56,6 +56,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
+#include <linux/hugetlb.h>
 
 #include <asm/tlbflush.h>
 
@@ -326,6 +327,8 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	unsigned long address;
 
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		pgoff = page->index << huge_page_order(page_hstate(page));
 	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
 		/* page should be within @vma mapping range */
@@ -369,6 +372,12 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	pte_t *pte;
 	spinlock_t *ptl;
 
+	if (unlikely(PageHuge(page))) {
+		pte = huge_pte_offset(mm, address);
+		ptl = &mm->page_table_lock;
+		goto check;
+	}
+
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
 		return NULL;
@@ -389,6 +398,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	}
 
 	ptl = pte_lockptr(mm, pmd);
+check:
 	spin_lock(ptl);
 	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
 		*ptlp = ptl;
@@ -873,6 +883,12 @@ void page_remove_rmap(struct page *page)
 		page_clear_dirty(page);
 		set_page_dirty(page);
 	}
+	/*
+	 * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
+	 * and not charged by memcg for now.
+	 */
+	if (unlikely(PageHuge(page)))
+		return;
 	if (PageAnon(page)) {
 		mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page, NR_ANON_PAGES);
@@ -1445,3 +1461,46 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
 		return rmap_walk_file(page, rmap_one, arg);
 }
 #endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_HUGETLBFS
+/*
+ * The following three functions are for anonymous (private mapped) hugepages.
+ * Unlike common anonymous pages, anonymous hugepages have no accounting code
+ * and no lru code, because we handle hugepages differently from common pages.
+ */
+static void __hugepage_set_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address, int exclusive)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+	BUG_ON(!anon_vma);
+	if (!exclusive) {
+		struct anon_vma_chain *avc;
+		avc = list_entry(vma->anon_vma_chain.prev,
+			struct anon_vma_chain, same_vma);
+		anon_vma = avc->anon_vma;
+	}
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	page->mapping = (struct address_space *) anon_vma;
+	page->index = linear_page_index(vma, address);
+}
+
+void hugepage_add_anon_rmap(struct page *page,
+			    struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+	int first;
+	BUG_ON(!anon_vma);
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	first = atomic_inc_and_test(&page->_mapcount);
+	if (first)
+		__hugepage_set_anon_rmap(page, vma, address, 0);
+}
+
+void hugepage_add_new_anon_rmap(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	atomic_set(&page->_mapcount, 0);
+	__hugepage_set_anon_rmap(page, vma, address, 1);
+}
+#endif /* CONFIG_HUGETLBFS */
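
A note on the _mapcount convention the new helpers rely on: _mapcount is -1 for an unmapped page, so atomic_inc_and_test() fires only for the very first mapper, which is exactly when __hugepage_set_anon_rmap() has to install the anon_vma pointer; hugepage_add_new_anon_rmap() instead sets the count straight to 0 (one mapping) for a freshly faulted private page. A tiny userspace model of that check (illustrative only, using C11 atomics rather than the kernel's atomic_t):

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
	atomic_int mapcount = -1;	/* unmapped page: _mapcount == -1 */

	/* atomic_inc_and_test(): increment, report whether the result is 0. */
	int first = (atomic_fetch_add(&mapcount, 1) + 1 == 0);
	printf("first mapper: %s, _mapcount now %d\n",
	       first ? "yes" : "no", atomic_load(&mapcount));

	/* a second mapper sees a non-zero result, so no rmap setup is done */
	first = (atomic_fetch_add(&mapcount, 1) + 1 == 0);
	printf("second mapper: %s, _mapcount now %d\n",
	       first ? "yes" : "no", atomic_load(&mapcount));
	return 0;
}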
