
Commit c5a647d

kiryl authored and torvalds committed
thp: implement splitting pmd for huge zero page
We can't split the huge zero page itself (and it's a bug if we try), but we can
split the pmd which points to it. On splitting the pmd we create a page table
with all ptes set to the normal zero page.

[[email protected]: fix build error]
Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent e180377 commit c5a647d
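
For context, here is a hypothetical user-space sketch (not part of the commit) of how this pmd-split path could be reached, assuming transparent hugepages and the huge zero page are enabled: a read fault on never-written anonymous memory can install the huge zero page behind a trans-huge pmd, and a later mprotect() over only part of that 2MB range forces the kernel to split the pmd, which is the case this patch handles by filling a page table with normal zero-page ptes.

/* Hypothetical illustration only, not from the commit.  Assumes THP and
 * the huge zero page are enabled; 2MB alignment details are glossed over. */
#include <stdio.h>
#include <sys/mman.h>

#define MAP_LEN   (4UL << 20)    /* spans at least one aligned 2MB pmd unit */
#define PAGE_LEN  4096UL

int main(void)
{
        volatile char sum = 0;
        unsigned long off;
        char *p;

        p = mmap(NULL, MAP_LEN, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        /* Read faults on untouched anonymous memory can map the huge
         * zero page under a pmd_trans_huge() pmd. */
        for (off = 0; off < MAP_LEN; off += PAGE_LEN)
                sum += p[off];

        /* Changing protection on only part of a 2MB region makes the
         * kernel split that pmd; with this patch a huge-zero-page pmd is
         * rewritten as a page table of ordinary zero-page ptes instead of
         * tripping the new BUG_ON in split_huge_page(). */
        if (mprotect(p + MAP_LEN / 2, PAGE_LEN, PROT_READ) == -1) {
                perror("mprotect");
                return 1;
        }

        munmap(p, MAP_LEN);
        return 0;
}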

File tree

1 file changed: +42 -1 lines changed


mm/huge_memory.c

Lines changed: 42 additions & 1 deletion
@@ -1616,6 +1616,7 @@ int split_huge_page(struct page *page)
         struct anon_vma *anon_vma;
         int ret = 1;
 
+        BUG_ON(is_huge_zero_pfn(page_to_pfn(page)));
         BUG_ON(!PageAnon(page));
         anon_vma = page_lock_anon_vma(page);
         if (!anon_vma)
@@ -2475,24 +2476,64 @@ static int khugepaged(void *none)
         return 0;
 }
 
+static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
+                unsigned long haddr, pmd_t *pmd)
+{
+        struct mm_struct *mm = vma->vm_mm;
+        pgtable_t pgtable;
+        pmd_t _pmd;
+        int i;
+
+        pmdp_clear_flush(vma, haddr, pmd);
+        /* leave pmd empty until pte is filled */
+
+        pgtable = pgtable_trans_huge_withdraw(mm);
+        pmd_populate(mm, &_pmd, pgtable);
+
+        for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+                pte_t *pte, entry;
+                entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
+                entry = pte_mkspecial(entry);
+                pte = pte_offset_map(&_pmd, haddr);
+                VM_BUG_ON(!pte_none(*pte));
+                set_pte_at(mm, haddr, pte, entry);
+                pte_unmap(pte);
+        }
+        smp_wmb(); /* make pte visible before pmd */
+        pmd_populate(mm, pmd, pgtable);
+}
+
 void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
                 pmd_t *pmd)
 {
         struct page *page;
-        unsigned long haddr = address & HPAGE_PMD_MASK;
         struct mm_struct *mm = vma->vm_mm;
+        unsigned long haddr = address & HPAGE_PMD_MASK;
+        unsigned long mmun_start;        /* For mmu_notifiers */
+        unsigned long mmun_end;          /* For mmu_notifiers */
 
         BUG_ON(vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE);
 
+        mmun_start = haddr;
+        mmun_end   = haddr + HPAGE_PMD_SIZE;
+        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
         spin_lock(&mm->page_table_lock);
         if (unlikely(!pmd_trans_huge(*pmd))) {
                 spin_unlock(&mm->page_table_lock);
+                mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+                return;
+        }
+        if (is_huge_zero_pmd(*pmd)) {
+                __split_huge_zero_page_pmd(vma, haddr, pmd);
+                spin_unlock(&mm->page_table_lock);
+                mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
                 return;
         }
         page = pmd_page(*pmd);
         VM_BUG_ON(!page_count(page));
         get_page(page);
         spin_unlock(&mm->page_table_lock);
+        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
         split_huge_page(page);
 