
Commit 5aaabe8

Naoya Horiguchi authored and torvalds committed
pagemap: avoid splitting thp when reading /proc/pid/pagemap
Thp split is not necessary if we explicitly check whether pmds are mapping thps or not. This patch introduces this check and adds code to generate pagemap entries for pmds mapping thps, which results in less performance impact of pagemap on thp.

Signed-off-by: Naoya Horiguchi <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Wu Fengguang <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
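For context, /proc/pid/pagemap is consumed from userspace as a flat array of u64 entries, one per virtual page, indexed by virtual page number. The sketch below is not part of this commit; it is a minimal standalone illustration of the interface whose read path this patch optimizes, assuming the bit layout documented for this kernel era (PFN in bits 0-54, present flag in bit 63):

/*
 * Hedged userspace sketch: query /proc/self/pagemap for one address.
 * Assumes the pagemap layout of this era: bits 0-54 = PFN,
 * bit 63 = page present. (Later kernels zero the PFN field for
 * unprivileged readers.)
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	char *buf = malloc(psize);

	memset(buf, 1, psize);		/* touch it so a frame is mapped */

	int fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* One u64 entry per virtual page, indexed by page number. */
	uint64_t entry;
	off_t off = ((uintptr_t)buf / psize) * sizeof(entry);
	if (pread(fd, &entry, sizeof(entry), off) != sizeof(entry)) {
		perror("pread");
		return 1;
	}

	if (entry & (1ULL << 63))	/* present bit */
		printf("%p -> pfn %#llx\n", (void *)buf,
		       (unsigned long long)(entry & ((1ULL << 55) - 1)));
	else
		printf("%p not present\n", (void *)buf);

	close(fd);
	return 0;
}

Before this patch, walking a thp-backed range for such a read forced the huge page to be split; with it, the walker fills the entries directly from the pmd.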
1 parent b716ad9 commit 5aaabe8

1 file changed: +48 −4 lines changed

fs/proc/task_mmu.c

Lines changed: 48 additions & 4 deletions
@@ -608,6 +608,9 @@ struct pagemapread {
 	u64 *buffer;
 };
 
+#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
+#define PAGEMAP_WALK_MASK	(PMD_MASK)
+
 #define PM_ENTRY_BYTES      sizeof(u64)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
@@ -666,22 +669,65 @@ static u64 pte_to_pagemap_entry(pte_t pte)
 	return pme;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
+{
+	u64 pme = 0;
+	/*
+	 * Currently pmd for thp is always present because thp can not be
+	 * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
+	 * This if-check is just to prepare for future implementation.
+	 */
+	if (pmd_present(pmd))
+		pme = PM_PFRAME(pmd_pfn(pmd) + offset)
+			| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+	return pme;
+}
+#else
+static inline u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
+{
+	return 0;
+}
+#endif
+
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     struct mm_walk *walk)
 {
 	struct vm_area_struct *vma;
 	struct pagemapread *pm = walk->private;
 	pte_t *pte;
 	int err = 0;
+	u64 pfn = PM_NOT_PRESENT;
 
-	split_huge_page_pmd(walk->mm, pmd);
 	if (pmd_trans_unstable(pmd))
 		return 0;
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
+	spin_lock(&walk->mm->page_table_lock);
+	if (pmd_trans_huge(*pmd)) {
+		if (pmd_trans_splitting(*pmd)) {
+			spin_unlock(&walk->mm->page_table_lock);
+			wait_split_huge_page(vma->anon_vma, pmd);
+		} else {
+			for (; addr != end; addr += PAGE_SIZE) {
+				unsigned long offset;
+
+				offset = (addr & ~PAGEMAP_WALK_MASK) >>
+						PAGE_SHIFT;
+				pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
+				err = add_to_pagemap(addr, pfn, pm);
+				if (err)
+					break;
+			}
+			spin_unlock(&walk->mm->page_table_lock);
+			return err;
+		}
+	} else {
+		spin_unlock(&walk->mm->page_table_lock);
+	}
+
 	for (; addr != end; addr += PAGE_SIZE) {
-		u64 pfn = PM_NOT_PRESENT;
 
 		/* check to see if we've left 'vma' behind
 		 * and need a new, higher one */
@@ -764,8 +810,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
-#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
-#define PAGEMAP_WALK_MASK	(PMD_MASK)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
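The new branch takes page_table_lock to re-check pmd_trans_huge(); a pmd caught mid-split is waited on via wait_split_huge_page() and then falls through to the ordinary pte loop. For a stable huge pmd, the per-subpage entries come from the offset arithmetic: PAGEMAP_WALK_MASK equals PMD_MASK, so (addr & ~PAGEMAP_WALK_MASK) >> PAGE_SHIFT is the subpage index inside the huge page, and the entry pfn is pmd_pfn plus that index. A standalone illustration of that arithmetic only (not kernel code; the pfn and address values are hypothetical, using x86-64 constants where a 2 MiB thp covers 512 entries):

/*
 * Illustration of the thp-branch offset math with hypothetical values:
 * one pmd-mapped 2 MiB huge page yields 512 consecutive pagemap
 * entries, pfn = head pfn + subpage index.
 */
#include <stdio.h>

#define PAGE_SHIFT		12
#define PMD_SHIFT		21
#define PMD_SIZE		(1UL << PMD_SHIFT)
#define PMD_MASK		(~(PMD_SIZE - 1))
#define PAGEMAP_WALK_MASK	PMD_MASK

int main(void)
{
	unsigned long thp_pfn = 0x100000;	/* hypothetical head pfn */
	unsigned long start = 0x7f0000200000;	/* pmd-aligned vaddr */

	for (unsigned long addr = start; addr != start + PMD_SIZE;
	     addr += 1UL << PAGE_SHIFT) {
		/* subpage index within the huge page: low 21 bits >> 12 */
		unsigned long offset = (addr & ~PAGEMAP_WALK_MASK) >> PAGE_SHIFT;

		if (offset < 3 || offset == 511)	/* print a sample */
			printf("addr %#lx -> pfn %#lx (offset %lu)\n",
			       addr, thp_pfn + offset, offset);
	}
	return 0;
}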
