Commit 025c5b2

Naoya Horiguchi authored and torvalds committed
thp: optimize away unnecessary page table locking
Currently, when we check whether we can handle a thp as it is or need to split it into regular sized pages, we take the page table lock before checking whether the given pmd maps a thp at all. Because of this, when it is not a "huge pmd" we suffer unnecessary lock/unlock overhead. To remove it, this patch introduces an optimized check function and replaces several instances of similar logic with it.

[[email protected]: checkpatch fixes]
Signed-off-by: Naoya Horiguchi <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Wu Fengguang <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Reviewed-by: KAMEZAWA Hiroyuki <[email protected]>
Cc: Jiri Slaby <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
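In rough terms, every converted caller previously open-coded the first pattern in the sketch below, taking page_table_lock even when *pmd turned out not to map a thp; with the new helper the lock is only taken once pmd_trans_huge(*pmd) has been observed. This is an illustrative sketch only: do_huge_work() is a hypothetical stand-in for the caller-specific thp handling and is not a function in this patch.

        /* Before: lock/unlock even for regular pmds. */
        spin_lock(&mm->page_table_lock);
        if (pmd_trans_huge(*pmd)) {
                if (pmd_trans_splitting(*pmd)) {
                        spin_unlock(&mm->page_table_lock);
                        wait_split_huge_page(vma->anon_vma, pmd);
                } else {
                        do_huge_work(pmd);      /* hypothetical caller-specific work */
                        spin_unlock(&mm->page_table_lock);
                }
        } else {
                spin_unlock(&mm->page_table_lock);      /* pure overhead here */
        }

        /* After: the helper takes the lock only for a huge pmd and keeps it
         * held exactly when it returns 1. */
        if (pmd_trans_huge_lock(pmd, vma) == 1) {
                do_huge_work(pmd);
                spin_unlock(&vma->vm_mm->page_table_lock);
        }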
1 parent 5aaabe8 commit 025c5b2

File tree

3 files changed: +101 -114 lines changed


fs/proc/task_mmu.c

Lines changed: 25 additions & 48 deletions
@@ -394,20 +394,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
         pte_t *pte;
         spinlock_t *ptl;
 
-        spin_lock(&walk->mm->page_table_lock);
-        if (pmd_trans_huge(*pmd)) {
-                if (pmd_trans_splitting(*pmd)) {
-                        spin_unlock(&walk->mm->page_table_lock);
-                        wait_split_huge_page(vma->anon_vma, pmd);
-                } else {
-                        smaps_pte_entry(*(pte_t *)pmd, addr,
-                                        HPAGE_PMD_SIZE, walk);
-                        spin_unlock(&walk->mm->page_table_lock);
-                        mss->anonymous_thp += HPAGE_PMD_SIZE;
-                        return 0;
-                }
-        } else {
+        if (pmd_trans_huge_lock(pmd, vma) == 1) {
+                smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
                 spin_unlock(&walk->mm->page_table_lock);
+                mss->anonymous_thp += HPAGE_PMD_SIZE;
+                return 0;
         }
 
         if (pmd_trans_unstable(pmd))
@@ -705,26 +696,19 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
         /* find the first VMA at or above 'addr' */
         vma = find_vma(walk->mm, addr);
         spin_lock(&walk->mm->page_table_lock);
-        if (pmd_trans_huge(*pmd)) {
-                if (pmd_trans_splitting(*pmd)) {
-                        spin_unlock(&walk->mm->page_table_lock);
-                        wait_split_huge_page(vma->anon_vma, pmd);
-                } else {
-                        for (; addr != end; addr += PAGE_SIZE) {
-                                unsigned long offset;
-
-                                offset = (addr & ~PAGEMAP_WALK_MASK) >>
-                                                PAGE_SHIFT;
-                                pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
-                                err = add_to_pagemap(addr, pfn, pm);
-                                if (err)
-                                        break;
-                        }
-                        spin_unlock(&walk->mm->page_table_lock);
-                        return err;
+        if (pmd_trans_huge_lock(pmd, vma) == 1) {
+                for (; addr != end; addr += PAGE_SIZE) {
+                        unsigned long offset;
+
+                        offset = (addr & ~PAGEMAP_WALK_MASK) >>
+                                        PAGE_SHIFT;
+                        pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
+                        err = add_to_pagemap(addr, pfn, pm);
+                        if (err)
+                                break;
                 }
-        } else {
                 spin_unlock(&walk->mm->page_table_lock);
+                return err;
         }
 
         for (; addr != end; addr += PAGE_SIZE) {
@@ -992,24 +976,17 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
         pte_t *pte;
 
         md = walk->private;
-        spin_lock(&walk->mm->page_table_lock);
-        if (pmd_trans_huge(*pmd)) {
-                if (pmd_trans_splitting(*pmd)) {
-                        spin_unlock(&walk->mm->page_table_lock);
-                        wait_split_huge_page(md->vma->anon_vma, pmd);
-                } else {
-                        pte_t huge_pte = *(pte_t *)pmd;
-                        struct page *page;
-
-                        page = can_gather_numa_stats(huge_pte, md->vma, addr);
-                        if (page)
-                                gather_stats(page, md, pte_dirty(huge_pte),
-                                                HPAGE_PMD_SIZE/PAGE_SIZE);
-                        spin_unlock(&walk->mm->page_table_lock);
-                        return 0;
-                }
-        } else {
+
+        if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
+                pte_t huge_pte = *(pte_t *)pmd;
+                struct page *page;
+
+                page = can_gather_numa_stats(huge_pte, md->vma, addr);
+                if (page)
+                        gather_stats(page, md, pte_dirty(huge_pte),
+                                        HPAGE_PMD_SIZE/PAGE_SIZE);
                 spin_unlock(&walk->mm->page_table_lock);
+                return 0;
         }
 
         if (pmd_trans_unstable(pmd))

include/linux/huge_mm.h

Lines changed: 17 additions & 0 deletions
@@ -113,6 +113,18 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
                                     unsigned long start,
                                     unsigned long end,
                                     long adjust_next);
+extern int __pmd_trans_huge_lock(pmd_t *pmd,
+                                 struct vm_area_struct *vma);
+/* mmap_sem must be held on entry */
+static inline int pmd_trans_huge_lock(pmd_t *pmd,
+                                      struct vm_area_struct *vma)
+{
+        VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
+        if (pmd_trans_huge(*pmd))
+                return __pmd_trans_huge_lock(pmd, vma);
+        else
+                return 0;
+}
 static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                          unsigned long start,
                                          unsigned long end,
@@ -176,6 +188,11 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                          long adjust_next)
 {
 }
+static inline int pmd_trans_huge_lock(pmd_t *pmd,
+                                      struct vm_area_struct *vma)
+{
+        return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
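As an illustration of how a pmd-level walker is expected to use the new inline helper (the callback name example_pmd_range and its body are hypothetical and not part of this commit): when pmd_trans_huge_lock() returns 1 the caller owns page_table_lock and must drop it, while in the common non-huge case the lock is never taken at all.

        /* Hypothetical caller sketch; mirrors the converted walkers above. */
        static int example_pmd_range(pmd_t *pmd, unsigned long addr,
                                     unsigned long end, struct mm_walk *walk)
        {
                struct vm_area_struct *vma = find_vma(walk->mm, addr);

                if (pmd_trans_huge_lock(pmd, vma) == 1) {
                        /* *pmd maps a stable thp; page_table_lock is held. */
                        /* ... handle the whole huge pmd here ... */
                        spin_unlock(&walk->mm->page_table_lock);
                        return 0;
                }

                /* Not a stable huge pmd: continue with the pte-level path. */
                return 0;
        }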

mm/huge_memory.c

Lines changed: 59 additions & 66 deletions
@@ -1031,32 +1031,23 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 {
         int ret = 0;
 
-        spin_lock(&tlb->mm->page_table_lock);
-        if (likely(pmd_trans_huge(*pmd))) {
-                if (unlikely(pmd_trans_splitting(*pmd))) {
-                        spin_unlock(&tlb->mm->page_table_lock);
-                        wait_split_huge_page(vma->anon_vma,
-                                             pmd);
-                } else {
-                        struct page *page;
-                        pgtable_t pgtable;
-                        pgtable = get_pmd_huge_pte(tlb->mm);
-                        page = pmd_page(*pmd);
-                        pmd_clear(pmd);
-                        tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
-                        page_remove_rmap(page);
-                        VM_BUG_ON(page_mapcount(page) < 0);
-                        add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
-                        VM_BUG_ON(!PageHead(page));
-                        tlb->mm->nr_ptes--;
-                        spin_unlock(&tlb->mm->page_table_lock);
-                        tlb_remove_page(tlb, page);
-                        pte_free(tlb->mm, pgtable);
-                        ret = 1;
-                }
-        } else
+        if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+                struct page *page;
+                pgtable_t pgtable;
+                pgtable = get_pmd_huge_pte(tlb->mm);
+                page = pmd_page(*pmd);
+                pmd_clear(pmd);
+                tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+                page_remove_rmap(page);
+                VM_BUG_ON(page_mapcount(page) < 0);
+                add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+                VM_BUG_ON(!PageHead(page));
+                tlb->mm->nr_ptes--;
                 spin_unlock(&tlb->mm->page_table_lock);
-
+                tlb_remove_page(tlb, page);
+                pte_free(tlb->mm, pgtable);
+                ret = 1;
+        }
         return ret;
 }
 
@@ -1066,21 +1057,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
         int ret = 0;
 
-        spin_lock(&vma->vm_mm->page_table_lock);
-        if (likely(pmd_trans_huge(*pmd))) {
-                ret = !pmd_trans_splitting(*pmd);
-                spin_unlock(&vma->vm_mm->page_table_lock);
-                if (unlikely(!ret))
-                        wait_split_huge_page(vma->anon_vma, pmd);
-                else {
-                        /*
-                         * All logical pages in the range are present
-                         * if backed by a huge page.
-                         */
-                        memset(vec, 1, (end - addr) >> PAGE_SHIFT);
-                }
-        } else
+        if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+                /*
+                 * All logical pages in the range are present
+                 * if backed by a huge page.
+                 */
                 spin_unlock(&vma->vm_mm->page_table_lock);
+                memset(vec, 1, (end - addr) >> PAGE_SHIFT);
+                ret = 1;
+        }
 
         return ret;
 }
@@ -1110,20 +1095,11 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
                 goto out;
         }
 
-        spin_lock(&mm->page_table_lock);
-        if (likely(pmd_trans_huge(*old_pmd))) {
-                if (pmd_trans_splitting(*old_pmd)) {
-                        spin_unlock(&mm->page_table_lock);
-                        wait_split_huge_page(vma->anon_vma, old_pmd);
-                        ret = -1;
-                } else {
-                        pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
-                        VM_BUG_ON(!pmd_none(*new_pmd));
-                        set_pmd_at(mm, new_addr, new_pmd, pmd);
-                        spin_unlock(&mm->page_table_lock);
-                        ret = 1;
-                }
-        } else {
+        ret = __pmd_trans_huge_lock(old_pmd, vma);
+        if (ret == 1) {
+                pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
+                VM_BUG_ON(!pmd_none(*new_pmd));
+                set_pmd_at(mm, new_addr, new_pmd, pmd);
                 spin_unlock(&mm->page_table_lock);
         }
 out:
@@ -1136,24 +1112,41 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
         struct mm_struct *mm = vma->vm_mm;
         int ret = 0;
 
-        spin_lock(&mm->page_table_lock);
+        if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+                pmd_t entry;
+                entry = pmdp_get_and_clear(mm, addr, pmd);
+                entry = pmd_modify(entry, newprot);
+                set_pmd_at(mm, addr, pmd, entry);
+                spin_unlock(&vma->vm_mm->page_table_lock);
+                ret = 1;
+        }
+
+        return ret;
+}
+
+/*
+ * Returns 1 if a given pmd maps a stable (not under splitting) thp.
+ * Returns -1 if it maps a thp under splitting. Returns 0 otherwise.
+ *
+ * Note that if it returns 1, this routine returns without unlocking page
+ * table locks. So callers must unlock them.
+ */
+int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
+{
+        spin_lock(&vma->vm_mm->page_table_lock);
         if (likely(pmd_trans_huge(*pmd))) {
                 if (unlikely(pmd_trans_splitting(*pmd))) {
-                        spin_unlock(&mm->page_table_lock);
+                        spin_unlock(&vma->vm_mm->page_table_lock);
                         wait_split_huge_page(vma->anon_vma, pmd);
+                        return -1;
                 } else {
-                        pmd_t entry;
-
-                        entry = pmdp_get_and_clear(mm, addr, pmd);
-                        entry = pmd_modify(entry, newprot);
-                        set_pmd_at(mm, addr, pmd, entry);
-                        spin_unlock(&vma->vm_mm->page_table_lock);
-                        ret = 1;
+                        /* Thp mapped by 'pmd' is stable, so we can
+                         * handle it as it is. */
+                        return 1;
                 }
-        } else
-                spin_unlock(&vma->vm_mm->page_table_lock);
-
-        return ret;
+        }
+        spin_unlock(&vma->vm_mm->page_table_lock);
+        return 0;
 }
 
 pmd_t *page_check_address_pmd(struct page *page,
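For callers that care about all three outcomes of __pmd_trans_huge_lock(), the contract documented in the comment above can be summarized by the sketch below; it is illustrative only, and the retry/fallback policies named in the comments are hypothetical and caller-specific.

        switch (__pmd_trans_huge_lock(pmd, vma)) {
        case 1:
                /* stable thp: page_table_lock is held and must be released */
                spin_unlock(&vma->vm_mm->page_table_lock);
                break;
        case -1:
                /* thp was under splitting: the helper already waited for the
                 * split to complete, so a caller would typically retry */
                break;
        default:
                /* not a huge pmd: lock already dropped, use the pte path */
                break;
        }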
