Skip to content

Commit 5c7fb56

Browse files
djbw authored and torvalds committed
mm, dax: dax-pmd vs thp-pmd vs hugetlbfs-pmd
A dax-huge-page mapping while it uses some thp helpers is ultimately not a transparent huge page. The distinction is especially important in the get_user_pages() path. pmd_devmap() is used to distinguish dax-pmds from pmd_huge() and pmd_trans_huge() which have slightly different semantics. Explicitly mark the pmd_trans_huge() helpers that dax needs by adding pmd_devmap() checks. [[email protected]: fix regression in handling mlocked pages in __split_huge_pmd()] Signed-off-by: Dan Williams <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: Matthew Wilcox <[email protected]> Signed-off-by: Kirill A. Shutemov <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 5c2c258 commit 5c7fb56

File tree

7 files changed

+47
-27
lines changed

7 files changed

+47
-27
lines changed

arch/x86/include/asm/pgtable.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,13 +164,20 @@ static inline int pmd_large(pmd_t pte)
164164
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
165165
static inline int pmd_trans_huge(pmd_t pmd)
166166
{
167-
return pmd_val(pmd) & _PAGE_PSE;
167+
return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
168168
}
169169

170170
static inline int has_transparent_hugepage(void)
171171
{
172172
return cpu_has_pse;
173173
}
174+
175+
#ifdef __HAVE_ARCH_PTE_DEVMAP
176+
static inline int pmd_devmap(pmd_t pmd)
177+
{
178+
return !!(pmd_val(pmd) & _PAGE_DEVMAP);
179+
}
180+
#endif
174181
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
175182

176183
static inline pte_t pte_set_flags(pte_t pte, pteval_t set)

include/linux/huge_mm.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
104104
#define split_huge_pmd(__vma, __pmd, __address) \
105105
do { \
106106
pmd_t *____pmd = (__pmd); \
107-
if (pmd_trans_huge(*____pmd)) \
107+
if (pmd_trans_huge(*____pmd) \
108+
|| pmd_devmap(*____pmd)) \
108109
__split_huge_pmd(__vma, __pmd, __address); \
109110
} while (0)
110111

@@ -124,7 +125,7 @@ static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
124125
spinlock_t **ptl)
125126
{
126127
VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
127-
if (pmd_trans_huge(*pmd))
128+
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
128129
return __pmd_trans_huge_lock(pmd, vma, ptl);
129130
else
130131
return false;

include/linux/mm.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,13 @@ struct inode;
329329
#define page_private(page) ((page)->private)
330330
#define set_page_private(page, v) ((page)->private = (v))
331331

332+
#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
333+
static inline int pmd_devmap(pmd_t pmd)
334+
{
335+
return 0;
336+
}
337+
#endif
338+
332339
/*
333340
* FIXME: take this include out, include page-flags.h in
334341
* files which need it (119 of them)

mm/huge_memory.c

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -995,7 +995,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
995995

996996
ret = -EAGAIN;
997997
pmd = *src_pmd;
998-
if (unlikely(!pmd_trans_huge(pmd))) {
998+
if (unlikely(!pmd_trans_huge(pmd) && !pmd_devmap(pmd))) {
999999
pte_free(dst_mm, pgtable);
10001000
goto out_unlock;
10011001
}
@@ -1018,17 +1018,20 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
10181018
goto out_unlock;
10191019
}
10201020

1021-
src_page = pmd_page(pmd);
1022-
VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
1023-
get_page(src_page);
1024-
page_dup_rmap(src_page, true);
1025-
add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
1021+
if (pmd_trans_huge(pmd)) {
1022+
/* thp accounting separate from pmd_devmap accounting */
1023+
src_page = pmd_page(pmd);
1024+
VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
1025+
get_page(src_page);
1026+
page_dup_rmap(src_page, true);
1027+
add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
1028+
atomic_long_inc(&dst_mm->nr_ptes);
1029+
pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
1030+
}
10261031

10271032
pmdp_set_wrprotect(src_mm, addr, src_pmd);
10281033
pmd = pmd_mkold(pmd_wrprotect(pmd));
1029-
pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
10301034
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
1031-
atomic_long_inc(&dst_mm->nr_ptes);
10321035

10331036
ret = 0;
10341037
out_unlock:
@@ -1716,7 +1719,7 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
17161719
spinlock_t **ptl)
17171720
{
17181721
*ptl = pmd_lock(vma->vm_mm, pmd);
1719-
if (likely(pmd_trans_huge(*pmd)))
1722+
if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
17201723
return true;
17211724
spin_unlock(*ptl);
17221725
return false;
@@ -2788,7 +2791,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
27882791
VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
27892792
VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
27902793
VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
2791-
VM_BUG_ON(!pmd_trans_huge(*pmd));
2794+
VM_BUG_ON(!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd));
27922795

27932796
count_vm_event(THP_SPLIT_PMD);
27942797

@@ -2901,14 +2904,15 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
29012904

29022905
mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
29032906
ptl = pmd_lock(mm, pmd);
2904-
if (unlikely(!pmd_trans_huge(*pmd)))
2907+
if (pmd_trans_huge(*pmd)) {
2908+
page = pmd_page(*pmd);
2909+
if (PageMlocked(page))
2910+
get_page(page);
2911+
else
2912+
page = NULL;
2913+
} else if (!pmd_devmap(*pmd))
29052914
goto out;
2906-
page = pmd_page(*pmd);
29072915
__split_huge_pmd_locked(vma, pmd, haddr, false);
2908-
if (PageMlocked(page))
2909-
get_page(page);
2910-
else
2911-
page = NULL;
29122916
out:
29132917
spin_unlock(ptl);
29142918
mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
@@ -2938,7 +2942,7 @@ static void split_huge_pmd_address(struct vm_area_struct *vma,
29382942
return;
29392943

29402944
pmd = pmd_offset(pud, address);
2941-
if (!pmd_present(*pmd) || !pmd_trans_huge(*pmd))
2945+
if (!pmd_present(*pmd) || (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)))
29422946
return;
29432947
/*
29442948
* Caller holds the mmap_sem write mode, so a huge pmd cannot

mm/memory.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
950950
src_pmd = pmd_offset(src_pud, addr);
951951
do {
952952
next = pmd_addr_end(addr, end);
953-
if (pmd_trans_huge(*src_pmd)) {
953+
if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) {
954954
int err;
955955
VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
956956
err = copy_huge_pmd(dst_mm, src_mm,
@@ -1177,7 +1177,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
11771177
pmd = pmd_offset(pud, addr);
11781178
do {
11791179
next = pmd_addr_end(addr, end);
1180-
if (pmd_trans_huge(*pmd)) {
1180+
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
11811181
if (next - addr != HPAGE_PMD_SIZE) {
11821182
#ifdef CONFIG_DEBUG_VM
11831183
if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
@@ -3375,7 +3375,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
33753375
int ret;
33763376

33773377
barrier();
3378-
if (pmd_trans_huge(orig_pmd)) {
3378+
if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
33793379
unsigned int dirty = flags & FAULT_FLAG_WRITE;
33803380

33813381
if (pmd_protnone(orig_pmd))
@@ -3404,7 +3404,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
34043404
unlikely(__pte_alloc(mm, vma, pmd, address)))
34053405
return VM_FAULT_OOM;
34063406
/* if an huge pmd materialized from under us just retry later */
3407-
if (unlikely(pmd_trans_huge(*pmd)))
3407+
if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
34083408
return 0;
34093409
/*
34103410
* A regular pmd is established and it can't morph into a huge pmd

mm/mprotect.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
149149
unsigned long this_pages;
150150

151151
next = pmd_addr_end(addr, end);
152-
if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
152+
if (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
153+
&& pmd_none_or_clear_bad(pmd))
153154
continue;
154155

155156
/* invoke the mmu notifier if the pmd is populated */
@@ -158,7 +159,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
158159
mmu_notifier_invalidate_range_start(mm, mni_start, end);
159160
}
160161

161-
if (pmd_trans_huge(*pmd)) {
162+
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
162163
if (next - addr != HPAGE_PMD_SIZE)
163164
split_huge_pmd(vma, pmd, addr);
164165
else {

mm/pgtable-generic.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
132132
{
133133
pmd_t pmd;
134134
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
135-
VM_BUG_ON(!pmd_trans_huge(*pmdp));
135+
VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
136136
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
137137
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
138138
return pmd;

0 commit comments

Comments (0)