
Commit 05e90bd

xzpeter authored and akpm00 committed
mm/hugetlb: only drop uffd-wp special pte if required
As with shmem uffd-wp special ptes, only drop the uffd-wp special swap pte if unmapping an entire vma or synchronized such that faults cannot race with the unmap operation.  This requires passing zap_flags all the way down to the lowest level hugetlb unmap routine: __unmap_hugepage_range.

In general, unmap calls originating in hugetlbfs code will pass the ZAP_FLAG_DROP_MARKER flag, as synchronization is in place to prevent faults.  The exception is hole punch, which will first unmap without any synchronization.  Later, when hole punch actually removes the page from the file, it will check whether there was a subsequent fault and, if so, take the hugetlb fault mutex while unmapping again.  This second unmap will pass in ZAP_FLAG_DROP_MARKER.

The justification for when to apply ZAP_FLAG_DROP_MARKER while unmapping a hugetlb range is (IMHO): we should never reach a state where a page fault could erroneously fault in a wr-protected page-cache page as writable, even for an extremely short period.  That could happen if, e.g., we passed ZAP_FLAG_DROP_MARKER when hugetlbfs_punch_hole() calls hugetlb_vmdelete_list(): if a page faults after that call and before remove_inode_hugepages() is executed, the page cache can be mapped writable again in that small racy window, which can cause unexpected data to be overwritten.

[[email protected]: fix sparse warning]
  Link: https://lkml.kernel.org/r/[email protected]
[[email protected]: move zap_flags_t from mm.h to mm_types.h to fix build issues]
  Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Peter Xu <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Cc: Alistair Popple <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Axel Rasmussen <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Jerome Glisse <[email protected]>
Cc: "Kirill A . Shutemov" <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Nadav Amit <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
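To make this rule concrete, below is a minimal standalone C model of the decision (illustrative only: the enum, the zap_slot() helper, and the flag value are hypothetical stand-ins, not kernel API).

/*
 * Illustrative model of the ZAP_FLAG_DROP_MARKER rule: a zap keeps the
 * uffd-wp marker unless the caller explicitly asks to drop it, which is
 * only legitimate when faults cannot race with the unmap.
 */
#include <stdio.h>

typedef unsigned int zap_flags_t;
#define ZAP_FLAG_DROP_MARKER (1u << 0)

/* A hugetlb pte slot: a wr-protected present page, a uffd-wp marker, or empty. */
enum slot { SLOT_PAGE_WP, SLOT_MARKER, SLOT_EMPTY };

static enum slot zap_slot(enum slot s, zap_flags_t zap_flags)
{
        /* Keep the uffd-wp marker unless the caller asked to drop it. */
        if (s == SLOT_PAGE_WP && !(zap_flags & ZAP_FLAG_DROP_MARKER))
                return SLOT_MARKER;
        return SLOT_EMPTY;
}

int main(void)
{
        /* Hole punch, first unmap: faults can race, so the marker survives
         * and a later write fault is still reported to userfaultfd. */
        enum slot s = zap_slot(SLOT_PAGE_WP, 0);
        printf("unsynchronized unmap -> %s\n",
               s == SLOT_MARKER ? "marker kept" : "cleared");

        /* Truncate, or the second hole-punch unmap under the fault mutex:
         * no fault can race, so the marker may be dropped for good. */
        s = zap_slot(SLOT_PAGE_WP, ZAP_FLAG_DROP_MARKER);
        printf("synchronized unmap   -> %s\n",
               s == SLOT_EMPTY ? "cleared" : "marker kept");
        return 0;
}

In the patch itself (mm/hugetlb.c below), this decision is made twice in __unmap_hugepage_range(): once for non-present swap-form ptes via pte_swp_uffd_wp_any(), and once for present ptes via huge_pte_uffd_wp().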
1 parent 60dfaad commit 05e90bd

File tree: 6 files changed (+45, -20 lines)


fs/hugetlbfs/inode.c

Lines changed: 9 additions & 6 deletions

@@ -405,7 +405,8 @@ static void remove_huge_page(struct page *page)
 }
 
 static void
-hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
+hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
+                      zap_flags_t zap_flags)
 {
         struct vm_area_struct *vma;
 
@@ -439,7 +440,7 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
                 }
 
                 unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
-                                     NULL);
+                                     NULL, zap_flags);
         }
 }
 
@@ -517,7 +518,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
                         mutex_lock(&hugetlb_fault_mutex_table[hash]);
                         hugetlb_vmdelete_list(&mapping->i_mmap,
                                 index * pages_per_huge_page(h),
-                                (index + 1) * pages_per_huge_page(h));
+                                (index + 1) * pages_per_huge_page(h),
+                                ZAP_FLAG_DROP_MARKER);
                         i_mmap_unlock_write(mapping);
                 }
 
@@ -583,7 +585,8 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
         i_mmap_lock_write(mapping);
         i_size_write(inode, offset);
         if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
-                hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
+                hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
+                                      ZAP_FLAG_DROP_MARKER);
         i_mmap_unlock_write(mapping);
         remove_inode_hugepages(inode, offset, LLONG_MAX);
 }
@@ -616,8 +619,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                 i_mmap_lock_write(mapping);
                 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
                         hugetlb_vmdelete_list(&mapping->i_mmap,
-                                        hole_start >> PAGE_SHIFT,
-                                        hole_end >> PAGE_SHIFT);
+                                              hole_start >> PAGE_SHIFT,
+                                              hole_end >> PAGE_SHIFT, 0);
                 i_mmap_unlock_write(mapping);
                 remove_inode_hugepages(inode, hole_start, hole_end);
                 inode_unlock(inode);

include/linux/hugetlb.h

Lines changed: 5 additions & 3 deletions

@@ -143,11 +143,12 @@ long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
                          unsigned long *, unsigned long *, long, unsigned int,
                          int *);
 void unmap_hugepage_range(struct vm_area_struct *,
-                          unsigned long, unsigned long, struct page *);
+                          unsigned long, unsigned long, struct page *,
+                          zap_flags_t);
 void __unmap_hugepage_range_final(struct mmu_gather *tlb,
                           struct vm_area_struct *vma,
                           unsigned long start, unsigned long end,
-                          struct page *ref_page);
+                          struct page *ref_page, zap_flags_t zap_flags);
 void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(char *buf, int len, int nid);
 void hugetlb_show_meminfo(void);
@@ -406,7 +407,8 @@ static inline unsigned long hugetlb_change_protection(
 
 static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
                         struct vm_area_struct *vma, unsigned long start,
-                        unsigned long end, struct page *ref_page)
+                        unsigned long end, struct page *ref_page,
+                        zap_flags_t zap_flags)
 {
         BUG();
 }

include/linux/mm.h

Lines changed: 0 additions & 2 deletions

@@ -3428,8 +3428,6 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
 }
 #endif
 
-typedef unsigned int __bitwise zap_flags_t;
-
 /*
  * Whether to drop the pte markers, for example, the uffd-wp information for
  * file-backed memory. This should only be specified when we will completely

include/linux/mm_types.h

Lines changed: 2 additions & 0 deletions

@@ -863,4 +863,6 @@ enum fault_flag {
         FAULT_FLAG_ORIG_PTE_VALID = 1 << 11,
 };
 
+typedef unsigned int __bitwise zap_flags_t;
+
 #endif /* _LINUX_MM_TYPES_H */

mm/hugetlb.c

Lines changed: 25 additions & 8 deletions

@@ -4973,7 +4973,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
 
 static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
                                    unsigned long start, unsigned long end,
-                                   struct page *ref_page)
+                                   struct page *ref_page, zap_flags_t zap_flags)
 {
         struct mm_struct *mm = vma->vm_mm;
         unsigned long address;
@@ -5029,7 +5029,18 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                  * unmapped and its refcount is dropped, so just clear pte here.
                  */
                 if (unlikely(!pte_present(pte))) {
-                        huge_pte_clear(mm, address, ptep, sz);
+                        /*
+                         * If the pte was wr-protected by uffd-wp in any of the
+                         * swap forms, meanwhile the caller does not want to
+                         * drop the uffd-wp bit in this zap, then replace the
+                         * pte with a marker.
+                         */
+                        if (pte_swp_uffd_wp_any(pte) &&
+                            !(zap_flags & ZAP_FLAG_DROP_MARKER))
+                                set_huge_pte_at(mm, address, ptep,
+                                                make_pte_marker(PTE_MARKER_UFFD_WP));
+                        else
+                                huge_pte_clear(mm, address, ptep, sz);
                         spin_unlock(ptl);
                         continue;
                 }
@@ -5057,7 +5068,11 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                 tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
                 if (huge_pte_dirty(pte))
                         set_page_dirty(page);
-
+                /* Leave a uffd-wp pte marker if needed */
+                if (huge_pte_uffd_wp(pte) &&
+                    !(zap_flags & ZAP_FLAG_DROP_MARKER))
+                        set_huge_pte_at(mm, address, ptep,
+                                        make_pte_marker(PTE_MARKER_UFFD_WP));
                 hugetlb_count_sub(pages_per_huge_page(h), mm);
                 page_remove_rmap(page, vma, true);
 
@@ -5091,9 +5106,10 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
 
 void __unmap_hugepage_range_final(struct mmu_gather *tlb,
                           struct vm_area_struct *vma, unsigned long start,
-                          unsigned long end, struct page *ref_page)
+                          unsigned long end, struct page *ref_page,
+                          zap_flags_t zap_flags)
 {
-        __unmap_hugepage_range(tlb, vma, start, end, ref_page);
+        __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
 
         /*
          * Clear this flag so that x86's huge_pmd_share page_table_shareable
@@ -5109,12 +5125,13 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-                          unsigned long end, struct page *ref_page)
+                          unsigned long end, struct page *ref_page,
+                          zap_flags_t zap_flags)
 {
         struct mmu_gather tlb;
 
         tlb_gather_mmu(&tlb, vma->vm_mm);
-        __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
+        __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
         tlb_finish_mmu(&tlb);
 }
 
@@ -5169,7 +5186,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                  */
                 if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
                         unmap_hugepage_range(iter_vma, address,
-                                             address + huge_page_size(h), page);
+                                             address + huge_page_size(h), page, 0);
         }
         i_mmap_unlock_write(mapping);
 }

mm/memory.c

Lines changed: 4 additions & 1 deletion

@@ -1675,8 +1675,11 @@ static void unmap_single_vma(struct mmu_gather *tlb,
                          * safe to do nothing in this case.
                          */
                         if (vma->vm_file) {
+                                zap_flags_t zap_flags = details ?
+                                    details->zap_flags : 0;
                                 i_mmap_lock_write(vma->vm_file->f_mapping);
-                                __unmap_hugepage_range_final(tlb, vma, start, end, NULL);
+                                __unmap_hugepage_range_final(tlb, vma, start, end,
+                                                             NULL, zap_flags);
                                 i_mmap_unlock_write(vma->vm_file->f_mapping);
                         }
                 } else
