Skip to content

Commit f729c8c

Browse files
Ross Zwisler authored and torvalds committed
dax: wrprotect pmd_t in dax_mapping_entry_mkclean
Currently dax_mapping_entry_mkclean() fails to clean and write protect the pmd_t of a DAX PMD entry during an *sync operation. This can result in data loss in the following sequence:

1) mmap write to DAX PMD, dirtying PMD radix tree entry and making the pmd_t dirty and writeable

2) fsync, flushing out PMD data and cleaning the radix tree entry. We currently fail to mark the pmd_t as clean and write protected.

3) more mmap writes to the PMD. These don't cause any page faults since the pmd_t is dirty and writeable. The radix tree entry remains clean.

4) fsync, which fails to flush the dirty PMD data because the radix tree entry was clean.

5) crash - dirty data that should have been fsync'd as part of 4) could still have been in the processor cache, and is lost.

Fix this by marking the pmd_t clean and write protected in dax_mapping_entry_mkclean(), which is called as part of the fsync operation 2). This will cause the writes in step 3) above to generate page faults where we'll re-dirty the PMD radix tree entry, resulting in flushes in the fsync that happens in step 4).

Fixes: 4b4bb46 ("dax: clear dirty entry tags on cache flush")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ross Zwisler <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Dave Hansen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 0979639 commit f729c8c

File tree

3 files changed

+38
-19
lines changed

3 files changed

+38
-19
lines changed

fs/dax.c

Lines changed: 36 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -691,8 +691,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
691691
pgoff_t index, unsigned long pfn)
692692
{
693693
struct vm_area_struct *vma;
694-
pte_t *ptep;
695-
pte_t pte;
694+
pte_t pte, *ptep = NULL;
695+
pmd_t *pmdp = NULL;
696696
spinlock_t *ptl;
697697
bool changed;
698698

@@ -707,21 +707,42 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
707707

708708
address = pgoff_address(index, vma);
709709
changed = false;
710-
if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
710+
if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
711711
continue;
712-
if (pfn != pte_pfn(*ptep))
713-
goto unlock;
714-
if (!pte_dirty(*ptep) && !pte_write(*ptep))
715-
goto unlock;
716712

717-
flush_cache_page(vma, address, pfn);
718-
pte = ptep_clear_flush(vma, address, ptep);
719-
pte = pte_wrprotect(pte);
720-
pte = pte_mkclean(pte);
721-
set_pte_at(vma->vm_mm, address, ptep, pte);
722-
changed = true;
723-
unlock:
724-
pte_unmap_unlock(ptep, ptl);
713+
if (pmdp) {
714+
#ifdef CONFIG_FS_DAX_PMD
715+
pmd_t pmd;
716+
717+
if (pfn != pmd_pfn(*pmdp))
718+
goto unlock_pmd;
719+
if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
720+
goto unlock_pmd;
721+
722+
flush_cache_page(vma, address, pfn);
723+
pmd = pmdp_huge_clear_flush(vma, address, pmdp);
724+
pmd = pmd_wrprotect(pmd);
725+
pmd = pmd_mkclean(pmd);
726+
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
727+
changed = true;
728+
unlock_pmd:
729+
spin_unlock(ptl);
730+
#endif
731+
} else {
732+
if (pfn != pte_pfn(*ptep))
733+
goto unlock_pte;
734+
if (!pte_dirty(*ptep) && !pte_write(*ptep))
735+
goto unlock_pte;
736+
737+
flush_cache_page(vma, address, pfn);
738+
pte = ptep_clear_flush(vma, address, ptep);
739+
pte = pte_wrprotect(pte);
740+
pte = pte_mkclean(pte);
741+
set_pte_at(vma->vm_mm, address, ptep, pte);
742+
changed = true;
743+
unlock_pte:
744+
pte_unmap_unlock(ptep, ptl);
745+
}
725746

726747
if (changed)
727748
mmu_notifier_invalidate_page(vma->vm_mm, address);

include/linux/mm.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1210,8 +1210,6 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
12101210
struct vm_area_struct *vma);
12111211
void unmap_mapping_range(struct address_space *mapping,
12121212
loff_t const holebegin, loff_t const holelen, int even_cows);
1213-
int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
1214-
spinlock_t **ptlp);
12151213
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
12161214
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
12171215
int follow_pfn(struct vm_area_struct *vma, unsigned long address,

mm/memory.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3819,8 +3819,8 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
38193819
return -EINVAL;
38203820
}
38213821

3822-
int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
3823-
spinlock_t **ptlp)
3822+
static inline int follow_pte(struct mm_struct *mm, unsigned long address,
3823+
pte_t **ptepp, spinlock_t **ptlp)
38243824
{
38253825
int res;
38263826

0 commit comments

Comments
 (0)