Commit a6abc2c

jankara authored and torvalds committed
dax: make cache flushing protected by entry lock
Currently, flushing of caches for DAX mappings ignores the entry lock. So far this was OK (modulo a bug where a difference in the entry's lock bit could cause cache flushing to be mistakenly skipped), but the following patches will write-protect PTEs on cache flushing and clear dirty tags, and for that we will need more exclusion. So do cache flushing under an entry lock. As a bonus, this allows us to remove one lock-unlock pair of mapping->tree_lock.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Jan Kara <[email protected]>
Reviewed-by: Ross Zwisler <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Dan Williams <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent cae1240 · commit a6abc2c

File tree

1 file changed: +39 −22 lines changed


fs/dax.c

Lines changed: 39 additions & 22 deletions
@@ -618,32 +618,50 @@ static int dax_writeback_one(struct block_device *bdev,
 		struct address_space *mapping, pgoff_t index, void *entry)
 {
 	struct radix_tree_root *page_tree = &mapping->page_tree;
-	struct radix_tree_node *node;
 	struct blk_dax_ctl dax;
-	void **slot;
+	void *entry2, **slot;
 	int ret = 0;
 
-	spin_lock_irq(&mapping->tree_lock);
 	/*
-	 * Regular page slots are stabilized by the page lock even
-	 * without the tree itself locked. These unlocked entries
-	 * need verification under the tree lock.
+	 * A page got tagged dirty in DAX mapping? Something is seriously
+	 * wrong.
 	 */
-	if (!__radix_tree_lookup(page_tree, index, &node, &slot))
-		goto unlock;
-	if (*slot != entry)
-		goto unlock;
-
-	/* another fsync thread may have already written back this entry */
-	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-		goto unlock;
+	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
+		return -EIO;
 
+	spin_lock_irq(&mapping->tree_lock);
+	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
+	/* Entry got punched out / reallocated? */
+	if (!entry2 || !radix_tree_exceptional_entry(entry2))
+		goto put_unlocked;
+	/*
+	 * Entry got reallocated elsewhere? No need to writeback. We have to
+	 * compare sectors as we must not bail out due to difference in lockbit
+	 * or entry type.
+	 */
+	if (dax_radix_sector(entry2) != dax_radix_sector(entry))
+		goto put_unlocked;
 	if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
 				dax_is_zero_entry(entry))) {
 		ret = -EIO;
-		goto unlock;
+		goto put_unlocked;
 	}
 
+	/* Another fsync thread may have already written back this entry */
+	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+		goto put_unlocked;
+	/* Lock the entry to serialize with page faults */
+	entry = lock_slot(mapping, slot);
+	/*
+	 * We can clear the tag now but we have to be careful so that concurrent
+	 * dax_writeback_one() calls for the same index cannot finish before we
+	 * actually flush the caches. This is achieved as the calls will look
+	 * at the entry only under tree_lock and once they do that they will
+	 * see the entry locked and wait for it to unlock.
+	 */
+	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+	spin_unlock_irq(&mapping->tree_lock);
+
 	/*
 	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
 	 * in the middle of a PMD, the 'index' we are given will be aligned to
@@ -653,31 +671,30 @@ static int dax_writeback_one(struct block_device *bdev,
 	 */
 	dax.sector = dax_radix_sector(entry);
 	dax.size = PAGE_SIZE << dax_radix_order(entry);
-	spin_unlock_irq(&mapping->tree_lock);
 
 	/*
 	 * We cannot hold tree_lock while calling dax_map_atomic() because it
 	 * eventually calls cond_resched().
 	 */
 	ret = dax_map_atomic(bdev, &dax);
-	if (ret < 0)
+	if (ret < 0) {
+		put_locked_mapping_entry(mapping, index, entry);
 		return ret;
+	}
 
 	if (WARN_ON_ONCE(ret < dax.size)) {
 		ret = -EIO;
 		goto unmap;
 	}
 
 	wb_cache_pmem(dax.addr, dax.size);
-
-	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
-	spin_unlock_irq(&mapping->tree_lock);
  unmap:
 	dax_unmap_atomic(bdev, &dax);
+	put_locked_mapping_entry(mapping, index, entry);
 	return ret;
 
- unlock:
+ put_unlocked:
+	put_unlocked_mapping_entry(mapping, index, entry2);
 	spin_unlock_irq(&mapping->tree_lock);
 	return ret;
 }
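
To make the resulting locking order easy to follow, here is a condensed sketch of dax_writeback_one() as it looks after this patch. This is an illustration only, not the committed code: the WARN_ON sanity checks, the short-mapping error path and the return-value handling are simplified, while the helpers (get_unlocked_mapping_entry(), lock_slot(), put_locked_mapping_entry(), put_unlocked_mapping_entry(), dax_map_atomic()/dax_unmap_atomic(), wb_cache_pmem()) are the ones visible in the diff above.

	/*
	 * Condensed sketch of the post-patch flow (illustrative only; see the
	 * diff above for the authoritative code).
	 */
	static int dax_writeback_one(struct block_device *bdev,
			struct address_space *mapping, pgoff_t index, void *entry)
	{
		struct radix_tree_root *page_tree = &mapping->page_tree;
		struct blk_dax_ctl dax;
		void *entry2, **slot;
		long ret;

		spin_lock_irq(&mapping->tree_lock);
		/* Revalidate: the entry may have been punched out or reallocated
		 * since the caller looked it up without holding the entry lock. */
		entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
		if (!entry2 || !radix_tree_exceptional_entry(entry2) ||
		    dax_radix_sector(entry2) != dax_radix_sector(entry))
			goto put_unlocked;
		/* Another fsync thread may have already written back this entry. */
		if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
			goto put_unlocked;

		/* Lock the entry to serialize with page faults, then clear TOWRITE.
		 * Clearing the tag before flushing is safe: concurrent
		 * dax_writeback_one() calls inspect the entry under tree_lock, see
		 * it locked, and wait until we unlock it after the flush below. */
		entry = lock_slot(mapping, slot);
		radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
		spin_unlock_irq(&mapping->tree_lock);

		/* Flush caches with only the entry lock held; tree_lock has been
		 * dropped because dax_map_atomic() may sleep. */
		dax.sector = dax_radix_sector(entry);
		dax.size = PAGE_SIZE << dax_radix_order(entry);
		ret = dax_map_atomic(bdev, &dax);
		if (ret >= 0) {
			wb_cache_pmem(dax.addr, dax.size);
			dax_unmap_atomic(bdev, &dax);
		}
		put_locked_mapping_entry(mapping, index, entry);	/* unlock entry */
		return ret < 0 ? ret : 0;

	 put_unlocked:
		put_unlocked_mapping_entry(mapping, index, entry2);
		spin_unlock_irq(&mapping->tree_lock);
		return 0;
	}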
