
Commit c2a7d2a

djbw authored and davejiang committed
filesystem-dax: Introduce dax_lock_mapping_entry()
In preparation for implementing support for memory poison (media
error) handling via dax mappings, implement a lock_page() equivalent.
Poison error handling requires rmap and needs guarantees that the
page->mapping association is maintained / valid (inode not freed) for
the duration of the lookup.

In the device-dax case it is sufficient to simply hold a dev_pagemap
reference. In the filesystem-dax case we need to use the entry lock.

Export the entry lock via dax_lock_mapping_entry() that uses
rcu_read_lock() to protect against the inode being freed, and
revalidates the page->mapping association under xa_lock().

Cc: Christoph Hellwig <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Jan Kara <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
Signed-off-by: Dave Jiang <[email protected]>
1 parent ae1139e commit c2a7d2a
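
For orientation, here is a hedged sketch of how a memory-failure path might consume the new API once the rest of the poison-handling series lands. The handler name and the rmap-walk placeholder are hypothetical, not part of this commit; only dax_lock_mapping_entry() and dax_unlock_mapping_entry() come from the patch below:

	/*
	 * Hypothetical consumer: pin the page->mapping/page->index
	 * association before an rmap walk. dax_lock_mapping_entry()
	 * takes the filesystem-dax entry lock, or simply reports
	 * success in the device-dax case, where a dev_pagemap
	 * reference already keeps the inode alive.
	 */
	static int handle_dax_media_error(struct page *page)
	{
		if (!dax_lock_mapping_entry(page))
			return -EBUSY;	/* mapping torn down, nothing to map back */

		/* ... reverse-map the pfn and signal affected processes ... */

		dax_unlock_mapping_entry(page);
		return 0;
	}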

File tree

fs/dax.c
include/linux/dax.h

2 files changed: +116 −6 lines changed

fs/dax.c

Lines changed: 103 additions & 6 deletions
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-		pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp, bool (*wait_fn)(void))
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
+		bool revalidate;
+
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		xa_unlock_irq(&mapping->i_pages);
-		schedule();
+		revalidate = wait_fn();
 		finish_wait(wq, &ewait.wait);
 		xa_lock_irq(&mapping->i_pages);
+		if (revalidate)
+			return ERR_PTR(-EAGAIN);
 	}
 }
 
-static void dax_unlock_mapping_entry(struct address_space *mapping,
-				     pgoff_t index)
+static bool entry_wait(void)
+{
+	schedule();
+	/*
+	 * Never return an ERR_PTR() from
+	 * __get_unlocked_mapping_entry(), just keep looping.
+	 */
+	return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp)
+{
+	return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
 	void *entry, **slot;
 
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 static void put_locked_mapping_entry(struct address_space *mapping,
 				     pgoff_t index)
 {
-	dax_unlock_mapping_entry(mapping, index);
+	unlock_mapping_entry(mapping, index);
 }
 
 /*
@@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+static bool entry_wait_revalidate(void)
+{
+	rcu_read_unlock();
+	schedule();
+	rcu_read_lock();
+
+	/*
+	 * Tell __get_unlocked_mapping_entry() to take a break, we need
+	 * to revalidate page->mapping after dropping locks
+	 */
+	return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+	pgoff_t index;
+	struct inode *inode;
+	bool did_lock = false;
+	void *entry = NULL, **slot;
+	struct address_space *mapping;
+
+	rcu_read_lock();
+	for (;;) {
+		mapping = READ_ONCE(page->mapping);
+
+		if (!dax_mapping(mapping))
+			break;
+
+		/*
+		 * In the device-dax case there's no need to lock, a
+		 * struct dev_pagemap pin is sufficient to keep the
+		 * inode alive, and we assume we have dev_pagemap pin
+		 * otherwise we would not have a valid pfn_to_page()
+		 * translation.
+		 */
+		inode = mapping->host;
+		if (S_ISCHR(inode->i_mode)) {
+			did_lock = true;
+			break;
+		}
+
+		xa_lock_irq(&mapping->i_pages);
+		if (mapping != page->mapping) {
+			xa_unlock_irq(&mapping->i_pages);
+			continue;
+		}
+		index = page->index;
+
+		entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+				entry_wait_revalidate);
+		if (!entry) {
+			xa_unlock_irq(&mapping->i_pages);
+			break;
+		} else if (IS_ERR(entry)) {
+			WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+			continue;
+		}
+		lock_slot(mapping, slot);
+		did_lock = true;
+		xa_unlock_irq(&mapping->i_pages);
+		break;
+	}
+	rcu_read_unlock();
+
+	return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+
+	if (S_ISCHR(inode->i_mode))
+		return;
+
+	unlock_mapping_entry(mapping, page->index);
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't

include/linux/dax.h

Lines changed: 13 additions & 0 deletions
@@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+bool dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool dax_lock_mapping_entry(struct page *page)
+{
+	if (IS_DAX(page->mapping->host))
+		return true;
+	return false;
+}
+
+static inline void dax_unlock_mapping_entry(struct page *page)
+{
+}
 #endif
 
 int dax_read_lock(void);
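
A note on the !CONFIG_FS_DAX stubs above: in that configuration there are no filesystem-dax entry locks to take, so the lock stub only reports whether the page belongs to a DAX inode at all (which covers device-dax, pinned by its dev_pagemap reference rather than an entry lock), and the unlock stub is a no-op. The guard shown in the sketch near the top of this page therefore works unchanged in both configurations.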
