
Commit cfc93c6
Author: Matthew Wilcox

dax: Convert dax_insert_pfn_mkwrite to XArray
Add some XArray-based helper functions to replace the radix tree based
metaphors currently in use. The biggest change is that converted code
doesn't see its own lock bit; get_unlocked_entry() always returns an
entry with the lock bit clear. So we don't have to mess around loading
the current entry and clearing the lock bit; we can just store the
unlocked entry that we already have.

Signed-off-by: Matthew Wilcox <[email protected]>
Parent: ec4907f
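
As a sketch of the convention the message describes, a converted caller now looks roughly like this (the surrounding fault-handler context is hypothetical; the helper names are the ones added in the diff below):

	/* Hypothetical caller, for illustration only. */
	XA_STATE(xas, &mapping->i_pages, index);
	void *entry;

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas);	/* lock bit already clear */
	dax_lock_entry(&xas, entry);		/* stores entry | DAX_LOCKED */
	xas_unlock_irq(&xas);

	/* ... work that may sleep; the entry stays locked ... */

	dax_unlock_entry(&xas, entry);		/* store back the value we kept */

Because get_unlocked_entry() returned the entry with DAX_LOCKED already clear, the final unlock can store `entry` unchanged; there is no load-and-clear step.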

1 file changed, 117 additions(+), 32 deletions(-)


diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -38,6 +38,17 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/fs_dax.h>
 
+static inline unsigned int pe_order(enum page_entry_size pe_size)
+{
+	if (pe_size == PE_SIZE_PTE)
+		return PAGE_SHIFT - PAGE_SHIFT;
+	if (pe_size == PE_SIZE_PMD)
+		return PMD_SHIFT - PAGE_SHIFT;
+	if (pe_size == PE_SIZE_PUD)
+		return PUD_SHIFT - PAGE_SHIFT;
+	return ~0;
+}
+
 /* We choose 4096 entries - same as per-zone page wait tables */
 #define DAX_WAIT_TABLE_BITS 12
 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
@@ -46,6 +57,9 @@
 #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
 #define PG_PMD_NR (PMD_SIZE >> PAGE_SHIFT)
 
+/* The order of a PMD entry */
+#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
+
 static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];
 
 static int __init init_dax_wait_table(void)
@@ -85,10 +99,15 @@ static void *dax_make_locked(unsigned long pfn, unsigned long flags)
 			DAX_LOCKED);
 }
 
+static bool dax_is_locked(void *entry)
+{
+	return xa_to_value(entry) & DAX_LOCKED;
+}
+
 static unsigned int dax_entry_order(void *entry)
 {
 	if (xa_to_value(entry) & DAX_PMD)
-		return PMD_SHIFT - PAGE_SHIFT;
+		return PMD_ORDER;
 	return 0;
 }
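
Since DAX entries are XArray value entries, the lock bit travels inside the value itself. A minimal sketch of the round trip, assuming `entry` is an unlocked DAX value entry:

	/* Illustration only, not part of the commit. */
	void *locked = xa_mk_value(xa_to_value(entry) | DAX_LOCKED);
	/* dax_is_locked(locked) is now true; dax_lock_entry() added
	 * below performs exactly this transformation via xas_store(). */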

@@ -181,6 +200,81 @@ static void dax_wake_mapping_entry_waiter(struct xarray *xa,
 	__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 }
 
+static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+{
+	return dax_wake_mapping_entry_waiter(xas->xa, xas->xa_index, entry,
+								wake_all);
+}
+
+/*
+ * Look up entry in page cache, wait for it to become unlocked if it
+ * is a DAX entry and return it. The caller must subsequently call
+ * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
+ * if it did.
+ *
+ * Must be called with the i_pages lock held.
+ */
+static void *get_unlocked_entry(struct xa_state *xas)
+{
+	void *entry;
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	for (;;) {
+		entry = xas_load(xas);
+		if (!entry || xa_is_internal(entry) ||
+				WARN_ON_ONCE(!xa_is_value(entry)) ||
+				!dax_is_locked(entry))
+			return entry;
+
+		wq = dax_entry_waitqueue(xas->xa, xas->xa_index, entry,
+				&ewait.key);
+		prepare_to_wait_exclusive(wq, &ewait.wait,
+					  TASK_UNINTERRUPTIBLE);
+		xas_unlock_irq(xas);
+		xas_reset(xas);
+		schedule();
+		finish_wait(wq, &ewait.wait);
+		xas_lock_irq(xas);
+	}
+}
+
+static void put_unlocked_entry(struct xa_state *xas, void *entry)
+{
+	/* If we were the only waiter woken, wake the next one */
+	if (entry)
+		dax_wake_entry(xas, entry, false);
+}
+
+/*
+ * We used the xa_state to get the entry, but then we locked the entry and
+ * dropped the xa_lock, so we know the xa_state is stale and must be reset
+ * before use.
+ */
+static void dax_unlock_entry(struct xa_state *xas, void *entry)
+{
+	void *old;
+
+	xas_reset(xas);
+	xas_lock_irq(xas);
+	old = xas_store(xas, entry);
+	xas_unlock_irq(xas);
+	BUG_ON(!dax_is_locked(old));
+	dax_wake_entry(xas, entry, false);
+}
+
+/*
+ * Return: The entry stored at this location before it was locked.
+ */
+static void *dax_lock_entry(struct xa_state *xas, void *entry)
+{
+	unsigned long v = xa_to_value(entry);
+	return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
+}
+
 /*
  * Check whether the given slot is locked. Must be called with the i_pages
  * lock held.
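
A usage note on the pairing these helpers prescribe: a caller that bails out without locking the entry calls put_unlocked_entry() (which only wakes the next exclusive waiter), while a caller that locked it calls dax_unlock_entry(). Sketched with a hypothetical early-exit path:

	xas_lock_irq(&xas);
	entry = get_unlocked_entry(&xas);
	if (!entry) {
		put_unlocked_entry(&xas, entry);	/* never locked it */
		xas_unlock_irq(&xas);
		return VM_FAULT_NOPAGE;
	}
	dax_lock_entry(&xas, entry);
	xas_unlock_irq(&xas);
	/* ... */
	dax_unlock_entry(&xas, entry);			/* locked it */

This is exactly the shape dax_insert_pfn_mkwrite() takes in the hunk below.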
@@ -1728,50 +1822,46 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
 /*
  * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
  * @vmf: The description of the fault
- * @pe_size: Size of entry to be inserted
  * @pfn: PFN to insert
+ * @order: Order of entry to insert.
  *
  * This function inserts a writeable PTE or PMD entry into the page tables
  * for an mmaped DAX file. It also marks the page cache entry as dirty.
  */
-static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
-				enum page_entry_size pe_size,
-				pfn_t pfn)
+static vm_fault_t
+dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
-	void *entry, **slot;
-	pgoff_t index = vmf->pgoff;
+	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
+	void *entry;
 	vm_fault_t ret;
 
-	xa_lock_irq(&mapping->i_pages);
-	entry = get_unlocked_mapping_entry(mapping, index, &slot);
+	xas_lock_irq(&xas);
+	entry = get_unlocked_entry(&xas);
 	/* Did we race with someone splitting entry or so? */
 	if (!entry ||
-	    (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
-	    (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
-		put_unlocked_mapping_entry(mapping, index, entry);
-		xa_unlock_irq(&mapping->i_pages);
+	    (order == 0 && !dax_is_pte_entry(entry)) ||
+	    (order == PMD_ORDER && (xa_is_internal(entry) ||
+				    !dax_is_pmd_entry(entry)))) {
+		put_unlocked_entry(&xas, entry);
+		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
 						      VM_FAULT_NOPAGE);
 		return VM_FAULT_NOPAGE;
 	}
-	radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
-	entry = lock_slot(mapping, slot);
-	xa_unlock_irq(&mapping->i_pages);
-	switch (pe_size) {
-	case PE_SIZE_PTE:
+	xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
+	dax_lock_entry(&xas, entry);
+	xas_unlock_irq(&xas);
+	if (order == 0)
 		ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
-		break;
 #ifdef CONFIG_FS_DAX_PMD
-	case PE_SIZE_PMD:
+	else if (order == PMD_ORDER)
 		ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
 				pfn, true);
-		break;
 #endif
-	default:
+	else
 		ret = VM_FAULT_FALLBACK;
-	}
-	put_locked_mapping_entry(mapping, index);
+	dax_unlock_entry(&xas, entry);
 	trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
 	return ret;
 }
@@ -1791,17 +1881,12 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 {
 	int err;
 	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
-	size_t len = 0;
+	unsigned int order = pe_order(pe_size);
+	size_t len = PAGE_SIZE << order;
 
-	if (pe_size == PE_SIZE_PTE)
-		len = PAGE_SIZE;
-	else if (pe_size == PE_SIZE_PMD)
-		len = PMD_SIZE;
-	else
-		WARN_ON_ONCE(1);
 	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
 	if (err)
 		return VM_FAULT_SIGBUS;
-	return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
+	return dax_insert_pfn_mkwrite(vmf, pfn, order);
 }
 EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
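
The simplification in dax_finish_sync_fault() leans on the order arithmetic: PAGE_SIZE << 0 == PAGE_SIZE for a PTE, and PAGE_SIZE << PMD_ORDER == 1 << (PAGE_SHIFT + (PMD_SHIFT - PAGE_SHIFT)) == PMD_SIZE for a PMD, so `len = PAGE_SIZE << order` reproduces both branches of the deleted if/else chain. A compile-time check of that equivalence (not part of the commit) would be:

	/* Not in the commit; verifies the shift identity at build time. */
	BUILD_BUG_ON(PAGE_SIZE << PMD_ORDER != PMD_SIZE);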
