#define CREATE_TRACE_POINTS
#include <trace/events/fs_dax.h>

+static inline unsigned int pe_order(enum page_entry_size pe_size)
+{
+        if (pe_size == PE_SIZE_PTE)
+                return PAGE_SHIFT - PAGE_SHIFT;
+        if (pe_size == PE_SIZE_PMD)
+                return PMD_SHIFT - PAGE_SHIFT;
+        if (pe_size == PE_SIZE_PUD)
+                return PUD_SHIFT - PAGE_SHIFT;
+        return ~0;
+}
+
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)

#define PG_PMD_COLOUR        ((PMD_SIZE >> PAGE_SHIFT) - 1)
#define PG_PMD_NR        (PMD_SIZE >> PAGE_SHIFT)

+/* The order of a PMD entry */
+#define PMD_ORDER        (PMD_SHIFT - PAGE_SHIFT)
+
static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES];

static int __init init_dax_wait_table(void)
@@ -85,10 +99,15 @@ static void *dax_make_locked(unsigned long pfn, unsigned long flags)
			DAX_LOCKED);
}

+static bool dax_is_locked(void *entry)
+{
+        return xa_to_value(entry) & DAX_LOCKED;
+}
+
static unsigned int dax_entry_order(void *entry)
{
        if (xa_to_value(entry) & DAX_PMD)
-                return PMD_SHIFT - PAGE_SHIFT;
+                return PMD_ORDER;
        return 0;
}
@@ -181,6 +200,81 @@ static void dax_wake_mapping_entry_waiter(struct xarray *xa,
        __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
}

+static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+{
+        return dax_wake_mapping_entry_waiter(xas->xa, xas->xa_index, entry,
+                                                                wake_all);
+}
+
+/*
+ * Look up entry in page cache, wait for it to become unlocked if it
+ * is a DAX entry and return it. The caller must subsequently call
+ * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
+ * if it did.
+ *
+ * Must be called with the i_pages lock held.
+ */
+static void *get_unlocked_entry(struct xa_state *xas)
+{
+        void *entry;
+        struct wait_exceptional_entry_queue ewait;
+        wait_queue_head_t *wq;
+
+        init_wait(&ewait.wait);
+        ewait.wait.func = wake_exceptional_entry_func;
+
+        for (;;) {
+                entry = xas_load(xas);
+                if (!entry || xa_is_internal(entry) ||
+                                WARN_ON_ONCE(!xa_is_value(entry)) ||
+                                !dax_is_locked(entry))
+                        return entry;
+
+                wq = dax_entry_waitqueue(xas->xa, xas->xa_index, entry,
+                                &ewait.key);
+                prepare_to_wait_exclusive(wq, &ewait.wait,
+                                          TASK_UNINTERRUPTIBLE);
+                xas_unlock_irq(xas);
+                xas_reset(xas);
+                schedule();
+                finish_wait(wq, &ewait.wait);
+                xas_lock_irq(xas);
+        }
+}
+
+static void put_unlocked_entry(struct xa_state *xas, void *entry)
+{
+        /* If we were the only waiter woken, wake the next one */
+        if (entry)
+                dax_wake_entry(xas, entry, false);
+}
+
+/*
+ * We used the xa_state to get the entry, but then we locked the entry and
+ * dropped the xa_lock, so we know the xa_state is stale and must be reset
+ * before use.
+ */
+static void dax_unlock_entry(struct xa_state *xas, void *entry)
+{
+        void *old;
+
+        xas_reset(xas);
+        xas_lock_irq(xas);
+        old = xas_store(xas, entry);
+        xas_unlock_irq(xas);
+        BUG_ON(!dax_is_locked(old));
+        dax_wake_entry(xas, entry, false);
+}
+
+/*
+ * Return: The entry stored at this location before it was locked.
+ */
+static void *dax_lock_entry(struct xa_state *xas, void *entry)
+{
+        unsigned long v = xa_to_value(entry);
+        return xas_store(xas, xa_mk_value(v | DAX_LOCKED));
+}
+
/*
 * Check whether the given slot is locked. Must be called with the i_pages
 * lock held.
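
A minimal sketch of the calling sequence described by the comments in the hunk above, shown outside the diff for clarity. The function name example_use_of_entry_locking and its mapping/index parameters are illustrative only and are not part of fs/dax.c; real callers (such as dax_insert_pfn_mkwrite() later in this commit) also validate the entry type before locking it.

static void example_use_of_entry_locking(struct address_space *mapping,
                                         pgoff_t index)
{
        XA_STATE(xas, &mapping->i_pages, index);
        void *entry;

        xas_lock_irq(&xas);
        entry = get_unlocked_entry(&xas);       /* sleeps while the entry is locked */
        if (!entry) {
                /* we did not lock it: wake the next waiter (if any) and bail */
                put_unlocked_entry(&xas, entry);
                xas_unlock_irq(&xas);
                return;
        }
        dax_lock_entry(&xas, entry);            /* store the entry with DAX_LOCKED set */
        xas_unlock_irq(&xas);

        /* ... operate on the entry with the xa_lock dropped ... */

        /* dax_unlock_entry() resets the now-stale xa_state before storing */
        dax_unlock_entry(&xas, entry);
}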
@@ -1728,50 +1822,46 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
/*
 * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
 * @vmf: The description of the fault
- * @pe_size: Size of entry to be inserted
 * @pfn: PFN to insert
+ * @order: Order of entry to insert.
 *
 * This function inserts a writeable PTE or PMD entry into the page tables
 * for an mmaped DAX file. It also marks the page cache entry as dirty.
 */
-static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
-                                  enum page_entry_size pe_size,
-                                  pfn_t pfn)
+static vm_fault_t
+dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
{
        struct address_space *mapping = vmf->vma->vm_file->f_mapping;
-        void *entry, **slot;
-        pgoff_t index = vmf->pgoff;
+        XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
+        void *entry;
        vm_fault_t ret;

-        xa_lock_irq(&mapping->i_pages);
-        entry = get_unlocked_mapping_entry(mapping, index, &slot);
+        xas_lock_irq(&xas);
+        entry = get_unlocked_entry(&xas);
        /* Did we race with someone splitting entry or so? */
        if (!entry ||
-            (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
-            (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
-                put_unlocked_mapping_entry(mapping, index, entry);
-                xa_unlock_irq(&mapping->i_pages);
+            (order == 0 && !dax_is_pte_entry(entry)) ||
+            (order == PMD_ORDER && (xa_is_internal(entry) ||
+                                    !dax_is_pmd_entry(entry)))) {
+                put_unlocked_entry(&xas, entry);
+                xas_unlock_irq(&xas);
                trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
                                                      VM_FAULT_NOPAGE);
                return VM_FAULT_NOPAGE;
        }
-        radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
-        entry = lock_slot(mapping, slot);
-        xa_unlock_irq(&mapping->i_pages);
-        switch (pe_size) {
-        case PE_SIZE_PTE:
+        xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
+        dax_lock_entry(&xas, entry);
+        xas_unlock_irq(&xas);
+        if (order == 0)
                ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
-                break;
#ifdef CONFIG_FS_DAX_PMD
-        case PE_SIZE_PMD:
+        else if (order == PMD_ORDER)
                ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
                                pfn, true);
-                break;
#endif
-        default:
+        else
                ret = VM_FAULT_FALLBACK;
-        }
-        put_locked_mapping_entry(mapping, index);
+        dax_unlock_entry(&xas, entry);
        trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
        return ret;
}
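
To make the order checks in the hunk above concrete, the arithmetic below assumes the usual x86-64 shifts (PAGE_SHIFT = 12, PMD_SHIFT = 21), which the commit itself does not state:

/*
 * Illustrative arithmetic only (assumed x86-64 constants):
 *
 *   order == 0           ->  PTE entry, 1 << 0 =   1 base page  (4 KiB)
 *   order == PMD_ORDER   ->  PMD entry, 1 << 9 = 512 base pages (2 MiB)
 *
 * Any other order falls through to VM_FAULT_FALLBACK, just as the old
 * "default:" arm of the switch did.
 */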
@@ -1791,17 +1881,12 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
{
        int err;
        loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
-        size_t len = 0;
+        unsigned int order = pe_order(pe_size);
+        size_t len = PAGE_SIZE << order;

-        if (pe_size == PE_SIZE_PTE)
-                len = PAGE_SIZE;
-        else if (pe_size == PE_SIZE_PMD)
-                len = PMD_SIZE;
-        else
-                WARN_ON_ONCE(1);
        err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
        if (err)
                return VM_FAULT_SIGBUS;
-        return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
+        return dax_insert_pfn_mkwrite(vmf, pfn, order);
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
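
Note that dax_finish_sync_fault() keeps enum page_entry_size as its external interface and converts it to an order via pe_order() internally. A rough sketch of how a filesystem fault handler reaches it, loosely modelled on the existing ext4/xfs callers; the handler name and example_iomap_ops are placeholders, and the journalling and locking a real filesystem does around this are omitted:

static vm_fault_t example_dax_huge_fault(struct vm_fault *vmf,
                                         enum page_entry_size pe_size)
{
        pfn_t pfn;
        vm_fault_t ret;

        /* a synchronous write fault is reported back via VM_FAULT_NEEDDSYNC */
        ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &example_iomap_ops);
        if (ret & VM_FAULT_NEEDDSYNC)
                ret = dax_finish_sync_fault(vmf, pe_size, pfn);
        return ret;
}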