@@ -123,6 +123,15 @@ static int dax_is_empty_entry(void *entry)
 	return xa_to_value(entry) & DAX_EMPTY;
 }
 
+/*
+ * true if the entry that was found is of a smaller order than the entry
+ * we were looking for
+ */
+static bool dax_is_conflict(void *entry)
+{
+	return entry == XA_RETRY_ENTRY;
+}
+
 /*
  * DAX page cache entry locking
  */
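
The helper reserves XA_RETRY_ENTRY, an XArray-internal value that DAX never stores itself, as a "conflict" sentinel meaning "an entry exists here, but it is of a smaller order than the one requested". A rough, self-contained userspace sketch of the resulting three-way lookup protocol (names such as lookup_entry() and CONFLICT are illustrative, not part of fs/dax.c):

    #include <stdio.h>

    /* Hypothetical stand-ins: one cached entry with a recorded order. */
    static void *slot_entry;                 /* NULL means "nothing stored" */
    static unsigned int slot_order;

    /* A distinct address that can never be a real entry, like XA_RETRY_ENTRY. */
    static char conflict_marker;
    #define CONFLICT ((void *)&conflict_marker)

    static int is_conflict(void *entry)      /* models dax_is_conflict() */
    {
        return entry == CONFLICT;
    }

    /* Models get_unlocked_entry(): NULL, a usable entry, or the conflict sentinel. */
    static void *lookup_entry(unsigned int order)
    {
        if (!slot_entry)
            return NULL;             /* nothing there: caller may create one */
        if (slot_order < order)
            return CONFLICT;         /* smaller entry in the way: fall back */
        return slot_entry;           /* usable entry, possibly of larger order */
    }

    int main(void)
    {
        static int pte_sized_thing;

        slot_entry = &pte_sized_thing;   /* pretend a PTE-order entry exists */
        slot_order = 0;

        void *entry = lookup_entry(9);   /* ask at PMD order */
        if (!entry)
            puts("no entry: create a new one");
        else if (is_conflict(entry))
            puts("conflict: fall back to PTE faults");
        else
            puts("found a usable entry");
        return 0;
    }

A distinct non-NULL sentinel is needed because NULL already means "no entry present" and is the caller's cue to create a fresh one.
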
@@ -195,11 +204,13 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
  * Look up entry in page cache, wait for it to become unlocked if it
  * is a DAX entry and return it. The caller must subsequently call
  * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry()
- * if it did.
+ * if it did. The entry returned may have a larger order than @order.
+ * If @order is larger than the order of the entry found in i_pages, this
+ * function returns a dax_is_conflict entry.
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_entry(struct xa_state *xas)
+static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
 {
 	void *entry;
 	struct wait_exceptional_entry_queue ewait;
@@ -210,6 +221,8 @@ static void *get_unlocked_entry(struct xa_state *xas)
 
 	for (;;) {
 		entry = xas_find_conflict(xas);
+		if (entry && dax_entry_order(entry) < order)
+			return XA_RETRY_ENTRY;
 		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
 				!dax_is_locked(entry))
 			return entry;
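
dax_entry_order() reads the entry's size from the DAX_PMD bit of the value entry, and xa_to_value(NULL) evaluates to 0, so a missing entry also reports order 0; that is why the check added above compares orders only for a non-NULL entry. A minimal sketch of that arithmetic (the flag bit mirrors the defines at the top of fs/dax.c, and PMD_ORDER of 9 assumes x86-64 with 4KiB pages):

    #include <assert.h>
    #include <stddef.h>

    #define DAX_PMD   (1UL << 1)    /* mirrors the entry flag bit in fs/dax.c */
    #define PMD_ORDER 9             /* 2MiB / 4KiB; illustrative */

    /* xa_to_value(): value entries keep their payload shifted up by one bit. */
    static unsigned long to_value(void *entry)
    {
        return (unsigned long)entry >> 1;
    }

    /* Models dax_entry_order(): PMD_ORDER when the DAX_PMD bit is set, else 0. */
    static unsigned int entry_order(void *entry)
    {
        return (to_value(entry) & DAX_PMD) ? PMD_ORDER : 0;
    }

    int main(void)
    {
        void *pte_entry = (void *)((0UL << 1) | 1);        /* value entry, no flags */
        void *pmd_entry = (void *)((DAX_PMD << 1) | 1);    /* value entry, DAX_PMD set */

        assert(entry_order(pte_entry) == 0);
        assert(entry_order(pmd_entry) == PMD_ORDER);
        assert(entry_order(NULL) == 0);  /* a missing entry also looks like order 0 */
        return 0;
    }
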
@@ -254,7 +267,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
 static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
-	if (entry)
+	if (entry && !dax_is_conflict(entry))
 		dax_wake_entry(xas, entry, false);
 }
 
@@ -461,28 +474,25 @@ void dax_unlock_page(struct page *page, dax_entry_t cookie)
  * overlap with xarray value entries.
  */
 static void *grab_mapping_entry(struct xa_state *xas,
-		struct address_space *mapping, unsigned long size_flag)
+		struct address_space *mapping, unsigned int order)
 {
 	unsigned long index = xas->xa_index;
 	bool pmd_downgrade = false; /* splitting PMD entry into PTE entries? */
 	void *entry;
 
 retry:
 	xas_lock_irq(xas);
-	entry = get_unlocked_entry(xas);
+	entry = get_unlocked_entry(xas, order);
 
 	if (entry) {
+		if (dax_is_conflict(entry))
+			goto fallback;
 		if (!xa_is_value(entry)) {
 			xas_set_err(xas, EIO);
 			goto out_unlock;
 		}
 
-		if (size_flag & DAX_PMD) {
-			if (dax_is_pte_entry(entry)) {
-				put_unlocked_entry(xas, entry);
-				goto fallback;
-			}
-		} else { /* trying to grab a PTE entry */
+		if (order == 0) {
 			if (dax_is_pmd_entry(entry) &&
 			    (dax_is_zero_entry(entry) ||
 			     dax_is_empty_entry(entry))) {
@@ -523,7 +533,11 @@ static void *grab_mapping_entry(struct xa_state *xas,
 	if (entry) {
 		dax_lock_entry(xas, entry);
 	} else {
-		entry = dax_make_entry(pfn_to_pfn_t(0), size_flag | DAX_EMPTY);
+		unsigned long flags = DAX_EMPTY;
+
+		if (order > 0)
+			flags |= DAX_PMD;
+		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
 		dax_lock_entry(xas, entry);
 		if (xas_error(xas))
 			goto out_unlock;
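
With the DAX_PMD size flag no longer passed in by callers, the flags of the freshly created empty placeholder entry are derived from the requested order instead. A small sketch of that mapping (the helper name is made up; the flag bits mirror fs/dax.c):

    #include <assert.h>

    #define DAX_PMD   (1UL << 1)    /* mirror of the fs/dax.c entry flag bits */
    #define DAX_EMPTY (1UL << 3)

    /* Hypothetical helper: flags for a freshly created empty placeholder entry. */
    static unsigned long empty_entry_flags(unsigned int order)
    {
        unsigned long flags = DAX_EMPTY;

        if (order > 0)
            flags |= DAX_PMD;   /* any huge order is tracked as a PMD entry */
        return flags;
    }

    int main(void)
    {
        assert(empty_entry_flags(0) == DAX_EMPTY);               /* PTE-sized */
        assert(empty_entry_flags(9) == (DAX_EMPTY | DAX_PMD));   /* PMD-sized */
        return 0;
    }
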
@@ -594,7 +608,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 		if (WARN_ON_ONCE(!xa_is_value(entry)))
 			continue;
 		if (unlikely(dax_is_locked(entry)))
-			entry = get_unlocked_entry(&xas);
+			entry = get_unlocked_entry(&xas, 0);
 		if (entry)
 			page = dax_busy_page(entry);
 		put_unlocked_entry(&xas, entry);
@@ -621,7 +635,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
 	void *entry;
 
 	xas_lock_irq(&xas);
-	entry = get_unlocked_entry(&xas);
+	entry = get_unlocked_entry(&xas, 0);
 	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
 		goto out;
 	if (!trunc &&
@@ -849,7 +863,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 	if (unlikely(dax_is_locked(entry))) {
 		void *old_entry = entry;
 
-		entry = get_unlocked_entry(xas);
+		entry = get_unlocked_entry(xas, 0);
 
 		/* Entry got punched out / reallocated? */
 		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
@@ -1510,7 +1524,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * entry is already in the array, for instance), it will return
 	 * VM_FAULT_FALLBACK.
 	 */
-	entry = grab_mapping_entry(&xas, mapping, DAX_PMD);
+	entry = grab_mapping_entry(&xas, mapping, PMD_ORDER);
 	if (xa_is_internal(entry)) {
 		result = xa_to_internal(entry);
 		goto fallback;
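
grab_mapping_entry()'s fallback path reports failure by returning an XArray internal entry wrapping VM_FAULT_FALLBACK, which the PMD fault handler unpacks here with xa_to_internal(). A rough userspace model of that encoding, assuming the bit layout of include/linux/xarray.h (low two bits 10b mark an internal entry) and an illustrative stand-in value for VM_FAULT_FALLBACK:

    #include <assert.h>

    /*
     * Rough model of XArray internal entries: the low two bits are 10b and the
     * payload sits in the remaining bits, so such a pointer can never collide
     * with a real kernel pointer or a value entry.
     */
    static void *mk_internal(unsigned long v)
    {
        return (void *)((v << 2) | 2);
    }

    static int is_internal(void *entry)
    {
        return ((unsigned long)entry & 3) == 2;
    }

    static unsigned long to_internal(void *entry)
    {
        return (unsigned long)entry >> 2;
    }

    #define FALLBACK 0x0800UL   /* illustrative stand-in for VM_FAULT_FALLBACK */

    int main(void)
    {
        /* The fallback path packs the fault result into an internal entry... */
        void *entry = mk_internal(FALLBACK);

        /* ...and the PMD fault handler recognises and unpacks it. */
        assert(is_internal(entry));
        assert(to_internal(entry) == FALLBACK);
        return 0;
    }
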
@@ -1659,11 +1673,10 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	vm_fault_t ret;
 
 	xas_lock_irq(&xas);
-	entry = get_unlocked_entry(&xas);
+	entry = get_unlocked_entry(&xas, order);
 	/* Did we race with someone splitting entry or so? */
-	if (!entry ||
-			(order == 0 && !dax_is_pte_entry(entry)) ||
-			(order == PMD_ORDER && !dax_is_pmd_entry(entry))) {
+	if (!entry || dax_is_conflict(entry) ||
+	    (order == 0 && !dax_is_pte_entry(entry))) {
 		put_unlocked_entry(&xas, entry);
 		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,