
Commit 55f8163

irides authored and Darrick J. Wong committed
fsdax: factor out helpers to simplify the dax fault code
The dax page fault code is too long and a bit difficult to read, and it is hard to follow when we try to add new features. Some of the PTE/PMD code paths have similar logic, so factor out helper functions to simplify the code.

Signed-off-by: Shiyang Ruan <[email protected]>
Reviewed-by: Ritesh Harjani <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
[hch: minor cleanups]
Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Darrick J. Wong <[email protected]>
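
For context, a minimal sketch (not part of this commit) of the caller side the message alludes to: for a synchronous fault, dax_iomap_fault() hands back VM_FAULT_NEEDDSYNC plus the pfn, and the filesystem fault handler then calls dax_finish_sync_fault(), which syncs the faulted range and only afterwards inserts the PTE/PMD. The handler name and the ops argument below are placeholders; the pattern is loosely modeled on the existing xfs/ext4 callers.

/*
 * Hypothetical ->huge_fault handler (illustration only): consume the pfn
 * returned for a synchronous DAX fault and insert it after the sync.
 */
static vm_fault_t example_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size, const struct iomap_ops *ops)
{
	pfn_t pfn;
	vm_fault_t ret;

	ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, ops);
	if (ret & VM_FAULT_NEEDDSYNC)
		/* flush the faulted range, then insert the PTE/PMD */
		ret = dax_finish_sync_fault(vmf, pe_size, pfn);
	return ret;
}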
1 parent b74b129 commit 55f8163

File tree

1 file changed: +84 −69 lines changed

fs/dax.c

Lines changed: 84 additions & 69 deletions
@@ -1255,6 +1255,53 @@ static bool dax_fault_is_synchronous(unsigned long flags,
 		&& (iomap->flags & IOMAP_F_DIRTY);
 }
 
+/*
+ * When handling a synchronous page fault and the inode need a fsync, we can
+ * insert the PTE/PMD into page tables only after that fsync happened. Skip
+ * insertion for now and return the pfn so that caller can insert it after the
+ * fsync is done.
+ */
+static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
+{
+	if (WARN_ON_ONCE(!pfnp))
+		return VM_FAULT_SIGBUS;
+	*pfnp = pfn;
+	return VM_FAULT_NEEDDSYNC;
+}
+
+static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf, struct iomap *iomap,
+		loff_t pos)
+{
+	sector_t sector = dax_iomap_sector(iomap, pos);
+	unsigned long vaddr = vmf->address;
+	vm_fault_t ret;
+	int error = 0;
+
+	switch (iomap->type) {
+	case IOMAP_HOLE:
+	case IOMAP_UNWRITTEN:
+		clear_user_highpage(vmf->cow_page, vaddr);
+		break;
+	case IOMAP_MAPPED:
+		error = copy_cow_page_dax(iomap->bdev, iomap->dax_dev, sector,
+					  vmf->cow_page, vaddr);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		error = -EIO;
+		break;
+	}
+
+	if (error)
+		return dax_fault_return(error);
+
+	__SetPageUptodate(vmf->cow_page);
+	ret = finish_fault(vmf);
+	if (!ret)
+		return VM_FAULT_DONE_COW;
+	return ret;
+}
+
 static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		int *iomap_errp, const struct iomap_ops *ops)
 {
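
Aside (not part of the diff): the dax_fault_cow_page() helper above serves write faults on private (MAP_PRIVATE) mappings, where the fault core allocates vmf->cow_page and the data has to be copied from the device into that page. A minimal userspace trigger for this path might look like the following; the file path is an assumption and a DAX-capable filesystem is required.

/* Illustration only: a write to a MAP_PRIVATE mapping of a DAX file takes
 * the CoW branch handled by dax_fault_cow_page(). The path is hypothetical.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/pmem/file", O_RDWR);	/* DAX-mounted fs assumed */
	char *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	memset(p, 0x5a, 4096);	/* write fault: block is copied into cow_page */
	munmap(p, 4096);
	close(fd);
	return 0;
}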
@@ -1323,30 +1370,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	}
 
 	if (vmf->cow_page) {
-		sector_t sector = dax_iomap_sector(&iomap, pos);
-
-		switch (iomap.type) {
-		case IOMAP_HOLE:
-		case IOMAP_UNWRITTEN:
-			clear_user_highpage(vmf->cow_page, vaddr);
-			break;
-		case IOMAP_MAPPED:
-			error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev,
-						  sector, vmf->cow_page, vaddr);
-			break;
-		default:
-			WARN_ON_ONCE(1);
-			error = -EIO;
-			break;
-		}
-
-		if (error)
-			goto error_finish_iomap;
-
-		__SetPageUptodate(vmf->cow_page);
-		ret = finish_fault(vmf);
-		if (!ret)
-			ret = VM_FAULT_DONE_COW;
+		ret = dax_fault_cow_page(vmf, &iomap, pos);
 		goto finish_iomap;
 	}
 
@@ -1366,19 +1390,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
 				 0, write && !sync);
 
-	/*
-	 * If we are doing synchronous page fault and inode needs fsync,
-	 * we can insert PTE into page tables only after that happens.
-	 * Skip insertion for now and return the pfn so that caller can
-	 * insert it after fsync is done.
-	 */
 	if (sync) {
-		if (WARN_ON_ONCE(!pfnp)) {
-			error = -EIO;
-			goto error_finish_iomap;
-		}
-		*pfnp = pfn;
-		ret = VM_FAULT_NEEDDSYNC | major;
+		ret = dax_fault_synchronous_pfnp(pfnp, pfn);
 		goto finish_iomap;
 	}
 	trace_dax_insert_mapping(inode, vmf, entry);
@@ -1477,13 +1490,45 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 	return VM_FAULT_FALLBACK;
 }
 
+static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas,
+		pgoff_t max_pgoff)
+{
+	unsigned long pmd_addr = vmf->address & PMD_MASK;
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+	/*
+	 * Make sure that the faulting address's PMD offset (color) matches
+	 * the PMD offset from the start of the file.  This is necessary so
+	 * that a PMD range in the page table overlaps exactly with a PMD
+	 * range in the page cache.
+	 */
+	if ((vmf->pgoff & PG_PMD_COLOUR) !=
+	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
+		return true;
+
+	/* Fall back to PTEs if we're going to COW */
+	if (write && !(vmf->vma->vm_flags & VM_SHARED))
+		return true;
+
+	/* If the PMD would extend outside the VMA */
+	if (pmd_addr < vmf->vma->vm_start)
+		return true;
+	if ((pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
+		return true;
+
+	/* If the PMD would extend beyond the file size */
+	if ((xas->xa_index | PG_PMD_COLOUR) >= max_pgoff)
+		return true;
+
+	return false;
+}
+
 static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		const struct iomap_ops *ops)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
-	unsigned long pmd_addr = vmf->address & PMD_MASK;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	bool sync;
 	unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
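
Aside (not part of the diff): the "colour" check that dax_fault_check_fallback() consolidates requires the faulting virtual address and the file offset to be congruent modulo a PMD, so that a 2 MiB mapping in the page table lines up with a 2 MiB range of the file. On x86-64 with 4 KiB pages and 2 MiB PMDs, PG_PMD_COLOUR is 511. The macro names and values below are made up for illustration; only the congruence test mirrors the kernel check.

/* Illustration only: the PMD colour congruence with example numbers. */
#include <stdio.h>

#define EX_PAGE_SHIFT	12					/* 4 KiB pages */
#define EX_PMD_COLOUR	((1UL << (21 - EX_PAGE_SHIFT)) - 1)	/* 511 */

int main(void)
{
	unsigned long pgoff   = 0x200;			/* 2 MiB into the file */
	unsigned long address = 0x7f0000200000UL;	/* faulting address */

	/* Same congruence as dax_fault_check_fallback(): a mismatch means a
	 * PMD entry cannot cover a PMD-aligned slice of the file, so the
	 * kernel falls back to PTEs.
	 */
	if ((pgoff & EX_PMD_COLOUR) !=
	    ((address >> EX_PAGE_SHIFT) & EX_PMD_COLOUR))
		printf("misaligned: fall back to PTEs\n");
	else
		printf("PMD-aligned: a huge fault is possible\n");
	return 0;
}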
@@ -1506,33 +1551,12 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 	trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
 
-	/*
-	 * Make sure that the faulting address's PMD offset (color) matches
-	 * the PMD offset from the start of the file.  This is necessary so
-	 * that a PMD range in the page table overlaps exactly with a PMD
-	 * range in the page cache.
-	 */
-	if ((vmf->pgoff & PG_PMD_COLOUR) !=
-	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
-		goto fallback;
-
-	/* Fall back to PTEs if we're going to COW */
-	if (write && !(vma->vm_flags & VM_SHARED))
-		goto fallback;
-
-	/* If the PMD would extend outside the VMA */
-	if (pmd_addr < vma->vm_start)
-		goto fallback;
-	if ((pmd_addr + PMD_SIZE) > vma->vm_end)
-		goto fallback;
-
 	if (xas.xa_index >= max_pgoff) {
 		result = VM_FAULT_SIGBUS;
 		goto out;
 	}
 
-	/* If the PMD would extend beyond the file size */
-	if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
+	if (dax_fault_check_fallback(vmf, &xas, max_pgoff))
 		goto fallback;
 
 	/*
@@ -1584,17 +1608,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
 				 DAX_PMD, write && !sync);
 
-	/*
-	 * If we are doing synchronous page fault and inode needs fsync,
-	 * we can insert PMD into page tables only after that happens.
-	 * Skip insertion for now and return the pfn so that caller can
-	 * insert it after fsync is done.
-	 */
 	if (sync) {
-		if (WARN_ON_ONCE(!pfnp))
-			goto finish_iomap;
-		*pfnp = pfn;
-		result = VM_FAULT_NEEDDSYNC;
+		result = dax_fault_synchronous_pfnp(pfnp, pfn);
 		goto finish_iomap;
 	}
 