Skip to content

Commit d1a5f2b

Browse files
committed
block: use DAX for partition table reads
Avoid populating pagecache when the block device is in DAX mode. Otherwise these page cache entries collide with the fsync/msync implementation and break data durability guarantees.

Cc: Jan Kara <[email protected]>
Cc: Jeff Moyer <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Andrew Morton <[email protected]>
Reported-by: Ross Zwisler <[email protected]>
Tested-by: Ross Zwisler <[email protected]>
Reviewed-by: Matthew Wilcox <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
1 parent 9f4736f commit d1a5f2b

File tree

3 files changed

+46
-3
lines changed

3 files changed

+46
-3
lines changed

block/partition-generic.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/kmod.h>
1717
#include <linux/ctype.h>
1818
#include <linux/genhd.h>
19+
#include <linux/dax.h>
1920
#include <linux/blktrace_api.h>
2021

2122
#include "partitions/check.h"
@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
550551
return 0;
551552
}
552553

553-
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
554+
/*
 * Fetch, through the block device inode's address_space, the page that
 * contains sector @n. The sector number is converted to a page index by
 * shifting out the sectors-per-page bits. Returns whatever
 * read_mapping_page() returns.
 */
static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
{
	pgoff_t index = (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9));

	return read_mapping_page(bdev->bd_inode->i_mapping, index, NULL);
}
561+
562+
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
563+
{
556564
struct page *page;
557565

558-
page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
559-
NULL);
566+
/* don't populate page cache for dax capable devices */
567+
if (IS_DAX(bdev->bd_inode))
568+
page = read_dax_sector(bdev, n);
569+
else
570+
page = read_pagecache_sector(bdev, n);
571+
560572
if (!IS_ERR(page)) {
561573
if (PageError(page))
562574
goto fail;

fs/dax.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
5858
blk_queue_exit(bdev->bd_queue);
5959
}
6060

61+
/*
 * read_dax_sector() - copy the page-sized span holding sector @n into a
 * newly allocated page, bypassing the page cache.
 * @bdev: block device to read from
 * @n: sector number; masked down to a PAGE_SIZE / 512 sector boundary
 *
 * Returns the freshly allocated page on success (it is not freed here on
 * that path). Returns ERR_PTR(-ENOMEM) if the page cannot be allocated, or
 * ERR_PTR() of the negative code reported by dax_map_atomic().
 */
struct page *read_dax_sector(struct block_device *bdev, sector_t n)
{
	struct page *page = alloc_pages(GFP_KERNEL, 0);
	struct blk_dax_ctl dax = {
		.size = PAGE_SIZE,
		.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
	};
	long rc;

	if (!page)
		return ERR_PTR(-ENOMEM);

	rc = dax_map_atomic(bdev, &dax);
	if (rc < 0) {
		/* the page is never handed to the caller; don't leak it */
		__free_page(page);
		return ERR_PTR(rc);
	}
	memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
	dax_unmap_atomic(bdev, &dax);
	return page;
}
80+
6181
/*
6282
* dax_clear_blocks() is called from within transaction context from XFS,
6383
* and hence this means the stack from this point must follow GFP_NOFS

include/linux/dax.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
1414
dax_iodone_t);
1515
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
1616
dax_iodone_t);
17+
18+
#ifdef CONFIG_FS_DAX
19+
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
20+
#else
21+
static inline struct page *read_dax_sector(struct block_device *bdev,
22+
sector_t n)
23+
{
24+
return ERR_PTR(-ENXIO);
25+
}
26+
#endif
27+
1728
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1829
int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
1930
unsigned int flags, get_block_t, dax_iodone_t);

0 commit comments

Comments
 (0)