Skip to content

Commit 29a8ea4

Browse files
committed
Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm fixes from Dan Williams:

"1/ Fixes to the libnvdimm 'pfn' device that establishes a reserved area for storing a struct page array.

2/ Fixes for dax operations on a raw block device to prevent pagecache collisions with dax mappings.

3/ A fix for pfn_t usage in vm_insert_mixed that led to a null pointer de-reference.

These have received build success notification from the kbuild robot across 153 configs and pass the latest ndctl tests"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  - phys_to_pfn_t: use phys_addr_t
  - mm: fix pfn_t to page conversion in vm_insert_mixed
  - block: use DAX for partition table reads
  - block: revert runtime dax control of the raw block device
  - fs, block: force direct-I/O for dax-enabled block devices
  - devm_memremap_pages: fix vmem_altmap lifetime + alignment handling
  - libnvdimm, pfn: fix restoring memmap location
  - libnvdimm: fix mode determination for e820 devices
2 parents 36f90b0 + 76e9f0e commit 29a8ea4

File tree

13 files changed

+75
-93
lines changed

13 files changed

+75
-93
lines changed

block/ioctl.c

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -434,42 +434,6 @@ bool blkdev_dax_capable(struct block_device *bdev)
434434

435435
return true;
436436
}
437-
438-
static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
439-
{
440-
unsigned long arg;
441-
int rc = 0;
442-
443-
if (!capable(CAP_SYS_ADMIN))
444-
return -EACCES;
445-
446-
if (get_user(arg, (int __user *)(argp)))
447-
return -EFAULT;
448-
arg = !!arg;
449-
if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
450-
return 0;
451-
452-
if (arg)
453-
arg = S_DAX;
454-
455-
if (arg && !blkdev_dax_capable(bdev))
456-
return -ENOTTY;
457-
458-
inode_lock(bdev->bd_inode);
459-
if (bdev->bd_map_count == 0)
460-
inode_set_flags(bdev->bd_inode, arg, S_DAX);
461-
else
462-
rc = -EBUSY;
463-
inode_unlock(bdev->bd_inode);
464-
return rc;
465-
}
466-
#else
467-
static int blkdev_daxset(struct block_device *bdev, int arg)
468-
{
469-
if (arg)
470-
return -ENOTTY;
471-
return 0;
472-
}
473437
#endif
474438

475439
static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
@@ -634,8 +598,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
634598
case BLKTRACESETUP:
635599
case BLKTRACETEARDOWN:
636600
return blk_trace_ioctl(bdev, cmd, argp);
637-
case BLKDAXSET:
638-
return blkdev_daxset(bdev, arg);
639601
case BLKDAXGET:
640602
return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
641603
break;

block/partition-generic.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/kmod.h>
1717
#include <linux/ctype.h>
1818
#include <linux/genhd.h>
19+
#include <linux/dax.h>
1920
#include <linux/blktrace_api.h>
2021

2122
#include "partitions/check.h"
@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
550551
return 0;
551552
}
552553

553-
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
554+
static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
554555
{
555556
struct address_space *mapping = bdev->bd_inode->i_mapping;
557+
558+
return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
559+
NULL);
560+
}
561+
562+
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
563+
{
556564
struct page *page;
557565

558-
page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
559-
NULL);
566+
/* don't populate page cache for dax capable devices */
567+
if (IS_DAX(bdev->bd_inode))
568+
page = read_dax_sector(bdev, n);
569+
else
570+
page = read_pagecache_sector(bdev, n);
571+
560572
if (!IS_ERR(page)) {
561573
if (PageError(page))
562574
goto fail;

drivers/nvdimm/namespace_devs.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1277,10 +1277,12 @@ static ssize_t mode_show(struct device *dev,
12771277

12781278
device_lock(dev);
12791279
claim = ndns->claim;
1280-
if (pmem_should_map_pages(dev) || (claim && is_nd_pfn(claim)))
1281-
mode = "memory";
1282-
else if (claim && is_nd_btt(claim))
1280+
if (claim && is_nd_btt(claim))
12831281
mode = "safe";
1282+
else if (claim && is_nd_pfn(claim))
1283+
mode = "memory";
1284+
else if (!claim && pmem_should_map_pages(dev))
1285+
mode = "memory";
12841286
else
12851287
mode = "raw";
12861288
rc = sprintf(buf, "%s\n", mode);

drivers/nvdimm/pfn_devs.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -301,10 +301,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
301301

302302
switch (le32_to_cpu(pfn_sb->mode)) {
303303
case PFN_MODE_RAM:
304-
break;
305304
case PFN_MODE_PMEM:
306-
/* TODO: allocate from PMEM support */
307-
return -ENOTTY;
305+
break;
308306
default:
309307
return -ENXIO;
310308
}

fs/block_dev.c

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1736,56 +1736,28 @@ static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
17361736
return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
17371737
}
17381738

1739-
static void blkdev_vm_open(struct vm_area_struct *vma)
1740-
{
1741-
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
1742-
struct block_device *bdev = I_BDEV(bd_inode);
1743-
1744-
inode_lock(bd_inode);
1745-
bdev->bd_map_count++;
1746-
inode_unlock(bd_inode);
1747-
}
1748-
1749-
static void blkdev_vm_close(struct vm_area_struct *vma)
1750-
{
1751-
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
1752-
struct block_device *bdev = I_BDEV(bd_inode);
1753-
1754-
inode_lock(bd_inode);
1755-
bdev->bd_map_count--;
1756-
inode_unlock(bd_inode);
1757-
}
1758-
17591739
static const struct vm_operations_struct blkdev_dax_vm_ops = {
1760-
.open = blkdev_vm_open,
1761-
.close = blkdev_vm_close,
17621740
.fault = blkdev_dax_fault,
17631741
.pmd_fault = blkdev_dax_pmd_fault,
17641742
.pfn_mkwrite = blkdev_dax_fault,
17651743
};
17661744

17671745
static const struct vm_operations_struct blkdev_default_vm_ops = {
1768-
.open = blkdev_vm_open,
1769-
.close = blkdev_vm_close,
17701746
.fault = filemap_fault,
17711747
.map_pages = filemap_map_pages,
17721748
};
17731749

17741750
static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
17751751
{
17761752
struct inode *bd_inode = bdev_file_inode(file);
1777-
struct block_device *bdev = I_BDEV(bd_inode);
17781753

17791754
file_accessed(file);
1780-
inode_lock(bd_inode);
1781-
bdev->bd_map_count++;
17821755
if (IS_DAX(bd_inode)) {
17831756
vma->vm_ops = &blkdev_dax_vm_ops;
17841757
vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
17851758
} else {
17861759
vma->vm_ops = &blkdev_default_vm_ops;
17871760
}
1788-
inode_unlock(bd_inode);
17891761

17901762
return 0;
17911763
}

fs/dax.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
5858
blk_queue_exit(bdev->bd_queue);
5959
}
6060

61+
struct page *read_dax_sector(struct block_device *bdev, sector_t n)
62+
{
63+
struct page *page = alloc_pages(GFP_KERNEL, 0);
64+
struct blk_dax_ctl dax = {
65+
.size = PAGE_SIZE,
66+
.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
67+
};
68+
long rc;
69+
70+
if (!page)
71+
return ERR_PTR(-ENOMEM);
72+
73+
rc = dax_map_atomic(bdev, &dax);
74+
if (rc < 0)
75+
return ERR_PTR(rc);
76+
memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
77+
dax_unmap_atomic(bdev, &dax);
78+
return page;
79+
}
80+
6181
/*
6282
* dax_clear_blocks() is called from within transaction context from XFS,
6383
* and hence this means the stack from this point must follow GFP_NOFS

include/linux/dax.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
1414
dax_iodone_t);
1515
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
1616
dax_iodone_t);
17+
18+
#ifdef CONFIG_FS_DAX
19+
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
20+
#else
21+
static inline struct page *read_dax_sector(struct block_device *bdev,
22+
sector_t n)
23+
{
24+
return ERR_PTR(-ENXIO);
25+
}
26+
#endif
27+
1728
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1829
int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
1930
unsigned int flags, get_block_t, dax_iodone_t);

include/linux/fs.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -484,9 +484,6 @@ struct block_device {
484484
int bd_fsfreeze_count;
485485
/* Mutex for freeze */
486486
struct mutex bd_fsfreeze_mutex;
487-
#ifdef CONFIG_FS_DAX
488-
int bd_map_count;
489-
#endif
490487
};
491488

492489
/*
@@ -2907,7 +2904,7 @@ extern void replace_mount_options(struct super_block *sb, char *options);
29072904

29082905
static inline bool io_is_direct(struct file *filp)
29092906
{
2910-
return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
2907+
return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
29112908
}
29122909

29132910
static inline int iocb_flags(struct file *file)

include/linux/pfn_t.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
2929
return __pfn_to_pfn_t(pfn, 0);
3030
}
3131

32-
extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
32+
extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
3333

3434
static inline bool pfn_t_has_page(pfn_t pfn)
3535
{
@@ -48,7 +48,7 @@ static inline struct page *pfn_t_to_page(pfn_t pfn)
4848
return NULL;
4949
}
5050

51-
static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
51+
static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
5252
{
5353
return PFN_PHYS(pfn_t_to_pfn(pfn));
5454
}

include/uapi/linux/fs.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,6 @@ struct fsxattr {
222222
#define BLKSECDISCARD _IO(0x12,125)
223223
#define BLKROTATIONAL _IO(0x12,126)
224224
#define BLKZEROOUT _IO(0x12,127)
225-
#define BLKDAXSET _IO(0x12,128)
226225
#define BLKDAXGET _IO(0x12,129)
227226

228227
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */

kernel/memremap.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ void devm_memunmap(struct device *dev, void *addr)
150150
}
151151
EXPORT_SYMBOL(devm_memunmap);
152152

153-
pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
153+
pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
154154
{
155155
return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
156156
}
@@ -183,7 +183,11 @@ EXPORT_SYMBOL(put_zone_device_page);
183183

184184
static void pgmap_radix_release(struct resource *res)
185185
{
186-
resource_size_t key;
186+
resource_size_t key, align_start, align_size, align_end;
187+
188+
align_start = res->start & ~(SECTION_SIZE - 1);
189+
align_size = ALIGN(resource_size(res), SECTION_SIZE);
190+
align_end = align_start + align_size - 1;
187191

188192
mutex_lock(&pgmap_lock);
189193
for (key = res->start; key <= res->end; key += SECTION_SIZE)
@@ -226,12 +230,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
226230
percpu_ref_put(pgmap->ref);
227231
}
228232

229-
pgmap_radix_release(res);
230-
231233
/* pages are dead and unused, undo the arch mapping */
232234
align_start = res->start & ~(SECTION_SIZE - 1);
233235
align_size = ALIGN(resource_size(res), SECTION_SIZE);
234236
arch_remove_memory(align_start, align_size);
237+
pgmap_radix_release(res);
235238
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
236239
"%s: failed to free all reserved pages\n", __func__);
237240
}
@@ -267,7 +270,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
267270
{
268271
int is_ram = region_intersects(res->start, resource_size(res),
269272
"System RAM");
270-
resource_size_t key, align_start, align_size;
273+
resource_size_t key, align_start, align_size, align_end;
271274
struct dev_pagemap *pgmap;
272275
struct page_map *page_map;
273276
unsigned long pfn;
@@ -309,7 +312,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
309312

310313
mutex_lock(&pgmap_lock);
311314
error = 0;
312-
for (key = res->start; key <= res->end; key += SECTION_SIZE) {
315+
align_start = res->start & ~(SECTION_SIZE - 1);
316+
align_size = ALIGN(resource_size(res), SECTION_SIZE);
317+
align_end = align_start + align_size - 1;
318+
for (key = align_start; key <= align_end; key += SECTION_SIZE) {
313319
struct dev_pagemap *dup;
314320

315321
rcu_read_lock();
@@ -336,8 +342,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
336342
if (nid < 0)
337343
nid = numa_mem_id();
338344

339-
align_start = res->start & ~(SECTION_SIZE - 1);
340-
align_size = ALIGN(resource_size(res), SECTION_SIZE);
341345
error = arch_add_memory(nid, align_start, align_size, true);
342346
if (error)
343347
goto err_add_memory;

mm/memory.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,10 +1591,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
15911591
* than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
15921592
* without pte special, it would there be refcounted as a normal page.
15931593
*/
1594-
if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
1594+
if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
15951595
struct page *page;
15961596

1597-
page = pfn_t_to_page(pfn);
1597+
/*
1598+
* At this point we are committed to insert_page()
1599+
* regardless of whether the caller specified flags that
1600+
* result in pfn_t_has_page() == false.
1601+
*/
1602+
page = pfn_to_page(pfn_t_to_pfn(pfn));
15981603
return insert_page(vma, addr, page, vma->vm_page_prot);
15991604
}
16001605
return insert_pfn(vma, addr, pfn, vma->vm_page_prot);

tools/testing/nvdimm/test/iomap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
113113
}
114114
EXPORT_SYMBOL(__wrap_devm_memremap_pages);
115115

116-
pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
116+
pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
117117
{
118118
struct nfit_test_resource *nfit_res = get_nfit_res(addr);
119119

0 commit comments

Comments
 (0)