Skip to content

Commit ab68f26

Browse files
committed
/dev/dax, pmem: direct access to persistent memory
Device DAX is the device-centric analogue of Filesystem DAX (CONFIG_FS_DAX). It allows memory ranges to be allocated and mapped without the need for an intervening file system. Device DAX is strict, precise and predictable. Specifically, this interface: 1/ Guarantees fault granularity with respect to a given page size (pte, pmd, or pud) set at configuration time. 2/ Enforces deterministic behavior by being strict about what fault scenarios are supported. For example, by forcing MADV_DONTFORK semantics and omitting MAP_PRIVATE support, device-dax guarantees that a mapping always behaves/performs the same once established. It is the "what you see is what you get" access mechanism to differentiated memory vs filesystem DAX which has filesystem specific implementation semantics. Persistent memory is the first target, but the mechanism is also targeted for exclusive allocations of performance differentiated memory ranges. This commit is limited to the base device driver infrastructure to associate a dax device with a pmem range. Cc: Jeff Moyer <[email protected]> Cc: Christoph Hellwig <[email protected]> Cc: Andrew Morton <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Ross Zwisler <[email protected]> Reviewed-by: Johannes Thumshirn <[email protected]> Signed-off-by: Dan Williams <[email protected]>
1 parent 6cf9c5b commit ab68f26

File tree

9 files changed

+478
-0
lines changed

9 files changed

+478
-0
lines changed

drivers/Kconfig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,8 @@ source "drivers/android/Kconfig"
190190

191191
source "drivers/nvdimm/Kconfig"
192192

193+
source "drivers/dax/Kconfig"
194+
193195
source "drivers/nvmem/Kconfig"
194196

195197
source "drivers/hwtracing/stm/Kconfig"

drivers/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT) += parport/
6666
obj-$(CONFIG_NVM) += lightnvm/
6767
obj-y += base/ block/ misc/ mfd/ nfc/
6868
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
69+
obj-$(CONFIG_DEV_DAX) += dax/
6970
obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
7071
obj-$(CONFIG_NUBUS) += nubus/
7172
obj-y += macintosh/

drivers/dax/Kconfig

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
menuconfig DEV_DAX
2+
tristate "DAX: direct access to differentiated memory"
3+
default m if NVDIMM_DAX
4+
help
5+
Support raw access to differentiated (persistence, bandwidth,
6+
latency...) memory via an mmap(2) capable character
7+
device. Platform firmware or a device driver may identify a
8+
platform memory resource that is differentiated from the
9+
baseline memory pool. Mappings of a /dev/daxX.Y device impose
10+
restrictions that make the mapping behavior deterministic.
11+
12+
if DEV_DAX
13+
14+
config DEV_DAX_PMEM
15+
tristate "PMEM DAX: direct access to persistent memory"
16+
depends on NVDIMM_DAX
17+
default DEV_DAX
18+
help
19+
Support raw access to persistent memory. Note that this
20+
driver consumes memory ranges allocated and exported by the
21+
libnvdimm sub-system.
22+
23+
Say Y if unsure
24+
25+
endif

drivers/dax/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
obj-$(CONFIG_DEV_DAX) += dax.o
2+
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
3+
4+
dax_pmem-y := pmem.o

drivers/dax/dax.c

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
/*
2+
* Copyright(c) 2016 Intel Corporation. All rights reserved.
3+
*
4+
* This program is free software; you can redistribute it and/or modify
5+
* it under the terms of version 2 of the GNU General Public License as
6+
* published by the Free Software Foundation.
7+
*
8+
* This program is distributed in the hope that it will be useful, but
9+
* WITHOUT ANY WARRANTY; without even the implied warranty of
10+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11+
* General Public License for more details.
12+
*/
13+
#include <linux/pagemap.h>
14+
#include <linux/module.h>
15+
#include <linux/device.h>
16+
#include <linux/pfn_t.h>
17+
#include <linux/slab.h>
18+
#include <linux/dax.h>
19+
#include <linux/fs.h>
20+
#include <linux/mm.h>
21+
22+
static int dax_major;
23+
static struct class *dax_class;
24+
static DEFINE_IDA(dax_minor_ida);
25+
26+
/**
27+
* struct dax_region - mapping infrastructure for dax devices
28+
* @id: kernel-wide unique region for a memory range
29+
* @base: linear address corresponding to @res
30+
* @kref: to pin while other agents have a need to do lookups
31+
* @dev: parent device backing this region
32+
* @align: allocation and mapping alignment for child dax devices
33+
* @res: physical address range of the region
34+
* @pfn_flags: identify whether the pfns are paged back or not
35+
*/
36+
struct dax_region {
37+
int id;
38+
struct ida ida;
39+
void *base;
40+
struct kref kref;
41+
struct device *dev;
42+
unsigned int align;
43+
struct resource res;
44+
unsigned long pfn_flags;
45+
};
46+
47+
/**
48+
* struct dax_dev - subdivision of a dax region
49+
* @region - parent region
50+
* @dev - device backing the character device
51+
* @kref - enable this data to be tracked in filp->private_data
52+
* @id - child id in the region
53+
* @num_resources - number of physical address extents in this device
54+
* @res - array of physical address ranges
55+
*/
56+
struct dax_dev {
57+
struct dax_region *region;
58+
struct device *dev;
59+
struct kref kref;
60+
int id;
61+
int num_resources;
62+
struct resource res[0];
63+
};
64+
65+
static void dax_region_free(struct kref *kref)
66+
{
67+
struct dax_region *dax_region;
68+
69+
dax_region = container_of(kref, struct dax_region, kref);
70+
kfree(dax_region);
71+
}
72+
73+
void dax_region_put(struct dax_region *dax_region)
74+
{
75+
kref_put(&dax_region->kref, dax_region_free);
76+
}
77+
EXPORT_SYMBOL_GPL(dax_region_put);
78+
79+
static void dax_dev_free(struct kref *kref)
80+
{
81+
struct dax_dev *dax_dev;
82+
83+
dax_dev = container_of(kref, struct dax_dev, kref);
84+
dax_region_put(dax_dev->region);
85+
kfree(dax_dev);
86+
}
87+
88+
static void dax_dev_put(struct dax_dev *dax_dev)
89+
{
90+
kref_put(&dax_dev->kref, dax_dev_free);
91+
}
92+
93+
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
94+
struct resource *res, unsigned int align, void *addr,
95+
unsigned long pfn_flags)
96+
{
97+
struct dax_region *dax_region;
98+
99+
dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
100+
101+
if (!dax_region)
102+
return NULL;
103+
104+
memcpy(&dax_region->res, res, sizeof(*res));
105+
dax_region->pfn_flags = pfn_flags;
106+
kref_init(&dax_region->kref);
107+
dax_region->id = region_id;
108+
ida_init(&dax_region->ida);
109+
dax_region->align = align;
110+
dax_region->dev = parent;
111+
dax_region->base = addr;
112+
113+
return dax_region;
114+
}
115+
EXPORT_SYMBOL_GPL(alloc_dax_region);
116+
117+
static ssize_t size_show(struct device *dev,
118+
struct device_attribute *attr, char *buf)
119+
{
120+
struct dax_dev *dax_dev = dev_get_drvdata(dev);
121+
unsigned long long size = 0;
122+
int i;
123+
124+
for (i = 0; i < dax_dev->num_resources; i++)
125+
size += resource_size(&dax_dev->res[i]);
126+
127+
return sprintf(buf, "%llu\n", size);
128+
}
129+
static DEVICE_ATTR_RO(size);
130+
131+
static struct attribute *dax_device_attributes[] = {
132+
&dev_attr_size.attr,
133+
NULL,
134+
};
135+
136+
static const struct attribute_group dax_device_attribute_group = {
137+
.attrs = dax_device_attributes,
138+
};
139+
140+
static const struct attribute_group *dax_attribute_groups[] = {
141+
&dax_device_attribute_group,
142+
NULL,
143+
};
144+
145+
static void unregister_dax_dev(void *_dev)
146+
{
147+
struct device *dev = _dev;
148+
struct dax_dev *dax_dev = dev_get_drvdata(dev);
149+
struct dax_region *dax_region = dax_dev->region;
150+
151+
dev_dbg(dev, "%s\n", __func__);
152+
153+
get_device(dev);
154+
device_unregister(dev);
155+
ida_simple_remove(&dax_region->ida, dax_dev->id);
156+
ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
157+
put_device(dev);
158+
dax_dev_put(dax_dev);
159+
}
160+
161+
int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
162+
int count)
163+
{
164+
struct device *parent = dax_region->dev;
165+
struct dax_dev *dax_dev;
166+
struct device *dev;
167+
int rc, minor;
168+
dev_t dev_t;
169+
170+
dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
171+
if (!dax_dev)
172+
return -ENOMEM;
173+
memcpy(dax_dev->res, res, sizeof(*res) * count);
174+
dax_dev->num_resources = count;
175+
kref_init(&dax_dev->kref);
176+
dax_dev->region = dax_region;
177+
kref_get(&dax_region->kref);
178+
179+
dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
180+
if (dax_dev->id < 0) {
181+
rc = dax_dev->id;
182+
goto err_id;
183+
}
184+
185+
minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
186+
if (minor < 0) {
187+
rc = minor;
188+
goto err_minor;
189+
}
190+
191+
dev_t = MKDEV(dax_major, minor);
192+
dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev,
193+
dax_attribute_groups, "dax%d.%d", dax_region->id,
194+
dax_dev->id);
195+
if (IS_ERR(dev)) {
196+
rc = PTR_ERR(dev);
197+
goto err_create;
198+
}
199+
dax_dev->dev = dev;
200+
201+
rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
202+
if (rc) {
203+
unregister_dax_dev(dev);
204+
return rc;
205+
}
206+
207+
return 0;
208+
209+
err_create:
210+
ida_simple_remove(&dax_minor_ida, minor);
211+
err_minor:
212+
ida_simple_remove(&dax_region->ida, dax_dev->id);
213+
err_id:
214+
dax_dev_put(dax_dev);
215+
216+
return rc;
217+
}
218+
EXPORT_SYMBOL_GPL(devm_create_dax_dev);
219+
220+
static const struct file_operations dax_fops = {
221+
.llseek = noop_llseek,
222+
.owner = THIS_MODULE,
223+
};
224+
225+
/*
 * dax_init - register the "dax" character-device major and device class
 *
 * Return: 0 on success, or the negative error code from register_chrdev()
 * or class_create(). The major is released again if creating the class
 * fails.
 */
static int __init dax_init(void)
{
	int major = register_chrdev(0, "dax", &dax_fops);

	if (major < 0)
		return major;
	dax_major = major;

	dax_class = class_create(THIS_MODULE, "dax");
	if (!IS_ERR(dax_class))
		return 0;

	unregister_chrdev(dax_major, "dax");
	return PTR_ERR(dax_class);
}
242+
243+
/*
 * dax_exit - undo dax_init()
 *
 * Destroys the device class, releases the character-device major and
 * tears down the global minor-number allocator.
 */
static void __exit dax_exit(void)
{
	/* the class and the major were set up by dax_init() */
	class_destroy(dax_class);
	unregister_chrdev(dax_major, "dax");

	/* release whatever the minor allocator still holds */
	ida_destroy(&dax_minor_ida);
}
249+
250+
MODULE_AUTHOR("Intel Corporation");
251+
MODULE_LICENSE("GPL v2");
252+
subsys_initcall(dax_init);
253+
module_exit(dax_exit);

drivers/dax/dax.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Copyright(c) 2016 Intel Corporation. All rights reserved.
3+
*
4+
* This program is free software; you can redistribute it and/or modify
5+
* it under the terms of version 2 of the GNU General Public License as
6+
* published by the Free Software Foundation.
7+
*
8+
* This program is distributed in the hope that it will be useful, but
9+
* WITHOUT ANY WARRANTY; without even the implied warranty of
10+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11+
* General Public License for more details.
12+
*/
13+
#ifndef __DAX_H__
#define __DAX_H__
struct device;
struct resource;
struct dax_region;

/* Drop one reference on @dax_region; the region is freed with the last one. */
void dax_region_put(struct dax_region *dax_region);

/*
 * Allocate a dax region describing the physical range @res (copied), mapped
 * at linear address @addr, with child alignment @align and pfn flags
 * @pfn_flags. Returns NULL on allocation failure; release the returned
 * reference with dax_region_put().
 */
struct dax_region *alloc_dax_region(struct device *parent,
		int region_id, struct resource *res, unsigned int align,
		void *addr, unsigned long pfn_flags);

/*
 * Create a dax character device in @dax_region covering the @count ranges
 * in @res. Returns 0 on success or a negative error code.
 */
int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
		int count);
#endif /* __DAX_H__ */

0 commit comments

Comments
 (0)