Skip to content

Commit 779dd20

Browse files
Ben Widawsky authored and djbw committed
cxl/region: Add region creation support
CXL 2.0 allows for dynamic provisioning of new memory regions (system physical address resources like "System RAM" and "Persistent Memory"). Whereas DDR and PMEM resources are conveyed statically at boot, CXL allows for assembling and instantiating new regions from the available capacity of CXL memory expanders in the system. Sysfs with an "echo $region_name > $create_region_attribute" interface is chosen as the mechanism to initiate the provisioning process. This was chosen over ioctl() and netlink() to keep the configuration interface entirely in a pseudo-fs interface, and it was chosen over configfs since, aside from this one creation event, the interface is read-mostly. I.e. configfs supports cases where an object is designed to be provisioned each boot, like an iSCSI storage target, and CXL region creation is mostly for PMEM regions which are created usually once per-lifetime of a server instance. This is an improvement over nvdimm that pre-created "seed" devices that tended to confuse users looking to determine which devices are active and which are idle. Recall that the major change that CXL brings over previous persistent memory architectures is the ability to dynamically define new regions. Compare that to drivers like 'nfit' where the region configuration is statically defined by platform firmware. Regions are created as a child of a root decoder that encompasses an address space with constraints. When created through sysfs, the root decoder is explicit. When created from an LSA's region structure a root decoder will possibly need to be inferred by the driver. Upon region creation through sysfs, a vacant region is created with a unique name. Regions have a number of attributes that must be configured before the region can be bound to the driver where HDM decoder programming is completed.
An example of creating a new region:

- Allocate a new region name:
  region=$(cat /sys/bus/cxl/devices/decoder0.0/create_pmem_region)

- Create a new region by name:
  while
  region=$(cat /sys/bus/cxl/devices/decoder0.0/create_pmem_region)
  ! echo $region > /sys/bus/cxl/devices/decoder0.0/create_pmem_region
  do true; done

- Region now exists in sysfs:
  stat -t /sys/bus/cxl/devices/decoder0.0/$region

- Delete the region, and name:
  echo $region > /sys/bus/cxl/devices/decoder0.0/delete_region

Signed-off-by: Ben Widawsky <[email protected]>
Reviewed-by: Jonathan Cameron <[email protected]>
Link: https://lore.kernel.org/r/165784333909.1758207.794374602146306032.stgit@dwillia2-xfh.jf.intel.com
[djbw: simplify locking, reword changelog]
Signed-off-by: Dan Williams <[email protected]>
1 parent 14b8058 commit 779dd20

File tree

9 files changed

+311
-0
lines changed

9 files changed

+311
-0
lines changed

Documentation/ABI/testing/sysfs-bus-cxl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,3 +268,28 @@ Description:
268268
to the next target in the interleave at address N +
269269
interleave_granularity (assuming N is aligned to
270270
interleave_granularity).
271+
272+
273+
What: /sys/bus/cxl/devices/decoderX.Y/create_pmem_region
274+
Date: May, 2022
275+
KernelVersion: v5.20
276+
277+
Description:
278+
(RW) Write a string in the form 'regionZ' to start the process
279+
of defining a new persistent memory region (interleave-set)
280+
within the decode range bounded by root decoder 'decoderX.Y'.
281+
The value written must match the current value returned from
282+
reading this attribute. An atomic compare exchange operation is
283+
done on write to assign the requested id to a region and
284+
allocate the region-id for the next creation attempt. EBUSY is
285+
returned if the region name written does not match the current
286+
cached value.
287+
288+
289+
What: /sys/bus/cxl/devices/decoderX.Y/delete_region
290+
Date: May, 2022
291+
KernelVersion: v5.20
292+
293+
Description:
294+
(WO) Write a string in the form 'regionZ' to delete that region,
295+
provided it is currently idle / not bound to a driver.

Documentation/driver-api/cxl/memory-devices.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,17 @@ CXL Core
362362
.. kernel-doc:: drivers/cxl/core/mbox.c
363363
:doc: cxl mbox
364364

365+
CXL Regions
366+
-----------
367+
.. kernel-doc:: drivers/cxl/region.h
368+
:identifiers:
369+
370+
.. kernel-doc:: drivers/cxl/core/region.c
371+
:doc: cxl core region
372+
373+
.. kernel-doc:: drivers/cxl/core/region.c
374+
:identifiers:
375+
365376
External Interfaces
366377
===================
367378

drivers/cxl/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,9 @@ config CXL_SUSPEND
103103
def_bool y
104104
depends on SUSPEND && CXL_MEM
105105

106+
config CXL_REGION
107+
bool
108+
default CXL_BUS
109+
select MEMREGION
110+
106111
endif

drivers/cxl/core/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ cxl_core-y += memdev.o
1010
cxl_core-y += mbox.o
1111
cxl_core-y += pci.o
1212
cxl_core-y += hdm.o
13+
cxl_core-$(CONFIG_CXL_REGION) += region.o

drivers/cxl/core/core.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@ extern const struct device_type cxl_nvdimm_type;
99

1010
extern struct attribute_group cxl_base_attribute_group;
1111

12+
#ifdef CONFIG_CXL_REGION
13+
extern struct device_attribute dev_attr_create_pmem_region;
14+
extern struct device_attribute dev_attr_delete_region;
15+
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
16+
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
17+
#else
18+
#define CXL_REGION_ATTR(x) NULL
19+
#define SET_CXL_REGION_ATTR(x)
20+
#endif
21+
1222
struct cxl_send_command;
1323
struct cxl_mem_query_commands;
1424
int cxl_query_cmd(struct cxl_memdev *cxlmd,

drivers/cxl/core/port.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0-only
22
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
33
#include <linux/io-64-nonatomic-lo-hi.h>
4+
#include <linux/memregion.h>
45
#include <linux/workqueue.h>
56
#include <linux/debugfs.h>
67
#include <linux/device.h>
@@ -300,11 +301,35 @@ static struct attribute *cxl_decoder_root_attrs[] = {
300301
&dev_attr_cap_type2.attr,
301302
&dev_attr_cap_type3.attr,
302303
&dev_attr_target_list.attr,
304+
SET_CXL_REGION_ATTR(create_pmem_region)
305+
SET_CXL_REGION_ATTR(delete_region)
303306
NULL,
304307
};
305308

309+
static bool can_create_pmem(struct cxl_root_decoder *cxlrd)
310+
{
311+
unsigned long flags = CXL_DECODER_F_TYPE3 | CXL_DECODER_F_PMEM;
312+
313+
return (cxlrd->cxlsd.cxld.flags & flags) == flags;
314+
}
315+
316+
static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *a, int n)
317+
{
318+
struct device *dev = kobj_to_dev(kobj);
319+
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
320+
321+
if (a == CXL_REGION_ATTR(create_pmem_region) && !can_create_pmem(cxlrd))
322+
return 0;
323+
324+
if (a == CXL_REGION_ATTR(delete_region) && !can_create_pmem(cxlrd))
325+
return 0;
326+
327+
return a->mode;
328+
}
329+
306330
static struct attribute_group cxl_decoder_root_attribute_group = {
307331
.attrs = cxl_decoder_root_attrs,
332+
.is_visible = cxl_root_decoder_visible,
308333
};
309334

310335
static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
@@ -387,6 +412,8 @@ static void cxl_root_decoder_release(struct device *dev)
387412
{
388413
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
389414

415+
if (atomic_read(&cxlrd->region_id) >= 0)
416+
memregion_free(atomic_read(&cxlrd->region_id));
390417
__cxl_decoder_release(&cxlrd->cxlsd.cxld);
391418
kfree(cxlrd);
392419
}
@@ -1484,6 +1511,18 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
14841511

14851512
cxld = &cxlsd->cxld;
14861513
cxld->dev.type = &cxl_decoder_root_type;
1514+
/*
1515+
* cxl_root_decoder_release() special cases negative ids to
1516+
* detect memregion_alloc() failures.
1517+
*/
1518+
atomic_set(&cxlrd->region_id, -1);
1519+
rc = memregion_alloc(GFP_KERNEL);
1520+
if (rc < 0) {
1521+
put_device(&cxld->dev);
1522+
return ERR_PTR(rc);
1523+
}
1524+
1525+
atomic_set(&cxlrd->region_id, rc);
14871526
return cxlrd;
14881527
}
14891528
EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL);

drivers/cxl/core/region.c

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3+
#include <linux/memregion.h>
4+
#include <linux/genalloc.h>
5+
#include <linux/device.h>
6+
#include <linux/module.h>
7+
#include <linux/slab.h>
8+
#include <linux/idr.h>
9+
#include <cxl.h>
10+
#include "core.h"
11+
12+
/**
13+
* DOC: cxl core region
14+
*
15+
* CXL Regions represent mapped memory capacity in system physical address
16+
* space. Whereas the CXL Root Decoders identify the bounds of potential CXL
17+
* Memory ranges, Regions represent the active mapped capacity by the HDM
18+
* Decoder Capability structures throughout the Host Bridges, Switches, and
19+
* Endpoints in the topology.
20+
*/
21+
22+
static struct cxl_region *to_cxl_region(struct device *dev);
23+
24+
static void cxl_region_release(struct device *dev)
25+
{
26+
struct cxl_region *cxlr = to_cxl_region(dev);
27+
28+
memregion_free(cxlr->id);
29+
kfree(cxlr);
30+
}
31+
32+
static const struct device_type cxl_region_type = {
33+
.name = "cxl_region",
34+
.release = cxl_region_release,
35+
};
36+
37+
bool is_cxl_region(struct device *dev)
38+
{
39+
return dev->type == &cxl_region_type;
40+
}
41+
EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
42+
43+
static struct cxl_region *to_cxl_region(struct device *dev)
44+
{
45+
if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
46+
"not a cxl_region device\n"))
47+
return NULL;
48+
49+
return container_of(dev, struct cxl_region, dev);
50+
}
51+
52+
static void unregister_region(void *dev)
53+
{
54+
device_unregister(dev);
55+
}
56+
57+
static struct lock_class_key cxl_region_key;
58+
59+
static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
60+
{
61+
struct cxl_region *cxlr;
62+
struct device *dev;
63+
64+
cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
65+
if (!cxlr) {
66+
memregion_free(id);
67+
return ERR_PTR(-ENOMEM);
68+
}
69+
70+
dev = &cxlr->dev;
71+
device_initialize(dev);
72+
lockdep_set_class(&dev->mutex, &cxl_region_key);
73+
dev->parent = &cxlrd->cxlsd.cxld.dev;
74+
device_set_pm_not_required(dev);
75+
dev->bus = &cxl_bus_type;
76+
dev->type = &cxl_region_type;
77+
cxlr->id = id;
78+
79+
return cxlr;
80+
}
81+
82+
/**
83+
* devm_cxl_add_region - Adds a region to a decoder
84+
* @cxlrd: root decoder
85+
* @id: memregion id to create, or memregion_free() on failure
86+
* @mode: mode for the endpoint decoders of this region
87+
* @type: select whether this is an expander or accelerator (type-2 or type-3)
88+
*
89+
* This is the second step of region initialization. Regions exist within an
90+
* address space which is mapped by a @cxlrd.
91+
*
92+
* Return: 0 if the region was added to the @cxlrd, else returns negative error
93+
* code. The region will be named "regionZ" where Z is the unique region number.
94+
*/
95+
static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
96+
int id,
97+
enum cxl_decoder_mode mode,
98+
enum cxl_decoder_type type)
99+
{
100+
struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
101+
struct cxl_region *cxlr;
102+
struct device *dev;
103+
int rc;
104+
105+
cxlr = cxl_region_alloc(cxlrd, id);
106+
if (IS_ERR(cxlr))
107+
return cxlr;
108+
cxlr->mode = mode;
109+
cxlr->type = type;
110+
111+
dev = &cxlr->dev;
112+
rc = dev_set_name(dev, "region%d", id);
113+
if (rc)
114+
goto err;
115+
116+
rc = device_add(dev);
117+
if (rc)
118+
goto err;
119+
120+
rc = devm_add_action_or_reset(port->uport, unregister_region, cxlr);
121+
if (rc)
122+
return ERR_PTR(rc);
123+
124+
dev_dbg(port->uport, "%s: created %s\n",
125+
dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
126+
return cxlr;
127+
128+
err:
129+
put_device(dev);
130+
return ERR_PTR(rc);
131+
}
132+
133+
static ssize_t create_pmem_region_show(struct device *dev,
134+
struct device_attribute *attr, char *buf)
135+
{
136+
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
137+
138+
return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
139+
}
140+
141+
/*
 * Create the pmem region named by @buf ("regionZ").
 *
 * A replacement id is allocated first and then swapped into the root
 * decoder's region_id with a compare-and-exchange against the requested id.
 * If the requested id is not the cached one, the replacement id is freed and
 * -EBUSY is returned. Otherwise the requested id is handed to
 * devm_cxl_add_region().
 *
 * Returns @len on success, -EINVAL for unparsable input, or the error from
 * memregion_alloc() / devm_cxl_add_region().
 */
static ssize_t create_pmem_region_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
	struct cxl_region *cxlr;
	int requested, next;

	if (sscanf(buf, "region%d\n", &requested) != 1)
		return -EINVAL;

	next = memregion_alloc(GFP_KERNEL);
	if (next < 0)
		return next;

	if (atomic_cmpxchg(&cxlrd->region_id, requested, next) != requested) {
		memregion_free(next);
		return -EBUSY;
	}

	cxlr = devm_cxl_add_region(cxlrd, requested, CXL_DECODER_PMEM,
				   CXL_DECODER_EXPANDER);
	if (IS_ERR(cxlr))
		return PTR_ERR(cxlr);

	return len;
}
DEVICE_ATTR_RW(create_pmem_region);
170+
171+
static struct cxl_region *
172+
cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
173+
{
174+
struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
175+
struct device *region_dev;
176+
177+
region_dev = device_find_child_by_name(&cxld->dev, name);
178+
if (!region_dev)
179+
return ERR_PTR(-ENODEV);
180+
181+
return to_cxl_region(region_dev);
182+
}
183+
184+
/*
 * Delete the region named by @buf from under this root decoder.
 *
 * The region is looked up by name among the decoder's children, its
 * unregister_region() devm action is released against the parent port's
 * uport device, and the region device is then put.
 *
 * Returns @len on success or the lookup error.
 */
static ssize_t delete_region_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t len)
{
	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
	struct cxl_port *parent_port = to_cxl_port(dev->parent);
	struct cxl_region *victim = cxl_find_region_by_name(cxlrd, buf);

	if (IS_ERR(victim))
		return PTR_ERR(victim);

	devm_release_action(parent_port->uport, unregister_region, victim);
	put_device(&victim->dev);

	return len;
}
DEVICE_ATTR_WO(delete_region);

drivers/cxl/cxl.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,13 +286,29 @@ struct cxl_switch_decoder {
286286
/**
287287
* struct cxl_root_decoder - Static platform CXL address decoder
288288
* @res: host / parent resource for region allocations
289+
* @region_id: region id for next region provisioning event
289290
* @cxlsd: base cxl switch decoder
290291
*/
291292
struct cxl_root_decoder {
292293
struct resource *res;
294+
atomic_t region_id;
293295
struct cxl_switch_decoder cxlsd;
294296
};
295297

298+
/**
299+
* struct cxl_region - CXL region
300+
* @dev: This region's device
301+
* @id: This region's id. Id is globally unique across all regions
302+
* @mode: Endpoint decoder allocation / access mode
303+
* @type: Endpoint decoder target type
304+
*/
305+
struct cxl_region {
306+
struct device dev;
307+
int id;
308+
enum cxl_decoder_mode mode;
309+
enum cxl_decoder_type type;
310+
};
311+
296312
/**
297313
* enum cxl_nvdimm_brige_state - state machine for managing bus rescans
298314
* @CXL_NVB_NEW: Set at bridge create and after cxl_pmem_wq is destroyed
@@ -447,6 +463,8 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port);
447463
int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm);
448464
int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
449465

466+
bool is_cxl_region(struct device *dev);
467+
450468
extern struct bus_type cxl_bus_type;
451469

452470
struct cxl_driver {

tools/testing/cxl/Kbuild

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ cxl_core-y += $(CXL_CORE_SRC)/memdev.o
4747
cxl_core-y += $(CXL_CORE_SRC)/mbox.o
4848
cxl_core-y += $(CXL_CORE_SRC)/pci.o
4949
cxl_core-y += $(CXL_CORE_SRC)/hdm.o
50+
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
5051
cxl_core-y += config_check.o
5152

5253
obj-m += test/

0 commit comments

Comments
 (0)