Skip to content

Commit 2522afb

Browse files
committed
libnvdimm/region: Introduce an 'align' attribute
The align attribute applies an alignment constraint for namespace creation in a region. Whereas the 'align' attribute of a namespace applied alignment padding via an info block, the region 'align' attribute applies alignment constraints to the free space allocation.

The default for 'align' is the maximum known memremap_compat_align() across all archs (16MiB from PowerPC at time of writing) multiplied by the number of interleave ways if there is blk-aliasing. The minimum is PAGE_SIZE and allows for the creation of cross-arch incompatible namespaces, just as previous kernels allowed, but the expectation is cross-arch and mode-independent compatibility by default.

The regression risk with this change is limited to cases that were dependent on the ability to create unaligned namespaces, *and* for some reason are unable to opt-out of aligned namespaces by writing to 'regionX/align'. If such a scenario arises the default can be flipped from opt-out to opt-in of compat-aligned namespace creation, but that is a last resort. The kernel will otherwise continue to support existing defined misaligned namespaces.

Unfortunately this change needs to touch several parts of the implementation at once:

- region/available_size: expand busy extents to current align
- region/max_available_extent: expand busy extents to current align
- namespace/size: trim free space to current align

...to keep the free space accounting conforming to the dynamic align setting.

Reported-by: Aneesh Kumar K.V <[email protected]>
Reported-by: Jeff Moyer <[email protected]>
Reviewed-by: Aneesh Kumar K.V <[email protected]>
Reviewed-by: Jeff Moyer <[email protected]>
Link: https://lore.kernel.org/r/158041478371.3889308.14542630147672668068.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <[email protected]>
1 parent a0e3745 commit 2522afb

File tree

4 files changed

+192
-26
lines changed

4 files changed

+192
-26
lines changed

drivers/nvdimm/dimm_devs.c

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,21 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm)
563563
return rc;
564564
}
565565

566+
static unsigned long dpa_align(struct nd_region *nd_region)
567+
{
568+
struct device *dev = &nd_region->dev;
569+
570+
if (dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev),
571+
"bus lock required for capacity provision\n"))
572+
return 0;
573+
if (dev_WARN_ONCE(dev, !nd_region->ndr_mappings || nd_region->align
574+
% nd_region->ndr_mappings,
575+
"invalid region align %#lx mappings: %d\n",
576+
nd_region->align, nd_region->ndr_mappings))
577+
return 0;
578+
return nd_region->align / nd_region->ndr_mappings;
579+
}
580+
566581
int alias_dpa_busy(struct device *dev, void *data)
567582
{
568583
resource_size_t map_end, blk_start, new;
@@ -571,6 +586,7 @@ int alias_dpa_busy(struct device *dev, void *data)
571586
struct nd_region *nd_region;
572587
struct nvdimm_drvdata *ndd;
573588
struct resource *res;
589+
unsigned long align;
574590
int i;
575591

576592
if (!is_memory(dev))
@@ -608,13 +624,21 @@ int alias_dpa_busy(struct device *dev, void *data)
608624
* Find the free dpa from the end of the last pmem allocation to
609625
* the end of the interleave-set mapping.
610626
*/
627+
align = dpa_align(nd_region);
628+
if (!align)
629+
return 0;
630+
611631
for_each_dpa_resource(ndd, res) {
632+
resource_size_t start, end;
633+
612634
if (strncmp(res->name, "pmem", 4) != 0)
613635
continue;
614-
if ((res->start >= blk_start && res->start < map_end)
615-
|| (res->end >= blk_start
616-
&& res->end <= map_end)) {
617-
new = max(blk_start, min(map_end + 1, res->end + 1));
636+
637+
start = ALIGN_DOWN(res->start, align);
638+
end = ALIGN(res->end + 1, align) - 1;
639+
if ((start >= blk_start && start < map_end)
640+
|| (end >= blk_start && end <= map_end)) {
641+
new = max(blk_start, min(map_end, end) + 1);
618642
if (new != blk_start) {
619643
blk_start = new;
620644
goto retry;
@@ -654,17 +678,28 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
654678
.res = NULL,
655679
};
656680
struct resource *res;
681+
unsigned long align;
657682

658683
if (!ndd)
659684
return 0;
660685

661686
device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
662687

663688
/* now account for busy blk allocations in unaliased dpa */
689+
align = dpa_align(nd_region);
690+
if (!align)
691+
return 0;
664692
for_each_dpa_resource(ndd, res) {
693+
resource_size_t start, end, size;
694+
665695
if (strncmp(res->name, "blk", 3) != 0)
666696
continue;
667-
info.available -= resource_size(res);
697+
start = ALIGN_DOWN(res->start, align);
698+
end = ALIGN(res->end + 1, align) - 1;
699+
size = end - start + 1;
700+
if (size >= info.available)
701+
return 0;
702+
info.available -= size;
668703
}
669704

670705
return info.available;
@@ -683,19 +718,31 @@ resource_size_t nd_pmem_max_contiguous_dpa(struct nd_region *nd_region,
683718
struct nvdimm_bus *nvdimm_bus;
684719
resource_size_t max = 0;
685720
struct resource *res;
721+
unsigned long align;
686722

687723
/* if a dimm is disabled the available capacity is zero */
688724
if (!ndd)
689725
return 0;
690726

727+
align = dpa_align(nd_region);
728+
if (!align)
729+
return 0;
730+
691731
nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
692732
if (__reserve_free_pmem(&nd_region->dev, nd_mapping->nvdimm))
693733
return 0;
694734
for_each_dpa_resource(ndd, res) {
735+
resource_size_t start, end;
736+
695737
if (strcmp(res->name, "pmem-reserve") != 0)
696738
continue;
697-
if (resource_size(res) > max)
698-
max = resource_size(res);
739+
/* trim free space relative to current alignment setting */
740+
start = ALIGN(res->start, align);
741+
end = ALIGN_DOWN(res->end + 1, align) - 1;
742+
if (end < start)
743+
continue;
744+
if (end - start + 1 > max)
745+
max = end - start + 1;
699746
}
700747
release_free_pmem(nvdimm_bus, nd_mapping);
701748
return max;
@@ -723,24 +770,33 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
723770
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
724771
struct resource *res;
725772
const char *reason;
773+
unsigned long align;
726774

727775
if (!ndd)
728776
return 0;
729777

778+
align = dpa_align(nd_region);
779+
if (!align)
780+
return 0;
781+
730782
map_start = nd_mapping->start;
731783
map_end = map_start + nd_mapping->size - 1;
732784
blk_start = max(map_start, map_end + 1 - *overlap);
733785
for_each_dpa_resource(ndd, res) {
734-
if (res->start >= map_start && res->start < map_end) {
786+
resource_size_t start, end;
787+
788+
start = ALIGN_DOWN(res->start, align);
789+
end = ALIGN(res->end + 1, align) - 1;
790+
if (start >= map_start && start < map_end) {
735791
if (strncmp(res->name, "blk", 3) == 0)
736792
blk_start = min(blk_start,
737-
max(map_start, res->start));
738-
else if (res->end > map_end) {
793+
max(map_start, start));
794+
else if (end > map_end) {
739795
reason = "misaligned to iset";
740796
goto err;
741797
} else
742-
busy += resource_size(res);
743-
} else if (res->end >= map_start && res->end <= map_end) {
798+
busy += end - start + 1;
799+
} else if (end >= map_start && end <= map_end) {
744800
if (strncmp(res->name, "blk", 3) == 0) {
745801
/*
746802
* If a BLK allocation overlaps the start of
@@ -749,8 +805,8 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
749805
*/
750806
blk_start = map_start;
751807
} else
752-
busy += resource_size(res);
753-
} else if (map_start > res->start && map_start < res->end) {
808+
busy += end - start + 1;
809+
} else if (map_start > start && map_start < end) {
754810
/* total eclipse of the mapping */
755811
busy += nd_mapping->size;
756812
blk_start = map_start;
@@ -760,7 +816,7 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
760816
*overlap = map_end + 1 - blk_start;
761817
available = blk_start - map_start;
762818
if (busy < available)
763-
return available - busy;
819+
return ALIGN_DOWN(available - busy, align);
764820
return 0;
765821

766822
err:

drivers/nvdimm/namespace_devs.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,11 @@ static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
542542
{
543543
bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
544544
bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
545+
unsigned long align;
546+
547+
align = nd_region->align / nd_region->ndr_mappings;
548+
valid->start = ALIGN(valid->start, align);
549+
valid->end = ALIGN_DOWN(valid->end + 1, align) - 1;
545550

546551
if (valid->start >= valid->end)
547552
goto invalid;
@@ -981,10 +986,10 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
981986
return -ENXIO;
982987
}
983988

984-
div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder);
989+
div_u64_rem(val, nd_region->align, &remainder);
985990
if (remainder) {
986991
dev_dbg(dev, "%llu is not %ldK aligned\n", val,
987-
(PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K);
992+
nd_region->align / SZ_1K);
988993
return -EINVAL;
989994
}
990995

drivers/nvdimm/nd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ struct nd_region {
146146
struct device *btt_seed;
147147
struct device *pfn_seed;
148148
struct device *dax_seed;
149+
unsigned long align;
149150
u16 ndr_mappings;
150151
u64 ndr_size;
151152
u64 ndr_start;

drivers/nvdimm/region_devs.c

Lines changed: 113 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -216,21 +216,25 @@ int nd_region_to_nstype(struct nd_region *nd_region)
216216
}
217217
EXPORT_SYMBOL(nd_region_to_nstype);
218218

219-
static ssize_t size_show(struct device *dev,
220-
struct device_attribute *attr, char *buf)
219+
static unsigned long long region_size(struct nd_region *nd_region)
221220
{
222-
struct nd_region *nd_region = to_nd_region(dev);
223-
unsigned long long size = 0;
224-
225-
if (is_memory(dev)) {
226-
size = nd_region->ndr_size;
221+
if (is_memory(&nd_region->dev)) {
222+
return nd_region->ndr_size;
227223
} else if (nd_region->ndr_mappings == 1) {
228224
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
229225

230-
size = nd_mapping->size;
226+
return nd_mapping->size;
231227
}
232228

233-
return sprintf(buf, "%llu\n", size);
229+
return 0;
230+
}
231+
232+
static ssize_t size_show(struct device *dev,
233+
struct device_attribute *attr, char *buf)
234+
{
235+
struct nd_region *nd_region = to_nd_region(dev);
236+
237+
return sprintf(buf, "%llu\n", region_size(nd_region));
234238
}
235239
static DEVICE_ATTR_RO(size);
236240

@@ -529,6 +533,55 @@ static ssize_t read_only_store(struct device *dev,
529533
}
530534
static DEVICE_ATTR_RW(read_only);
531535

536+
static ssize_t align_show(struct device *dev,
537+
struct device_attribute *attr, char *buf)
538+
{
539+
struct nd_region *nd_region = to_nd_region(dev);
540+
541+
return sprintf(buf, "%#lx\n", nd_region->align);
542+
}
543+
544+
/*
 * sysfs 'align' write handler: set the alignment applied to namespace
 * capacity allocation in this region.
 *
 * The input is parsed with kstrtoul() (base auto-detected). Returns:
 *   - the kstrtoul() error if the input does not parse,
 *   - -ENXIO if the region has no mappings,
 *   - -EINVAL unless the value divides evenly across the mappings, the
 *     per-mapping share is a power of 2 no smaller than PAGE_SIZE, and
 *     the value does not exceed region_size(),
 *   - @len on success.
 */
static ssize_t align_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_region *nd_region = to_nd_region(dev);
	unsigned long val, dpa;
	u32 remainder;
	int rc;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return rc;

	if (!nd_region->ndr_mappings)
		return -ENXIO;

	/*
	 * Ensure space-align is evenly divisible by the region
	 * interleave-width because the kernel typically has no facility
	 * to determine which DIMM(s), dimm-physical-addresses, would
	 * contribute to the tail capacity in system-physical-address
	 * space for the namespace.
	 *
	 * Use div_u64_rem() rather than do_div(): do_div() requires a
	 * 64-bit dividend lvalue, which 'unsigned long' is not on 32-bit
	 * architectures.
	 */
	dpa = div_u64_rem(val, nd_region->ndr_mappings, &remainder);
	if (!is_power_of_2(dpa) || dpa < PAGE_SIZE
			|| val > region_size(nd_region) || remainder)
		return -EINVAL;

	/*
	 * Given that space allocation consults this value multiple
	 * times ensure it does not change for the duration of the
	 * allocation.
	 */
	nvdimm_bus_lock(dev);
	nd_region->align = val;
	nvdimm_bus_unlock(dev);

	return len;
}
583+
static DEVICE_ATTR_RW(align);
584+
532585
static ssize_t region_badblocks_show(struct device *dev,
533586
struct device_attribute *attr, char *buf)
534587
{
@@ -571,6 +624,7 @@ static DEVICE_ATTR_RO(persistence_domain);
571624

572625
static struct attribute *nd_region_attributes[] = {
573626
&dev_attr_size.attr,
627+
&dev_attr_align.attr,
574628
&dev_attr_nstype.attr,
575629
&dev_attr_mappings.attr,
576630
&dev_attr_btt_seed.attr,
@@ -626,6 +680,19 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
626680
return a->mode;
627681
}
628682

683+
if (a == &dev_attr_align.attr) {
684+
int i;
685+
686+
for (i = 0; i < nd_region->ndr_mappings; i++) {
687+
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
688+
struct nvdimm *nvdimm = nd_mapping->nvdimm;
689+
690+
if (test_bit(NDD_LABELING, &nvdimm->flags))
691+
return a->mode;
692+
}
693+
return 0;
694+
}
695+
629696
if (a != &dev_attr_set_cookie.attr
630697
&& a != &dev_attr_available_size.attr)
631698
return a->mode;
@@ -935,6 +1002,42 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
9351002
}
9361003
EXPORT_SYMBOL(nd_region_release_lane);
9371004

1005+
/*
1006+
* PowerPC requires this alignment for memremap_pages(). All other archs
1007+
* should be ok with SUBSECTION_SIZE (see memremap_compat_align()).
1008+
*/
1009+
#define MEMREMAP_COMPAT_ALIGN_MAX SZ_16M
1010+
1011+
static unsigned long default_align(struct nd_region *nd_region)
1012+
{
1013+
unsigned long align, per_mapping;
1014+
int i, mappings;
1015+
u32 remainder;
1016+
1017+
if (is_nd_blk(&nd_region->dev))
1018+
align = PAGE_SIZE;
1019+
else
1020+
align = MEMREMAP_COMPAT_ALIGN_MAX;
1021+
1022+
for (i = 0; i < nd_region->ndr_mappings; i++) {
1023+
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
1024+
struct nvdimm *nvdimm = nd_mapping->nvdimm;
1025+
1026+
if (test_bit(NDD_ALIASING, &nvdimm->flags)) {
1027+
align = MEMREMAP_COMPAT_ALIGN_MAX;
1028+
break;
1029+
}
1030+
}
1031+
1032+
mappings = max_t(u16, 1, nd_region->ndr_mappings);
1033+
per_mapping = align;
1034+
remainder = do_div(per_mapping, mappings);
1035+
if (remainder)
1036+
align *= mappings;
1037+
1038+
return align;
1039+
}
1040+
9381041
static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
9391042
struct nd_region_desc *ndr_desc,
9401043
const struct device_type *dev_type, const char *caller)
@@ -1039,6 +1142,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
10391142
dev->of_node = ndr_desc->of_node;
10401143
nd_region->ndr_size = resource_size(ndr_desc->res);
10411144
nd_region->ndr_start = ndr_desc->res->start;
1145+
nd_region->align = default_align(nd_region);
10421146
if (ndr_desc->flush)
10431147
nd_region->flush = ndr_desc->flush;
10441148
else

0 commit comments

Comments
 (0)