Skip to content

Commit 67a3e8f

Browse files
Ross Zwisler and djbw
authored and committed
nd_blk: change aperture mapping from WC to WB
This should result in a pretty sizeable performance gain for reads. For rough comparison I did some simple read testing using PMEM to compare reads of write-combining (WC) mappings vs write-back (WB). This was done on a random lab machine.

PMEM reads from a write-combining mapping:

    # dd of=/dev/null if=/dev/pmem0 bs=4096 count=100000
    100000+0 records in
    100000+0 records out
    409600000 bytes (410 MB) copied, 9.2855 s, 44.1 MB/s

PMEM reads from a write-back mapping:

    # dd of=/dev/null if=/dev/pmem0 bs=4096 count=1000000
    1000000+0 records in
    1000000+0 records out
    4096000000 bytes (4.1 GB) copied, 3.44034 s, 1.2 GB/s

To be able to safely support a write-back aperture I needed to add support for the "read flush" _DSM flag, as outlined in the DSM spec:

    http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf

This flag tells the ND BLK driver that it needs to flush the cache lines associated with the aperture after the aperture is moved but before any new data is read. This ensures that any stale cache lines from the previous contents of the aperture will be discarded from the processor cache, and the new data will be read properly from the DIMM. We know that the cache lines are clean and will be discarded without any writeback because either a) the previous aperture operation was a read, and we never modified the contents of the aperture, or b) the previous aperture operation was a write and we must have written back the dirtied contents of the aperture to the DIMM before the I/O was completed.

In order to add support for the "read flush" flag I needed to add a generic routine to invalidate cache lines, mmio_flush_range(). This is protected by the ARCH_HAS_MMIO_FLUSH Kconfig variable, and is currently only supported on x86.

Signed-off-by: Ross Zwisler <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
1 parent e2e0539 commit 67a3e8f

File tree

11 files changed

+88
-36
lines changed

11 files changed

+88
-36
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ config X86
2828
select ARCH_HAS_FAST_MULTIPLIER
2929
select ARCH_HAS_GCOV_PROFILE_ALL
3030
select ARCH_HAS_PMEM_API
31+
select ARCH_HAS_MMIO_FLUSH
3132
select ARCH_HAS_SG_CHAIN
3233
select ARCH_HAVE_NMI_SAFE_CMPXCHG
3334
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI

arch/x86/include/asm/cacheflush.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ int set_pages_rw(struct page *page, int numpages);
8989

9090
void clflush_cache_range(void *addr, unsigned int size);
9191

92+
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
93+
9294
#ifdef CONFIG_DEBUG_RODATA
9395
void mark_rodata_ro(void);
9496
extern const int rodata_test_data;

arch/x86/include/asm/io.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,6 @@ static inline void flush_write_buffers(void)
248248
#endif
249249
}
250250

251-
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
252-
253251
#endif /* __KERNEL__ */
254252

255253
extern void native_io_delay(void);

arch/x86/include/asm/pmem.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#include <asm/cpufeature.h>
1919
#include <asm/special_insns.h>
2020

21+
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
22+
2123
#ifdef CONFIG_ARCH_HAS_PMEM_API
2224
/**
2325
* arch_memcpy_to_pmem - copy data to persistent memory

drivers/acpi/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,7 @@ config ACPI_NFIT
410410
tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
411411
depends on PHYS_ADDR_T_64BIT
412412
depends on BLK_DEV
413+
depends on ARCH_HAS_MMIO_FLUSH
413414
select LIBNVDIMM
414415
help
415416
Infrastructure to probe ACPI 6 compliant platforms for

drivers/acpi/nfit.c

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,7 +1017,7 @@ static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
10171017
if (mmio->num_lines)
10181018
offset = to_interleave_offset(offset, mmio);
10191019

1020-
return readq(mmio->base + offset);
1020+
return readq(mmio->addr.base + offset);
10211021
}
10221022

10231023
static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
@@ -1042,11 +1042,11 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
10421042
if (mmio->num_lines)
10431043
offset = to_interleave_offset(offset, mmio);
10441044

1045-
writeq(cmd, mmio->base + offset);
1045+
writeq(cmd, mmio->addr.base + offset);
10461046
wmb_blk(nfit_blk);
10471047

10481048
if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH)
1049-
readq(mmio->base + offset);
1049+
readq(mmio->addr.base + offset);
10501050
}
10511051

10521052
static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
@@ -1078,11 +1078,16 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
10781078
}
10791079

10801080
if (rw)
1081-
memcpy_to_pmem(mmio->aperture + offset,
1081+
memcpy_to_pmem(mmio->addr.aperture + offset,
10821082
iobuf + copied, c);
1083-
else
1083+
else {
1084+
if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH)
1085+
mmio_flush_range((void __force *)
1086+
mmio->addr.aperture + offset, c);
1087+
10841088
memcpy_from_pmem(iobuf + copied,
1085-
mmio->aperture + offset, c);
1089+
mmio->addr.aperture + offset, c);
1090+
}
10861091

10871092
copied += c;
10881093
len -= c;
@@ -1129,7 +1134,10 @@ static void nfit_spa_mapping_release(struct kref *kref)
11291134

11301135
WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
11311136
dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
1132-
iounmap(spa_map->iomem);
1137+
if (spa_map->type == SPA_MAP_APERTURE)
1138+
memunmap((void __force *)spa_map->addr.aperture);
1139+
else
1140+
iounmap(spa_map->addr.base);
11331141
release_mem_region(spa->address, spa->length);
11341142
list_del(&spa_map->list);
11351143
kfree(spa_map);
@@ -1175,7 +1183,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
11751183
spa_map = find_spa_mapping(acpi_desc, spa);
11761184
if (spa_map) {
11771185
kref_get(&spa_map->kref);
1178-
return spa_map->iomem;
1186+
return spa_map->addr.base;
11791187
}
11801188

11811189
spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
@@ -1191,20 +1199,19 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
11911199
if (!res)
11921200
goto err_mem;
11931201

1194-
if (type == SPA_MAP_APERTURE) {
1195-
/*
1196-
* TODO: memremap_pmem() support, but that requires cache
1197-
* flushing when the aperture is moved.
1198-
*/
1199-
spa_map->iomem = ioremap_wc(start, n);
1200-
} else
1201-
spa_map->iomem = ioremap_nocache(start, n);
1202+
spa_map->type = type;
1203+
if (type == SPA_MAP_APERTURE)
1204+
spa_map->addr.aperture = (void __pmem *)memremap(start, n,
1205+
ARCH_MEMREMAP_PMEM);
1206+
else
1207+
spa_map->addr.base = ioremap_nocache(start, n);
1208+
12021209

1203-
if (!spa_map->iomem)
1210+
if (!spa_map->addr.base)
12041211
goto err_map;
12051212

12061213
list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
1207-
return spa_map->iomem;
1214+
return spa_map->addr.base;
12081215

12091216
err_map:
12101217
release_mem_region(start, n);
@@ -1267,7 +1274,7 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
12671274
nfit_blk->dimm_flags = flags.flags;
12681275
else if (rc == -ENOTTY) {
12691276
/* fall back to a conservative default */
1270-
nfit_blk->dimm_flags = ND_BLK_DCR_LATCH;
1277+
nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH;
12711278
rc = 0;
12721279
} else
12731280
rc = -ENXIO;
@@ -1307,9 +1314,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
13071314
/* map block aperture memory */
13081315
nfit_blk->bdw_offset = nfit_mem->bdw->offset;
13091316
mmio = &nfit_blk->mmio[BDW];
1310-
mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
1317+
mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
13111318
SPA_MAP_APERTURE);
1312-
if (!mmio->base) {
1319+
if (!mmio->addr.base) {
13131320
dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
13141321
nvdimm_name(nvdimm));
13151322
return -ENOMEM;
@@ -1330,9 +1337,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
13301337
nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
13311338
nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
13321339
mmio = &nfit_blk->mmio[DCR];
1333-
mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
1340+
mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
13341341
SPA_MAP_CONTROL);
1335-
if (!mmio->base) {
1342+
if (!mmio->addr.base) {
13361343
dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
13371344
nvdimm_name(nvdimm));
13381345
return -ENOMEM;
@@ -1399,7 +1406,7 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
13991406
for (i = 0; i < 2; i++) {
14001407
struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
14011408

1402-
if (mmio->base)
1409+
if (mmio->addr.base)
14031410
nfit_spa_unmap(acpi_desc, mmio->spa);
14041411
}
14051412
nd_blk_region_set_provider_data(ndbr, NULL);

drivers/acpi/nfit.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ enum nfit_uuids {
4141
};
4242

4343
enum {
44+
ND_BLK_READ_FLUSH = 1,
4445
ND_BLK_DCR_LATCH = 2,
4546
};
4647

@@ -117,12 +118,16 @@ enum nd_blk_mmio_selector {
117118
DCR,
118119
};
119120

121+
struct nd_blk_addr {
122+
union {
123+
void __iomem *base;
124+
void __pmem *aperture;
125+
};
126+
};
127+
120128
struct nfit_blk {
121129
struct nfit_blk_mmio {
122-
union {
123-
void __iomem *base;
124-
void __pmem *aperture;
125-
};
130+
struct nd_blk_addr addr;
126131
u64 size;
127132
u64 base_offset;
128133
u32 line_size;
@@ -149,7 +154,8 @@ struct nfit_spa_mapping {
149154
struct acpi_nfit_system_address *spa;
150155
struct list_head list;
151156
struct kref kref;
152-
void __iomem *iomem;
157+
enum spa_map_type type;
158+
struct nd_blk_addr addr;
153159
};
154160

155161
static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)

lib/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,4 +531,7 @@ config ARCH_HAS_SG_CHAIN
531531
config ARCH_HAS_PMEM_API
532532
bool
533533

534+
config ARCH_HAS_MMIO_FLUSH
535+
bool
536+
534537
endmenu

tools/testing/nvdimm/Kbuild

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
ldflags-y += --wrap=ioremap_wc
2+
ldflags-y += --wrap=memremap
23
ldflags-y += --wrap=devm_ioremap_nocache
34
ldflags-y += --wrap=devm_memremap
45
ldflags-y += --wrap=ioremap_nocache
56
ldflags-y += --wrap=iounmap
7+
ldflags-y += --wrap=memunmap
68
ldflags-y += --wrap=__devm_request_region
79
ldflags-y += --wrap=__request_region
810
ldflags-y += --wrap=__release_region

tools/testing/nvdimm/test/iomap.c

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,25 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
8989
nfit_res = get_nfit_res(offset);
9090
rcu_read_unlock();
9191
if (nfit_res)
92-
return (void __iomem *) nfit_res->buf + offset
93-
- nfit_res->res->start;
92+
return nfit_res->buf + offset - nfit_res->res->start;
9493
return devm_memremap(dev, offset, size, flags);
9594
}
9695
EXPORT_SYMBOL(__wrap_devm_memremap);
9796

97+
void *__wrap_memremap(resource_size_t offset, size_t size,
98+
unsigned long flags)
99+
{
100+
struct nfit_test_resource *nfit_res;
101+
102+
rcu_read_lock();
103+
nfit_res = get_nfit_res(offset);
104+
rcu_read_unlock();
105+
if (nfit_res)
106+
return nfit_res->buf + offset - nfit_res->res->start;
107+
return memremap(offset, size, flags);
108+
}
109+
EXPORT_SYMBOL(__wrap_memremap);
110+
98111
void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
99112
{
100113
return __nfit_test_ioremap(offset, size, ioremap_nocache);
@@ -120,6 +133,19 @@ void __wrap_iounmap(volatile void __iomem *addr)
120133
}
121134
EXPORT_SYMBOL(__wrap_iounmap);
122135

136+
void __wrap_memunmap(void *addr)
137+
{
138+
struct nfit_test_resource *nfit_res;
139+
140+
rcu_read_lock();
141+
nfit_res = get_nfit_res((unsigned long) addr);
142+
rcu_read_unlock();
143+
if (nfit_res)
144+
return;
145+
return memunmap(addr);
146+
}
147+
EXPORT_SYMBOL(__wrap_memunmap);
148+
123149
static struct resource *nfit_test_request_region(struct device *dev,
124150
struct resource *parent, resource_size_t start,
125151
resource_size_t n, const char *name, int flags)

tools/testing/nvdimm/test/nfit.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,9 +1029,13 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
10291029

10301030
lane = nd_region_acquire_lane(nd_region);
10311031
if (rw)
1032-
memcpy(mmio->base + dpa, iobuf, len);
1033-
else
1034-
memcpy(iobuf, mmio->base + dpa, len);
1032+
memcpy(mmio->addr.base + dpa, iobuf, len);
1033+
else {
1034+
memcpy(iobuf, mmio->addr.base + dpa, len);
1035+
1036+
/* give us some coverage of the mmio_flush_range() API */
1037+
mmio_flush_range(mmio->addr.base + dpa, len);
1038+
}
10351039
nd_region_release_lane(nd_region, lane);
10361040

10371041
return 0;

0 commit comments

Comments
 (0)