Commit cc5dfd5

Merge branch 'hmm-devmem-cleanup.4' into rdma.git hmm
Christoph Hellwig says:

====================
Below is a series that cleans up the dev_pagemap interface so that it is
more easily usable, which removes the need to wrap it in hmm and thus
allowing to kill a lot of code

Changes since v3:
 - pull in "mm/swap: Fix release_pages() when releasing devmap pages"
   and rebase the other patches on top of that
 - fold the hmm_devmem_add_resource into the DEVICE_PUBLIC memory
   removal patch
 - remove _vm_normal_page as it isn't needed without DEVICE_PUBLIC memory
 - pick up various ACKs

Changes since v2:
 - fix nvdimm kunit build
 - add a new memory type for device dax
 - fix a few issues in intermediate patches that didn't show up in the
   end result
 - incorporate feedback from Michal Hocko, including killing of the
   DEVICE_PUBLIC memory type entirely

Changes since v1:
 - rebase
 - also switch p2pdma to the internal refcount
 - add type checking for pgmap->type
 - rename the migrate method to migrate_to_ram
 - cleanup the altmap_valid flag
 - various tidbits from the reviews
====================

Conflicts resolved by:
 - Keeping Ira's version of the code in swap.c
 - Using the delete for the section in hmm.rst
 - Using the delete for the devmap code in hmm.c and .h

* branch 'hmm-devmem-cleanup.4': (24 commits)
  mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR
  mm: remove the HMM config option
  mm: sort out the DEVICE_PRIVATE Kconfig mess
  mm: simplify ZONE_DEVICE page private data
  mm: remove hmm_devmem_add
  mm: remove hmm_vma_alloc_locked_page
  nouveau: use devm_memremap_pages directly
  nouveau: use alloc_page_vma directly
  PCI/P2PDMA: use the dev_pagemap internal refcount
  device-dax: use the dev_pagemap internal refcount
  memremap: provide an optional internal refcount in struct dev_pagemap
  memremap: replace the altmap_valid field with a PGMAP_ALTMAP_VALID flag
  memremap: remove the data field in struct dev_pagemap
  memremap: add a migrate_to_ram method to struct dev_pagemap_ops
  memremap: lift the devmap_enable manipulation into devm_memremap_pages
  memremap: pass a struct dev_pagemap to ->kill and ->cleanup
  memremap: move dev_pagemap callbacks into a separate structure
  memremap: validate the pagemap type passed to devm_memremap_pages
  mm: factor out a devm_request_free_mem_region helper
  mm: export alloc_pages_vma
  ...

Signed-off-by: Jason Gunthorpe <[email protected]>
2 parents 9ec3f4c + b6b346a
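
The series replaces the hmm_devmem wrapper with callbacks hung off struct
dev_pagemap itself. Pieced together from the commit titles above and the
dax/nouveau conversions below, the resulting callback structure looks roughly
like this (a sketch for orientation, not the verbatim header):

	/* sketch of struct dev_pagemap_ops as assembled by this series */
	struct dev_pagemap_ops {
		/* called when the last reference to a device page drops */
		void (*page_free)(struct page *page);
		/* begin / finish tearing down the pagemap's refcount;
		 * optional now that an internal refcount is provided */
		void (*kill)(struct dev_pagemap *pgmap);
		void (*cleanup)(struct dev_pagemap *pgmap);
		/* MEMORY_DEVICE_PRIVATE only: migrate the faulting page
		 * back to system memory */
		vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf);
	};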

33 files changed: +370 additions, -1013 deletions

Documentation/vm/hmm.rst

Lines changed: 0 additions & 27 deletions
@@ -336,33 +336,6 @@ directly using struct page for device memory which left most kernel code paths
 unaware of the difference. We only need to make sure that no one ever tries to
 map those pages from the CPU side.
 
-HMM provides a set of helpers to register and hotplug device memory as a new
-region needing a struct page. This is offered through a very simple API::
-
-    struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
-                                      struct device *device,
-                                      unsigned long size);
-    void hmm_devmem_remove(struct hmm_devmem *devmem);
-
-The hmm_devmem_ops is where most of the important things are::
-
-    struct hmm_devmem_ops {
-        void (*free)(struct hmm_devmem *devmem, struct page *page);
-        vm_fault_t (*fault)(struct hmm_devmem *devmem,
-                            struct vm_area_struct *vma,
-                            unsigned long addr,
-                            struct page *page,
-                            unsigned flags,
-                            pmd_t *pmdp);
-    };
-
-The first callback (free()) happens when the last reference on a device page is
-dropped. This means the device page is now free and no longer used by anyone.
-The second callback happens whenever the CPU tries to access a device page
-which it cannot do. This second callback must trigger a migration back to
-system memory.
-
-
 Migration to and from device memory
 ===================================

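With hmm_devmem_add() gone, drivers register device memory against
devm_memremap_pages() directly. A minimal sketch of the replacement pattern,
modeled on the nouveau conversion further below (the "mydev" names are
hypothetical; error unwinding elided):

	#include <linux/memremap.h>
	#include <linux/mm.h>

	struct mydev {
		struct dev_pagemap pagemap;	/* embedded, driver-owned */
	};

	static vm_fault_t mydev_migrate_to_ram(struct vm_fault *vmf)
	{
		return VM_FAULT_SIGBUS;	/* stub; see the fault sketch below */
	}

	static const struct dev_pagemap_ops mydev_pagemap_ops = {
		.migrate_to_ram	= mydev_migrate_to_ram,
	};

	static int mydev_register_memory(struct device *dev, struct mydev *md,
					 unsigned long size)
	{
		/* carve a free physical address range out of iomem space */
		struct resource *res =
			devm_request_free_mem_region(dev, &iomem_resource, size);

		if (IS_ERR(res))
			return PTR_ERR(res);

		md->pagemap.type = MEMORY_DEVICE_PRIVATE;
		md->pagemap.res = *res;
		md->pagemap.ops = &mydev_pagemap_ops;
		/* no md->pagemap.ref: the internal refcount is used */
		return PTR_ERR_OR_ZERO(devm_memremap_pages(dev, &md->pagemap));
	}
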
arch/powerpc/mm/mem.c

Lines changed: 1 addition & 9 deletions
@@ -131,17 +131,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct page *page;
+	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
 	int ret;
 
-	/*
-	 * If we have an altmap then we need to skip over any reserved PFNs
-	 * when querying the zone.
-	 */
-	page = pfn_to_page(start_pfn);
-	if (altmap)
-		page += vmem_altmap_offset(altmap);
-
 	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
 
 	/* Remove htab bolted mappings for this section of memory */

arch/x86/mm/init_64.c

Lines changed: 2 additions & 6 deletions
@@ -1213,13 +1213,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct page *page = pfn_to_page(start_pfn);
-	struct zone *zone;
+	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
+	struct zone *zone = page_zone(page);
 
-	/* With altmap the first mapped page is offset from @start */
-	if (altmap)
-		page += vmem_altmap_offset(altmap);
-	zone = page_zone(page);
 	__remove_pages(zone, start_pfn, nr_pages, altmap);
 	kernel_physical_mapping_remove(start, start + size);
 }

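Both arch hunks above lean on vmem_altmap_offset() accepting a NULL altmap,
which lets the open-coded "if (altmap)" checks collapse into the initializer.
The helper's contract is roughly the following (illustrative; the real helper
lives in include/linux/memremap.h):

	/* number of PFNs at the base of the range that are consumed by the
	 * altmap itself and must be skipped; 0 when no altmap is in use */
	static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
	{
		if (altmap)
			return altmap->reserve + altmap->free;
		return 0;
	}
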
drivers/dax/dax-private.h

Lines changed: 0 additions & 4 deletions
@@ -43,17 +43,13 @@ struct dax_region {
  * @target_node: effective numa node if dev_dax memory range is onlined
  * @dev - device core
  * @pgmap - pgmap for memmap setup / lifetime (driver owned)
- * @ref: pgmap reference count (driver owned)
- * @cmp: @ref final put completion (driver owned)
  */
 struct dev_dax {
 	struct dax_region *region;
 	struct dax_device *dax_dev;
 	int target_node;
 	struct device dev;
 	struct dev_pagemap pgmap;
-	struct percpu_ref ref;
-	struct completion cmp;
 };
 
 static inline struct dev_dax *to_dev_dax(struct device *dev)

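The deleted @ref/@cmp pair is subsumed by the optional internal refcount that
"memremap: provide an optional internal refcount in struct dev_pagemap" adds
to the pagemap itself, roughly (a sketch of the relevant fields from memory,
not the full struct):

	struct dev_pagemap {
		struct percpu_ref *ref;		/* driver-supplied, or NULL... */
		struct percpu_ref internal_ref;	/* ...then this one is used */
		struct completion done;		/* final-put completion */
		enum memory_type type;
		const struct dev_pagemap_ops *ops;
		/* ... */
	};
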
drivers/dax/device.c

Lines changed: 1 addition & 40 deletions
@@ -14,37 +14,6 @@
 #include "dax-private.h"
 #include "bus.h"
 
-static struct dev_dax *ref_to_dev_dax(struct percpu_ref *ref)
-{
-	return container_of(ref, struct dev_dax, ref);
-}
-
-static void dev_dax_percpu_release(struct percpu_ref *ref)
-{
-	struct dev_dax *dev_dax = ref_to_dev_dax(ref);
-
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
-	complete(&dev_dax->cmp);
-}
-
-static void dev_dax_percpu_exit(struct percpu_ref *ref)
-{
-	struct dev_dax *dev_dax = ref_to_dev_dax(ref);
-
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
-	wait_for_completion(&dev_dax->cmp);
-	percpu_ref_exit(ref);
-}
-
-static void dev_dax_percpu_kill(struct percpu_ref *data)
-{
-	struct percpu_ref *ref = data;
-	struct dev_dax *dev_dax = ref_to_dev_dax(ref);
-
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
-	percpu_ref_kill(ref);
-}
-
 static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
 		const char *func)
 {
@@ -459,15 +428,7 @@ int dev_dax_probe(struct device *dev)
 		return -EBUSY;
 	}
 
-	init_completion(&dev_dax->cmp);
-	rc = percpu_ref_init(&dev_dax->ref, dev_dax_percpu_release, 0,
-			GFP_KERNEL);
-	if (rc)
-		return rc;
-
-	dev_dax->pgmap.ref = &dev_dax->ref;
-	dev_dax->pgmap.kill = dev_dax_percpu_kill;
-	dev_dax->pgmap.cleanup = dev_dax_percpu_exit;
+	dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
 	addr = devm_memremap_pages(dev, &dev_dax->pgmap);
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);

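With no driver-supplied ref, devm_memremap_pages() now falls back to the
pagemap's internal refcount, so dev_dax_probe() only has to pick a type. The
selection logic is roughly the following (a simplified sketch of the memremap
behavior this series introduces, not verbatim kernel code; the release
callback name is illustrative):

	if (!pgmap->ref) {
		/* no driver refcount: driver must not supply teardown ops */
		if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
			return ERR_PTR(-EINVAL);
		init_completion(&pgmap->done);
		error = percpu_ref_init(&pgmap->internal_ref,
					dev_pagemap_percpu_release, 0,
					GFP_KERNEL);
		if (error)
			return ERR_PTR(error);
		pgmap->ref = &pgmap->internal_ref;
	} else {
		/* driver-owned refcount requires both ->kill and ->cleanup */
		if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup)
			return ERR_PTR(-EINVAL);
	}
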
drivers/dax/pmem/core.c

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
 	struct dev_dax *dev_dax;
 	struct nd_namespace_io *nsio;
 	struct dax_region *dax_region;
-	struct dev_pagemap pgmap = { 0 };
+	struct dev_pagemap pgmap = { };
 	struct nd_namespace_common *ndns;
 	struct nd_dax *nd_dax = to_nd_dax(dev);
 	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;

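Both initializer forms zero the whole structure; the empty-brace form is the
idiomatic in-kernel spelling, and the likely motivation here is that { 0 }
explicitly initializes only the first member and can trip missing-braces
style warnings when that member is itself an aggregate:

	struct dev_pagemap pgmap1 = { 0 };	/* zeroed, but names the first
						 * member explicitly */
	struct dev_pagemap pgmap2 = { };	/* zeroed, names nothing (GNU C
						 * empty initializer) */
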
drivers/gpu/drm/nouveau/Kconfig

Lines changed: 3 additions & 3 deletions
@@ -84,11 +84,11 @@ config DRM_NOUVEAU_BACKLIGHT
 
 config DRM_NOUVEAU_SVM
 	bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
-	depends on ARCH_HAS_HMM
+	depends on DEVICE_PRIVATE
 	depends on DRM_NOUVEAU
+	depends on HMM_MIRROR
 	depends on STAGING
-	select HMM_MIRROR
-	select DEVICE_PRIVATE
+	select MIGRATE_VMA_HELPER
 	default n
 	help
 	  Say Y here if you want to enable experimental support for

drivers/gpu/drm/nouveau/nouveau_dmem.c

Lines changed: 47 additions & 56 deletions
@@ -72,14 +72,20 @@ struct nouveau_dmem_migrate {
 };
 
 struct nouveau_dmem {
-	struct hmm_devmem *devmem;
+	struct nouveau_drm *drm;
+	struct dev_pagemap pagemap;
 	struct nouveau_dmem_migrate migrate;
 	struct list_head chunk_free;
 	struct list_head chunk_full;
 	struct list_head chunk_empty;
 	struct mutex mutex;
 };
 
+static inline struct nouveau_dmem *page_to_dmem(struct page *page)
+{
+	return container_of(page->pgmap, struct nouveau_dmem, pagemap);
+}
+
 struct nouveau_dmem_fault {
 	struct nouveau_drm *drm;
 	struct nouveau_fence *fence;
@@ -96,14 +102,10 @@ struct nouveau_migrate {
 	unsigned long dma_nr;
 };
 
-static void
-nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page)
+static void nouveau_dmem_page_free(struct page *page)
 {
-	struct nouveau_dmem_chunk *chunk;
-	unsigned long idx;
-
-	chunk = (void *)hmm_devmem_page_get_drvdata(page);
-	idx = page_to_pfn(page) - chunk->pfn_first;
+	struct nouveau_dmem_chunk *chunk = page->zone_device_data;
+	unsigned long idx = page_to_pfn(page) - chunk->pfn_first;
 
 	/*
 	 * FIXME:
@@ -148,11 +150,12 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
 		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
 			continue;
 
-		dpage = hmm_vma_alloc_locked_page(vma, addr);
+		dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr);
 		if (!dpage) {
 			dst_pfns[i] = MIGRATE_PFN_ERROR;
 			continue;
 		}
+		lock_page(dpage);
 
 		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
 			      MIGRATE_PFN_LOCKED;
@@ -194,7 +197,7 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
 
 		dst_addr = fault->dma[fault->npages++];
 
-		chunk = (void *)hmm_devmem_page_get_drvdata(spage);
+		chunk = spage->zone_device_data;
 		src_addr = page_to_pfn(spage) - chunk->pfn_first;
 		src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset;
 
@@ -259,29 +262,21 @@ static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
 	.finalize_and_map	= nouveau_dmem_fault_finalize_and_map,
 };
 
-static vm_fault_t
-nouveau_dmem_fault(struct hmm_devmem *devmem,
-		   struct vm_area_struct *vma,
-		   unsigned long addr,
-		   const struct page *page,
-		   unsigned int flags,
-		   pmd_t *pmdp)
+static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 {
-	struct drm_device *drm_dev = dev_get_drvdata(devmem->device);
+	struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
 	unsigned long src[1] = {0}, dst[1] = {0};
-	struct nouveau_dmem_fault fault = {0};
+	struct nouveau_dmem_fault fault = { .drm = dmem->drm };
 	int ret;
 
-
-
 	/*
 	 * FIXME what we really want is to find some heuristic to migrate more
 	 * than just one page on CPU fault. When such fault happens it is very
 	 * likely that more surrounding page will CPU fault too.
 	 */
-	fault.drm = nouveau_drm(drm_dev);
-	ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vma, addr,
-			  addr + PAGE_SIZE, src, dst, &fault);
+	ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma,
+			vmf->address, vmf->address + PAGE_SIZE,
			src, dst, &fault);
 	if (ret)
 		return VM_FAULT_SIGBUS;
 
@@ -291,10 +286,9 @@ nouveau_dmem_fault(struct hmm_devmem *devmem,
 	return 0;
 }
 
-static const struct hmm_devmem_ops
-nouveau_dmem_devmem_ops = {
-	.free = nouveau_dmem_free,
-	.fault = nouveau_dmem_fault,
+static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
+	.page_free		= nouveau_dmem_page_free,
+	.migrate_to_ram		= nouveau_dmem_migrate_to_ram,
 };
 
 static int
@@ -580,7 +574,8 @@ void
 nouveau_dmem_init(struct nouveau_drm *drm)
 {
 	struct device *device = drm->dev->dev;
-	unsigned long i, size;
+	struct resource *res;
+	unsigned long i, size, pfn_first;
 	int ret;
 
 	/* This only make sense on PASCAL or newer */
@@ -590,6 +585,7 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 	if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
 		return;
 
+	drm->dmem->drm = drm;
 	mutex_init(&drm->dmem->mutex);
 	INIT_LIST_HEAD(&drm->dmem->chunk_free);
 	INIT_LIST_HEAD(&drm->dmem->chunk_full);
@@ -599,26 +595,25 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 
 	/* Initialize migration dma helpers before registering memory */
 	ret = nouveau_dmem_migrate_init(drm);
-	if (ret) {
-		kfree(drm->dmem);
-		drm->dmem = NULL;
-		return;
-	}
+	if (ret)
+		goto out_free;
 
 	/*
 	 * FIXME we need some kind of policy to decide how much VRAM we
 	 * want to register with HMM. For now just register everything
 	 * and latter if we want to do thing like over commit then we
 	 * could revisit this.
 	 */
-	drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops,
-					   device, size);
-	if (IS_ERR(drm->dmem->devmem)) {
-		kfree(drm->dmem);
-		drm->dmem = NULL;
-		return;
-	}
-
+	res = devm_request_free_mem_region(device, &iomem_resource, size);
+	if (IS_ERR(res))
+		goto out_free;
+	drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
+	drm->dmem->pagemap.res = *res;
+	drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops;
+	if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap)))
+		goto out_free;
+
+	pfn_first = res->start >> PAGE_SHIFT;
 	for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
 		struct nouveau_dmem_chunk *chunk;
 		struct page *page;
@@ -631,17 +626,19 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 		}
 
 		chunk->drm = drm;
-		chunk->pfn_first = drm->dmem->devmem->pfn_first;
-		chunk->pfn_first += (i * DMEM_CHUNK_NPAGES);
+		chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES);
 		list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
 
 		page = pfn_to_page(chunk->pfn_first);
-		for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) {
-			hmm_devmem_page_set_drvdata(page, (long)chunk);
-		}
+		for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
+			page->zone_device_data = chunk;
 	}
 
 	NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
+	return;
+out_free:
+	kfree(drm->dmem);
+	drm->dmem = NULL;
 }
 
 static void
@@ -697,7 +694,7 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
 		if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
 			continue;
 
-		chunk = (void *)hmm_devmem_page_get_drvdata(dpage);
+		chunk = dpage->zone_device_data;
 		dst_addr = page_to_pfn(dpage) - chunk->pfn_first;
 		dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset;
 
@@ -832,13 +829,7 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
 static inline bool
 nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
 {
-	if (!is_device_private_page(page))
-		return false;
-
-	if (drm->dmem->devmem != page->pgmap->data)
-		return false;
-
-	return true;
+	return is_device_private_page(page) && drm->dmem == page_to_dmem(page);
 }
 
 void
@@ -867,7 +858,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
 			continue;
 		}
 
-		chunk = (void *)hmm_devmem_page_get_drvdata(page);
+		chunk = page->zone_device_data;
 		addr = page_to_pfn(page) - chunk->pfn_first;
 		addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT;
 

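The nouveau conversion above is the template for CPU-fault handling under the
new API: ->migrate_to_ram() receives the vm_fault, recovers the driver
context via container_of() on vmf->page->pgmap, and migrates the single
faulting page back to system memory. Expanding the mydev_migrate_to_ram()
stub from the earlier sketch (still hypothetical; the migrate_vma_ops
callbacks that do the actual copy are elided):

	static struct mydev *page_to_mydev(struct page *page)
	{
		/* pagemap is embedded in mydev, so container_of() recovers
		 * the owning driver state from any of its device pages */
		return container_of(page->pgmap, struct mydev, pagemap);
	}

	/* alloc/copy/finalize callbacks omitted; a real driver must fill
	 * these in to copy device memory into the new system page */
	static const struct migrate_vma_ops mydev_migrate_ops;

	static vm_fault_t mydev_migrate_to_ram(struct vm_fault *vmf)
	{
		struct mydev *md = page_to_mydev(vmf->page);
		unsigned long src[1] = { 0 }, dst[1] = { 0 };

		/* migrate exactly the faulting page back to system RAM */
		if (migrate_vma(&mydev_migrate_ops, vmf->vma, vmf->address,
				vmf->address + PAGE_SIZE, src, dst, md))
			return VM_FAULT_SIGBUS;
		return 0;
	}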