Skip to content

Commit df06b37

Browse files
Keith Busch authored and torvalds committed
mm/gup: cache dev_pagemap while pinning pages
Getting pages from ZONE_DEVICE memory needs to check the backing device's live-ness, which is tracked in the device's dev_pagemap metadata. This metadata is stored in a radix tree and looking it up adds measurable software overhead. This patch avoids repeating this relatively costly operation when dev_pagemap is used by caching the last dev_pagemap while getting user pages. The gup_benchmark kernel self test reports this reduces time to get user pages to as low as 1/3 of the previous time. Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Keith Busch <[email protected]> Reviewed-by: Dan Williams <[email protected]> Acked-by: Kirill A. Shutemov <[email protected]> Cc: Dave Hansen <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 9fd61bc commit df06b37

File tree

5 files changed

+79
-73
lines changed

5 files changed

+79
-73
lines changed

include/linux/huge_mm.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -213,9 +213,9 @@ static inline int hpage_nr_pages(struct page *page)
213213
}
214214

215215
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
216-
pmd_t *pmd, int flags);
216+
pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
217217
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
218-
pud_t *pud, int flags);
218+
pud_t *pud, int flags, struct dev_pagemap **pgmap);
219219

220220
extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
221221

@@ -344,13 +344,13 @@ static inline void mm_put_huge_zero_page(struct mm_struct *mm)
344344
}
345345

346346
static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
347-
unsigned long addr, pmd_t *pmd, int flags)
347+
unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
348348
{
349349
return NULL;
350350
}
351351

352352
static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
353-
unsigned long addr, pud_t *pud, int flags)
353+
unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap)
354354
{
355355
return NULL;
356356
}

include/linux/mm.h

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2536,16 +2536,8 @@ static inline vm_fault_t vmf_error(int err)
25362536
return VM_FAULT_SIGBUS;
25372537
}
25382538

2539-
struct page *follow_page_mask(struct vm_area_struct *vma,
2540-
unsigned long address, unsigned int foll_flags,
2541-
unsigned int *page_mask);
2542-
2543-
static inline struct page *follow_page(struct vm_area_struct *vma,
2544-
unsigned long address, unsigned int foll_flags)
2545-
{
2546-
unsigned int unused_page_mask;
2547-
return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
2548-
}
2539+
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
2540+
unsigned int foll_flags);
25492541

25502542
#define FOLL_WRITE 0x01 /* check pte is writable */
25512543
#define FOLL_TOUCH 0x02 /* mark page accessed */

mm/gup.c

Lines changed: 65 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,11 @@
2020

2121
#include "internal.h"
2222

23+
struct follow_page_context {
24+
struct dev_pagemap *pgmap;
25+
unsigned int page_mask;
26+
};
27+
2328
static struct page *no_page_table(struct vm_area_struct *vma,
2429
unsigned int flags)
2530
{
@@ -71,10 +76,10 @@ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
7176
}
7277

7378
static struct page *follow_page_pte(struct vm_area_struct *vma,
74-
unsigned long address, pmd_t *pmd, unsigned int flags)
79+
unsigned long address, pmd_t *pmd, unsigned int flags,
80+
struct dev_pagemap **pgmap)
7581
{
7682
struct mm_struct *mm = vma->vm_mm;
77-
struct dev_pagemap *pgmap = NULL;
7883
struct page *page;
7984
spinlock_t *ptl;
8085
pte_t *ptep, pte;
@@ -116,8 +121,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
116121
* Only return device mapping pages in the FOLL_GET case since
117122
* they are only valid while holding the pgmap reference.
118123
*/
119-
pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
120-
if (pgmap)
124+
*pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
125+
if (*pgmap)
121126
page = pte_page(pte);
122127
else
123128
goto no_page;
@@ -152,15 +157,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
152157
goto retry;
153158
}
154159

155-
if (flags & FOLL_GET) {
160+
if (flags & FOLL_GET)
156161
get_page(page);
157-
158-
/* drop the pgmap reference now that we hold the page */
159-
if (pgmap) {
160-
put_dev_pagemap(pgmap);
161-
pgmap = NULL;
162-
}
163-
}
164162
if (flags & FOLL_TOUCH) {
165163
if ((flags & FOLL_WRITE) &&
166164
!pte_dirty(pte) && !PageDirty(page))
@@ -210,7 +208,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
210208

211209
static struct page *follow_pmd_mask(struct vm_area_struct *vma,
212210
unsigned long address, pud_t *pudp,
213-
unsigned int flags, unsigned int *page_mask)
211+
unsigned int flags,
212+
struct follow_page_context *ctx)
214213
{
215214
pmd_t *pmd, pmdval;
216215
spinlock_t *ptl;
@@ -258,13 +257,13 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
258257
}
259258
if (pmd_devmap(pmdval)) {
260259
ptl = pmd_lock(mm, pmd);
261-
page = follow_devmap_pmd(vma, address, pmd, flags);
260+
page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
262261
spin_unlock(ptl);
263262
if (page)
264263
return page;
265264
}
266265
if (likely(!pmd_trans_huge(pmdval)))
267-
return follow_page_pte(vma, address, pmd, flags);
266+
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
268267

269268
if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
270269
return no_page_table(vma, flags);
@@ -284,7 +283,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
284283
}
285284
if (unlikely(!pmd_trans_huge(*pmd))) {
286285
spin_unlock(ptl);
287-
return follow_page_pte(vma, address, pmd, flags);
286+
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
288287
}
289288
if (flags & FOLL_SPLIT) {
290289
int ret;
@@ -307,18 +306,18 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
307306
}
308307

309308
return ret ? ERR_PTR(ret) :
310-
follow_page_pte(vma, address, pmd, flags);
309+
follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
311310
}
312311
page = follow_trans_huge_pmd(vma, address, pmd, flags);
313312
spin_unlock(ptl);
314-
*page_mask = HPAGE_PMD_NR - 1;
313+
ctx->page_mask = HPAGE_PMD_NR - 1;
315314
return page;
316315
}
317316

318-
319317
static struct page *follow_pud_mask(struct vm_area_struct *vma,
320318
unsigned long address, p4d_t *p4dp,
321-
unsigned int flags, unsigned int *page_mask)
319+
unsigned int flags,
320+
struct follow_page_context *ctx)
322321
{
323322
pud_t *pud;
324323
spinlock_t *ptl;
@@ -344,21 +343,21 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
344343
}
345344
if (pud_devmap(*pud)) {
346345
ptl = pud_lock(mm, pud);
347-
page = follow_devmap_pud(vma, address, pud, flags);
346+
page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
348347
spin_unlock(ptl);
349348
if (page)
350349
return page;
351350
}
352351
if (unlikely(pud_bad(*pud)))
353352
return no_page_table(vma, flags);
354353

355-
return follow_pmd_mask(vma, address, pud, flags, page_mask);
354+
return follow_pmd_mask(vma, address, pud, flags, ctx);
356355
}
357356

358-
359357
static struct page *follow_p4d_mask(struct vm_area_struct *vma,
360358
unsigned long address, pgd_t *pgdp,
361-
unsigned int flags, unsigned int *page_mask)
359+
unsigned int flags,
360+
struct follow_page_context *ctx)
362361
{
363362
p4d_t *p4d;
364363
struct page *page;
@@ -378,7 +377,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
378377
return page;
379378
return no_page_table(vma, flags);
380379
}
381-
return follow_pud_mask(vma, address, p4d, flags, page_mask);
380+
return follow_pud_mask(vma, address, p4d, flags, ctx);
382381
}
383382

384383
/**
@@ -396,13 +395,13 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
396395
*/
397396
struct page *follow_page_mask(struct vm_area_struct *vma,
398397
unsigned long address, unsigned int flags,
399-
unsigned int *page_mask)
398+
struct follow_page_context *ctx)
400399
{
401400
pgd_t *pgd;
402401
struct page *page;
403402
struct mm_struct *mm = vma->vm_mm;
404403

405-
*page_mask = 0;
404+
ctx->page_mask = 0;
406405

407406
/* make this handle hugepd */
408407
page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
@@ -431,7 +430,19 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
431430
return no_page_table(vma, flags);
432431
}
433432

434-
return follow_p4d_mask(vma, address, pgd, flags, page_mask);
433+
return follow_p4d_mask(vma, address, pgd, flags, ctx);
434+
}
435+
436+
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
437+
unsigned int foll_flags)
438+
{
439+
struct follow_page_context ctx = { NULL };
440+
struct page *page;
441+
442+
page = follow_page_mask(vma, address, foll_flags, &ctx);
443+
if (ctx.pgmap)
444+
put_dev_pagemap(ctx.pgmap);
445+
return page;
435446
}
436447

437448
static int get_gate_page(struct mm_struct *mm, unsigned long address,
@@ -659,9 +670,9 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
659670
unsigned int gup_flags, struct page **pages,
660671
struct vm_area_struct **vmas, int *nonblocking)
661672
{
662-
long i = 0;
663-
unsigned int page_mask;
673+
long ret = 0, i = 0;
664674
struct vm_area_struct *vma = NULL;
675+
struct follow_page_context ctx = { NULL };
665676

666677
if (!nr_pages)
667678
return 0;
@@ -691,12 +702,14 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
691702
pages ? &pages[i] : NULL);
692703
if (ret)
693704
return i ? : ret;
694-
page_mask = 0;
705+
ctx.page_mask = 0;
695706
goto next_page;
696707
}
697708

698-
if (!vma || check_vma_flags(vma, gup_flags))
699-
return i ? : -EFAULT;
709+
if (!vma || check_vma_flags(vma, gup_flags)) {
710+
ret = -EFAULT;
711+
goto out;
712+
}
700713
if (is_vm_hugetlb_page(vma)) {
701714
i = follow_hugetlb_page(mm, vma, pages, vmas,
702715
&start, &nr_pages, i,
@@ -709,23 +722,26 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
709722
* If we have a pending SIGKILL, don't keep faulting pages and
710723
* potentially allocating memory.
711724
*/
712-
if (unlikely(fatal_signal_pending(current)))
713-
return i ? i : -ERESTARTSYS;
725+
if (unlikely(fatal_signal_pending(current))) {
726+
ret = -ERESTARTSYS;
727+
goto out;
728+
}
714729
cond_resched();
715-
page = follow_page_mask(vma, start, foll_flags, &page_mask);
730+
731+
page = follow_page_mask(vma, start, foll_flags, &ctx);
716732
if (!page) {
717-
int ret;
718733
ret = faultin_page(tsk, vma, start, &foll_flags,
719734
nonblocking);
720735
switch (ret) {
721736
case 0:
722737
goto retry;
738+
case -EBUSY:
739+
ret = 0;
740+
/* FALLTHRU */
723741
case -EFAULT:
724742
case -ENOMEM:
725743
case -EHWPOISON:
726-
return i ? i : ret;
727-
case -EBUSY:
728-
return i;
744+
goto out;
729745
case -ENOENT:
730746
goto next_page;
731747
}
@@ -737,27 +753,31 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
737753
*/
738754
goto next_page;
739755
} else if (IS_ERR(page)) {
740-
return i ? i : PTR_ERR(page);
756+
ret = PTR_ERR(page);
757+
goto out;
741758
}
742759
if (pages) {
743760
pages[i] = page;
744761
flush_anon_page(vma, page, start);
745762
flush_dcache_page(page);
746-
page_mask = 0;
763+
ctx.page_mask = 0;
747764
}
748765
next_page:
749766
if (vmas) {
750767
vmas[i] = vma;
751-
page_mask = 0;
768+
ctx.page_mask = 0;
752769
}
753-
page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
770+
page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
754771
if (page_increm > nr_pages)
755772
page_increm = nr_pages;
756773
i += page_increm;
757774
start += page_increm * PAGE_SIZE;
758775
nr_pages -= page_increm;
759776
} while (nr_pages);
760-
return i;
777+
out:
778+
if (ctx.pgmap)
779+
put_dev_pagemap(ctx.pgmap);
780+
return i ? i : ret;
761781
}
762782

763783
static bool vma_permits_fault(struct vm_area_struct *vma,

mm/huge_memory.c

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -852,11 +852,10 @@ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
852852
}
853853

854854
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
855-
pmd_t *pmd, int flags)
855+
pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
856856
{
857857
unsigned long pfn = pmd_pfn(*pmd);
858858
struct mm_struct *mm = vma->vm_mm;
859-
struct dev_pagemap *pgmap;
860859
struct page *page;
861860

862861
assert_spin_locked(pmd_lockptr(mm, pmd));
@@ -886,12 +885,11 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
886885
return ERR_PTR(-EEXIST);
887886

888887
pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
889-
pgmap = get_dev_pagemap(pfn, NULL);
890-
if (!pgmap)
888+
*pgmap = get_dev_pagemap(pfn, *pgmap);
889+
if (!*pgmap)
891890
return ERR_PTR(-EFAULT);
892891
page = pfn_to_page(pfn);
893892
get_page(page);
894-
put_dev_pagemap(pgmap);
895893

896894
return page;
897895
}
@@ -1000,11 +998,10 @@ static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
1000998
}
1001999

10021000
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
1003-
pud_t *pud, int flags)
1001+
pud_t *pud, int flags, struct dev_pagemap **pgmap)
10041002
{
10051003
unsigned long pfn = pud_pfn(*pud);
10061004
struct mm_struct *mm = vma->vm_mm;
1007-
struct dev_pagemap *pgmap;
10081005
struct page *page;
10091006

10101007
assert_spin_locked(pud_lockptr(mm, pud));
@@ -1028,12 +1025,11 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
10281025
return ERR_PTR(-EEXIST);
10291026

10301027
pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
1031-
pgmap = get_dev_pagemap(pfn, NULL);
1032-
if (!pgmap)
1028+
*pgmap = get_dev_pagemap(pfn, *pgmap);
1029+
if (!*pgmap)
10331030
return ERR_PTR(-EFAULT);
10341031
page = pfn_to_page(pfn);
10351032
get_page(page);
1036-
put_dev_pagemap(pgmap);
10371033

10381034
return page;
10391035
}

0 commit comments

Comments
 (0)