
Commit 6f4576e

Naoya Horiguchi authored and torvalds committed
mempolicy: apply page table walker on queue_pages_range()
queue_pages_range() does page table walking in its own way now, but there is some code duplication. This patch applies the page table walker to reduce the lines of code.

queue_pages_range() has to do some precheck to determine whether we really walk over the vma or just skip it. Now we have the test_walk() callback in mm_walk for this purpose, so we can do this replacement cleanly. queue_pages_test_walk() depends not only on the current vma but also on the previous one, so queue_pages->prev is introduced to remember it.

Signed-off-by: Naoya Horiguchi <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Cyrill Gorcunov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 1757bbd commit 6f4576e

File tree: 1 file changed (mm/mempolicy.c), +92 -136 lines

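The commit message describes replacing an open-coded page table walk with the generic walker and its test_walk() callback. As a minimal illustrative sketch of that pattern (not taken from this commit; the example_* names and the per-walk state are hypothetical, and it assumes the mm_walk API as used in the diff below, where walk_page_range() takes start/end plus an mm_walk carrying .mm and .private), a caller wires up its callbacks roughly like this:

/*
 * Minimal sketch (not from this commit): a walk_page_range() user with a
 * test_walk() callback.  test_walk() returns 0 to walk the page tables of
 * the current VMA, a positive value to skip the VMA and continue, or a
 * negative errno to abort the whole walk.  Caller must hold mmap_sem.
 */
#include <linux/mm.h>

struct example_state {
        unsigned long nr_pmds;          /* hypothetical per-walk state */
};

static int example_test_walk(unsigned long start, unsigned long end,
                             struct mm_walk *walk)
{
        if (walk->vma->vm_flags & VM_PFNMAP)
                return 1;               /* skip this VMA, keep walking */
        return 0;                       /* descend into this VMA's page tables */
}

static int example_pmd_entry(pmd_t *pmd, unsigned long addr,
                             unsigned long end, struct mm_walk *walk)
{
        struct example_state *st = walk->private;

        st->nr_pmds++;                  /* per-pmd work goes here */
        return 0;
}

static int example_walk(struct mm_struct *mm, unsigned long start,
                        unsigned long end, struct example_state *st)
{
        struct mm_walk walk = {
                .pmd_entry = example_pmd_entry,
                .test_walk = example_test_walk,
                .mm = mm,
                .private = st,
        };

        return walk_page_range(start, end, &walk);
}

In the diff below, queue_pages_test_walk() plays the role of example_test_walk(): it returns 1 to skip a VMA (for example VM_PFNMAP mappings, or after handling MPOL_MF_LAZY), 0 to queue pages from the current VMA, and -EFAULT when the range is discontiguous and MPOL_MF_DISCONTIG_OK is not set.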

mm/mempolicy.c

Lines changed: 92 additions & 136 deletions
@@ -471,24 +471,34 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
                                 unsigned long flags);
 
+struct queue_pages {
+        struct list_head *pagelist;
+        unsigned long flags;
+        nodemask_t *nmask;
+        struct vm_area_struct *prev;
+};
+
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
  */
-static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-                unsigned long addr, unsigned long end,
-                const nodemask_t *nodes, unsigned long flags,
-                void *private)
+static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+                        unsigned long end, struct mm_walk *walk)
 {
-        pte_t *orig_pte;
+        struct vm_area_struct *vma = walk->vma;
+        struct page *page;
+        struct queue_pages *qp = walk->private;
+        unsigned long flags = qp->flags;
+        int nid;
         pte_t *pte;
         spinlock_t *ptl;
 
-        orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-        do {
-                struct page *page;
-                int nid;
+        split_huge_page_pmd(vma, addr, pmd);
+        if (pmd_trans_unstable(pmd))
+                return 0;
 
+        pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+        for (; addr != end; pte++, addr += PAGE_SIZE) {
                 if (!pte_present(*pte))
                         continue;
                 page = vm_normal_page(vma, addr, *pte);
@@ -501,114 +511,46 @@ static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                 if (PageReserved(page))
                         continue;
                 nid = page_to_nid(page);
-                if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+                if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
                         continue;
 
                 if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
-                        migrate_page_add(page, private, flags);
-                else
-                        break;
-        } while (pte++, addr += PAGE_SIZE, addr != end);
-        pte_unmap_unlock(orig_pte, ptl);
-        return addr != end;
+                        migrate_page_add(page, qp->pagelist, flags);
+        }
+        pte_unmap_unlock(pte - 1, ptl);
+        cond_resched();
+        return 0;
 }
 
-static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
-                pmd_t *pmd, const nodemask_t *nodes, unsigned long flags,
-                void *private)
+static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+                               unsigned long addr, unsigned long end,
+                               struct mm_walk *walk)
 {
 #ifdef CONFIG_HUGETLB_PAGE
+        struct queue_pages *qp = walk->private;
+        unsigned long flags = qp->flags;
         int nid;
         struct page *page;
         spinlock_t *ptl;
         pte_t entry;
 
-        ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
-        entry = huge_ptep_get((pte_t *)pmd);
+        ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+        entry = huge_ptep_get(pte);
         if (!pte_present(entry))
                 goto unlock;
         page = pte_page(entry);
         nid = page_to_nid(page);
-        if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+        if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
                 goto unlock;
         /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
         if (flags & (MPOL_MF_MOVE_ALL) ||
             (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
-                isolate_huge_page(page, private);
+                isolate_huge_page(page, qp->pagelist);
 unlock:
         spin_unlock(ptl);
 #else
         BUG();
 #endif
-}
-
-static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
-                unsigned long addr, unsigned long end,
-                const nodemask_t *nodes, unsigned long flags,
-                void *private)
-{
-        pmd_t *pmd;
-        unsigned long next;
-
-        pmd = pmd_offset(pud, addr);
-        do {
-                next = pmd_addr_end(addr, end);
-                if (!pmd_present(*pmd))
-                        continue;
-                if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
-                        queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
-                                                flags, private);
-                        continue;
-                }
-                split_huge_page_pmd(vma, addr, pmd);
-                if (pmd_none_or_trans_huge_or_clear_bad(pmd))
-                        continue;
-                if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
-                                    flags, private))
-                        return -EIO;
-        } while (pmd++, addr = next, addr != end);
-        return 0;
-}
-
-static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
-                unsigned long addr, unsigned long end,
-                const nodemask_t *nodes, unsigned long flags,
-                void *private)
-{
-        pud_t *pud;
-        unsigned long next;
-
-        pud = pud_offset(pgd, addr);
-        do {
-                next = pud_addr_end(addr, end);
-                if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
-                        continue;
-                if (pud_none_or_clear_bad(pud))
-                        continue;
-                if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
-                                    flags, private))
-                        return -EIO;
-        } while (pud++, addr = next, addr != end);
-        return 0;
-}
-
-static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
-                unsigned long addr, unsigned long end,
-                const nodemask_t *nodes, unsigned long flags,
-                void *private)
-{
-        pgd_t *pgd;
-        unsigned long next;
-
-        pgd = pgd_offset(vma->vm_mm, addr);
-        do {
-                next = pgd_addr_end(addr, end);
-                if (pgd_none_or_clear_bad(pgd))
-                        continue;
-                if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
-                                flags, private))
-                        return -EIO;
-        } while (pgd++, addr = next, addr != end);
         return 0;
 }
 
@@ -641,6 +583,46 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+static int queue_pages_test_walk(unsigned long start, unsigned long end,
+                                struct mm_walk *walk)
+{
+        struct vm_area_struct *vma = walk->vma;
+        struct queue_pages *qp = walk->private;
+        unsigned long endvma = vma->vm_end;
+        unsigned long flags = qp->flags;
+
+        if (endvma > end)
+                endvma = end;
+        if (vma->vm_start > start)
+                start = vma->vm_start;
+
+        if (!(flags & MPOL_MF_DISCONTIG_OK)) {
+                if (!vma->vm_next && vma->vm_end < end)
+                        return -EFAULT;
+                if (qp->prev && qp->prev->vm_end < vma->vm_start)
+                        return -EFAULT;
+        }
+
+        qp->prev = vma;
+
+        if (vma->vm_flags & VM_PFNMAP)
+                return 1;
+
+        if (flags & MPOL_MF_LAZY) {
+                /* Similar to task_numa_work, skip inaccessible VMAs */
+                if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
+                        change_prot_numa(vma, start, endvma);
+                return 1;
+        }
+
+        if ((flags & MPOL_MF_STRICT) ||
+            ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
+             vma_migratable(vma)))
+                /* queue pages from current vma */
+                return 0;
+        return 1;
+}
+
 /*
  * Walk through page tables and collect pages to be migrated.
  *
@@ -650,50 +632,24 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
  */
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
-                const nodemask_t *nodes, unsigned long flags, void *private)
-{
-        int err = 0;
-        struct vm_area_struct *vma, *prev;
-
-        vma = find_vma(mm, start);
-        if (!vma)
-                return -EFAULT;
-        prev = NULL;
-        for (; vma && vma->vm_start < end; vma = vma->vm_next) {
-                unsigned long endvma = vma->vm_end;
-
-                if (endvma > end)
-                        endvma = end;
-                if (vma->vm_start > start)
-                        start = vma->vm_start;
-
-                if (!(flags & MPOL_MF_DISCONTIG_OK)) {
-                        if (!vma->vm_next && vma->vm_end < end)
-                                return -EFAULT;
-                        if (prev && prev->vm_end < vma->vm_start)
-                                return -EFAULT;
-                }
-
-                if (flags & MPOL_MF_LAZY) {
-                        /* Similar to task_numa_work, skip inaccessible VMAs */
-                        if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
-                                change_prot_numa(vma, start, endvma);
-                        goto next;
-                }
-
-                if ((flags & MPOL_MF_STRICT) ||
-                    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-                      vma_migratable(vma))) {
-
-                        err = queue_pages_pgd_range(vma, start, endvma, nodes,
-                                                flags, private);
-                        if (err)
-                                break;
-                }
-next:
-                prev = vma;
-        }
-        return err;
+                nodemask_t *nodes, unsigned long flags,
+                struct list_head *pagelist)
+{
+        struct queue_pages qp = {
+                .pagelist = pagelist,
+                .flags = flags,
+                .nmask = nodes,
+                .prev = NULL,
+        };
+        struct mm_walk queue_pages_walk = {
+                .hugetlb_entry = queue_pages_hugetlb,
+                .pmd_entry = queue_pages_pte_range,
+                .test_walk = queue_pages_test_walk,
+                .mm = mm,
+                .private = &qp,
+        };
+
+        return walk_page_range(start, end, &queue_pages_walk);
 }
 
 /*

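For context only, beyond what this commit's hunks show: with the new signature, a caller such as do_mbind() elsewhere in mm/mempolicy.c can hand queue_pages_range() a typed list_head instead of a void *private cookie. A hedged sketch of such a call site (local names are illustrative, not taken from the diff above):

        LIST_HEAD(pagelist);            /* pages isolated by the walk end up here */
        int err;

        err = queue_pages_range(mm, start, end, nmask, flags, &pagelist);
        if (err < 0)
                return err;             /* e.g. -EFAULT from queue_pages_test_walk() */
        /* pages on &pagelist are now ready to be migrated */

Errors such as -EFAULT from queue_pages_test_walk() propagate back through walk_page_range() to the caller, matching the -EFAULT returns of the old open-coded loop.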