@@ -471,24 +471,34 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
+struct queue_pages {
+	struct list_head *pagelist;
+	unsigned long flags;
+	nodemask_t *nmask;
+	struct vm_area_struct *prev;
+};
+
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
  */
-static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long addr, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		void *private)
+static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+			unsigned long end, struct mm_walk *walk)
 {
-	pte_t *orig_pte;
+	struct vm_area_struct *vma = walk->vma;
+	struct page *page;
+	struct queue_pages *qp = walk->private;
+	unsigned long flags = qp->flags;
+	int nid;
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	do {
-		struct page *page;
-		int nid;
+	split_huge_page_pmd(vma, addr, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
 
+	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		if (!pte_present(*pte))
 			continue;
 		page = vm_normal_page(vma, addr, *pte);
@@ -501,114 +511,46 @@ static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		if (PageReserved(page))
 			continue;
 		nid = page_to_nid(page);
-		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+		if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
 			continue;
 
 		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
-			migrate_page_add(page, private, flags);
-		else
-			break;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-	pte_unmap_unlock(orig_pte, ptl);
-	return addr != end;
+			migrate_page_add(page, qp->pagelist, flags);
+	}
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+	return 0;
 }
 
-static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
-		pmd_t *pmd, const nodemask_t *nodes, unsigned long flags,
-		void *private)
+static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+			       unsigned long addr, unsigned long end,
+			       struct mm_walk *walk)
 {
 #ifdef CONFIG_HUGETLB_PAGE
+	struct queue_pages *qp = walk->private;
+	unsigned long flags = qp->flags;
 	int nid;
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t entry;
 
-	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
-	entry = huge_ptep_get((pte_t *)pmd);
+	ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+	entry = huge_ptep_get(pte);
 	if (!pte_present(entry))
 		goto unlock;
 	page = pte_page(entry);
 	nid = page_to_nid(page);
-	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+	if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
 		goto unlock;
 	/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
 	if (flags & (MPOL_MF_MOVE_ALL) ||
 	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
-		isolate_huge_page(page, private);
+		isolate_huge_page(page, qp->pagelist);
 unlock:
 	spin_unlock(ptl);
 #else
 	BUG();
 #endif
-}
-
-static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
-		unsigned long addr, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		void *private)
-{
-	pmd_t *pmd;
-	unsigned long next;
-
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		if (!pmd_present(*pmd))
-			continue;
-		if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
-			queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
-						flags, private);
-			continue;
-		}
-		split_huge_page_pmd(vma, addr, pmd);
-		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
-			continue;
-		if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
-				    flags, private))
-			return -EIO;
-	} while (pmd++, addr = next, addr != end);
-	return 0;
-}
-
-static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
-		unsigned long addr, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		void *private)
-{
-	pud_t *pud;
-	unsigned long next;
-
-	pud = pud_offset(pgd, addr);
-	do {
-		next = pud_addr_end(addr, end);
-		if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
-			continue;
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
-				    flags, private))
-			return -EIO;
-	} while (pud++, addr = next, addr != end);
-	return 0;
-}
-
-static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
-		unsigned long addr, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		void *private)
-{
-	pgd_t *pgd;
-	unsigned long next;
-
-	pgd = pgd_offset(vma->vm_mm, addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
-				    flags, private))
-			return -EIO;
-	} while (pgd++, addr = next, addr != end);
 	return 0;
 }
 
@@ -641,6 +583,46 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+static int queue_pages_test_walk(unsigned long start, unsigned long end,
+				struct mm_walk *walk)
+{
+	struct vm_area_struct *vma = walk->vma;
+	struct queue_pages *qp = walk->private;
+	unsigned long endvma = vma->vm_end;
+	unsigned long flags = qp->flags;
+
+	if (endvma > end)
+		endvma = end;
+	if (vma->vm_start > start)
+		start = vma->vm_start;
+
+	if (!(flags & MPOL_MF_DISCONTIG_OK)) {
+		if (!vma->vm_next && vma->vm_end < end)
+			return -EFAULT;
+		if (qp->prev && qp->prev->vm_end < vma->vm_start)
+			return -EFAULT;
+	}
+
+	qp->prev = vma;
+
+	if (vma->vm_flags & VM_PFNMAP)
+		return 1;
+
+	if (flags & MPOL_MF_LAZY) {
+		/* Similar to task_numa_work, skip inaccessible VMAs */
+		if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
+			change_prot_numa(vma, start, endvma);
+		return 1;
+	}
+
+	if ((flags & MPOL_MF_STRICT) ||
+	    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
+	     vma_migratable(vma)))
+		/* queue pages from current vma */
+		return 0;
+	return 1;
+}
+
 /*
  * Walk through page tables and collect pages to be migrated.
  *
@@ -650,50 +632,24 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
  */
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags, void *private)
-{
-	int err = 0;
-	struct vm_area_struct *vma, *prev;
-
-	vma = find_vma(mm, start);
-	if (!vma)
-		return -EFAULT;
-	prev = NULL;
-	for (; vma && vma->vm_start < end; vma = vma->vm_next) {
-		unsigned long endvma = vma->vm_end;
-
-		if (endvma > end)
-			endvma = end;
-		if (vma->vm_start > start)
-			start = vma->vm_start;
-
-		if (!(flags & MPOL_MF_DISCONTIG_OK)) {
-			if (!vma->vm_next && vma->vm_end < end)
-				return -EFAULT;
-			if (prev && prev->vm_end < vma->vm_start)
-				return -EFAULT;
-		}
-
-		if (flags & MPOL_MF_LAZY) {
-			/* Similar to task_numa_work, skip inaccessible VMAs */
-			if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
-				change_prot_numa(vma, start, endvma);
-			goto next;
-		}
-
-		if ((flags & MPOL_MF_STRICT) ||
-		    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-		     vma_migratable(vma))) {
-
-			err = queue_pages_pgd_range(vma, start, endvma, nodes,
-						flags, private);
-			if (err)
-				break;
-		}
-next:
-		prev = vma;
-	}
-	return err;
+		nodemask_t *nodes, unsigned long flags,
+		struct list_head *pagelist)
+{
+	struct queue_pages qp = {
+		.pagelist = pagelist,
+		.flags = flags,
+		.nmask = nodes,
+		.prev = NULL,
+	};
+	struct mm_walk queue_pages_walk = {
+		.hugetlb_entry = queue_pages_hugetlb,
+		.pmd_entry = queue_pages_pte_range,
+		.test_walk = queue_pages_test_walk,
+		.mm = mm,
+		.private = &qp,
+	};
+
+	return walk_page_range(start, end, &queue_pages_walk);
 }
 
 /*
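Note (not part of the patch): the diff above drops the open-coded pgd/pud/pmd loops and drives queue_pages_range() through the generic page table walker. The per-walk parameters (pagelist, flags, nodemask, previous vma) move into struct queue_pages and reach the callbacks through mm_walk.private, while test_walk decides per VMA whether to descend (return 0), skip it (return 1), or abort with a negative error. The fragment below is a minimal, self-contained user-space sketch of that callback-plus-private-state pattern only; walker, walk_state, walk_range, demo_test_range and demo_entry are made-up illustration names, not kernel APIs, and the sketch makes no attempt to model page tables.

#include <stdio.h>

struct walk_state {			/* plays the role of struct queue_pages */
	unsigned long flags;
	int matched;
};

struct walker {				/* plays the role of struct mm_walk */
	/* return 1 to skip a range, 0 to walk it, <0 to abort the walk */
	int (*test_range)(unsigned long start, unsigned long end, void *priv);
	/* called once per unit inside a range that was not skipped */
	int (*entry)(unsigned long addr, void *priv);
	void *private;
};

static int walk_range(unsigned long start, unsigned long end,
		      unsigned long step, const struct walker *w)
{
	for (unsigned long addr = start; addr < end; addr += step) {
		int ret = w->test_range(addr, addr + step, w->private);

		if (ret < 0)
			return ret;	/* hard error, like -EFAULT above */
		if (ret > 0)
			continue;	/* skip, like a VM_PFNMAP vma */
		ret = w->entry(addr, w->private);
		if (ret)
			return ret;
	}
	return 0;
}

static int demo_test_range(unsigned long start, unsigned long end, void *priv)
{
	(void)end;
	(void)priv;
	/* skip one range to demonstrate the "return 1" convention */
	return start == 0x2000 ? 1 : 0;
}

static int demo_entry(unsigned long addr, void *priv)
{
	struct walk_state *st = priv;

	st->matched++;			/* stand-in for migrate_page_add() */
	printf("visited 0x%lx\n", addr);
	return 0;
}

int main(void)
{
	struct walk_state st = { .flags = 0, .matched = 0 };
	struct walker w = {
		.test_range = demo_test_range,
		.entry = demo_entry,
		.private = &st,
	};

	walk_range(0x1000, 0x5000, 0x1000, &w);
	printf("matched %d ranges\n", st.matched);
	return 0;
}

Keeping all per-walk state in one struct reached through a single private pointer is what lets the fixed-signature callbacks (queue_pages_pte_range, queue_pages_hugetlb, queue_pages_test_walk) still see the caller's nodemask and page list without threading extra arguments through every level of the walk.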