
Commit 94723aa

Michal Hocko authored and torvalds committed
mm: unclutter THP migration
THP migration is hacked into the generic migration path with rather surprising semantics. The migration allocation callback is supposed to check whether the THP can be migrated at once, and if that is not the case it allocates a simple order-0 page to migrate into. unmap_and_move then fixes that up by splitting the THP into small pages while moving the head page to the newly allocated order-0 page. The remaining pages are moved to the LRU list by split_huge_page. The same happens if the THP allocation fails. This is really ugly and error prone [1].

I also believe that splitting to the LRU lists via split_huge_page is inherently wrong, because the tail pages are not migrated at all. Some callers work around that by retrying (e.g. memory hotplug), but other pfn walkers are simply broken. For example, madvise_inject_error migrates the head page and then advances the next pfn by the huge page size. do_move_page_to_node_array and queue_pages_range (migrate_pages, mbind) split the THP before migration when THP migration is not supported and then fall back to single-page migration, but they do not handle the tail pages when the THP migration path cannot allocate a fresh THP, so we end up with ENOMEM and fail the whole migration, which is questionable behavior. Page compaction does not try to migrate large pages, so it should be immune.

This patch tries to unclutter the situation by moving the special THP handling up to the migrate_pages layer, where it actually belongs. If unmap_and_move fails with ENOMEM, we simply split the THP into the existing list and retry. This way we _always_ migrate all THP subpages, and specific migrate_pages users do not have to deal with this case in a special way.

[1] http://lkml.kernel.org/r/[email protected]

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Michal Hocko <[email protected]>
Acked-by: Kirill A. Shutemov <[email protected]>
Reviewed-by: Zi Yan <[email protected]>
Cc: Andrea Reale <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Naoya Horiguchi <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
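The resulting control flow is easiest to see in isolation. Below is a minimal userspace model of the new migrate_pages() fallback; this is not kernel code: struct page, split_to_list() and unmap_and_move() are simplified stand-ins, and the stand-in "allocator" fails every THP so the split path always triggers. On -ENOMEM for a huge page, the page is split into the same list, the head is retried immediately, and the tails are appended at the list tail so the walk migrates them later as base pages.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct page {
	int id;
	int order;		/* 0 = base page, >0 = model THP */
	struct page *next;
};

/* Stand-in for split_huge_page_to_list(): demote the head to a base
 * page and append the would-be tail pages at the tail of the list. */
static void split_to_list(struct page *head, struct page ***tailp)
{
	int i, n = 1 << head->order;

	head->order = 0;
	for (i = 1; i < n; i++) {
		struct page *t = malloc(sizeof(*t));
		if (!t)
			abort();
		t->id = head->id * 100 + i;
		t->order = 0;
		t->next = NULL;
		**tailp = t;
		*tailp = &t->next;
	}
}

/* Stand-in migration: pretend every THP allocation fails. */
static int unmap_and_move(struct page *page)
{
	return page->order ? -ENOMEM : 0;
}

int main(void)
{
	struct page c = { 3, 0, NULL };
	struct page b = { 2, 2, &c };	/* order-2 "THP" */
	struct page a = { 1, 0, &b };
	struct page **tail = &c.next;
	struct page *page;

	for (page = &a; page; page = page->next) {
retry:
		if (unmap_and_move(page) == -ENOMEM && page->order) {
			split_to_list(page, &tail);
			goto retry;	/* head page, now order-0 */
		}
		printf("migrated page %d\n", page->id);
	}
	return 0;	/* prints 1, 2, 3, then tails 201..203 */
}

In the real patch a base-page -ENOMEM still aborts the run (nr_failed++; goto out); only the THP case gets the split-and-retry treatment.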
1 parent 666feb2 commit 94723aa

File tree: 5 files changed, +36 −41 lines

include/linux/migrate.h

Lines changed: 2 additions & 2 deletions

@@ -42,9 +42,9 @@ static inline struct page *new_page_nodemask(struct page *page,
 		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
 				preferred_nid, nodemask);
 
-	if (thp_migration_supported() && PageTransHuge(page)) {
-		order = HPAGE_PMD_ORDER;
+	if (PageTransHuge(page)) {
 		gfp_mask |= GFP_TRANSHUGE;
+		order = HPAGE_PMD_ORDER;
 	}
 
 	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
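For context, this hunk sits in the middle of new_page_nodemask(). Reconstructed from the hunk's context lines, the post-patch helper reads roughly as below; everything outside the hunk (the gfp_mask defaults and the allocation/prep calls) is recalled from the 4.17-era source and may differ in detail:

static inline struct page *new_page_nodemask(struct page *page,
				int preferred_nid, nodemask_t *nodemask)
{
	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
	unsigned int order = 0;
	struct page *new_page = NULL;

	if (PageHuge(page))
		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
				preferred_nid, nodemask);

	if (PageTransHuge(page)) {
		gfp_mask |= GFP_TRANSHUGE;
		order = HPAGE_PMD_ORDER;
	}

	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
		gfp_mask |= __GFP_HIGHMEM;

	new_page = __alloc_pages_nodemask(gfp_mask, order,
				preferred_nid, nodemask);

	/* a THP came back; set up its compound metadata */
	if (new_page && PageTransHuge(page))
		prep_transhuge_page(new_page);

	return new_page;
}

The point of the change: the allocation callback no longer second-guesses thp_migration_supported(); it always tries to allocate a matching THP, and unmap_and_move() (in mm/migrate.c below) becomes the single place that rejects unsupported THP migration.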

mm/huge_memory.c

Lines changed: 6 additions & 0 deletions

@@ -2401,6 +2401,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
 
 	page_tail->index = head->index + tail;
 	page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
+
+	/*
+	 * always add to the tail because some iterators expect new
+	 * pages to show after the currently processed elements - e.g.
+	 * migrate_pages
+	 */
 	lru_add_page_tail(head, page_tail, lruvec, list);
 }
 
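The new comment documents the invariant migrate_pages() now depends on: a "safe" list iterator only caches the next node, so entries appended at the tail mid-walk are still visited, while entries spliced in behind the cursor would be missed. A small userspace model, with a plain singly linked list standing in for <linux/list.h>:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node {
	const char *name;
	struct node *next;
};

int main(void)
{
	struct node c = { "C", NULL };
	struct node b = { "B", &c };
	struct node a = { "A", &b };
	struct node *tail = &c, *pos, *n;

	/* "safe" walk: n caches pos->next before the body runs */
	for (pos = &a, n = pos->next; pos;
	     pos = n, n = pos ? pos->next : NULL) {
		printf("visit %s\n", pos->name);
		if (strcmp(pos->name, "B") == 0) {
			/* the split appends tail pages here... */
			struct node *t = malloc(sizeof(*t));
			if (!t)
				abort();
			t->name = "B-tail";
			t->next = NULL;
			tail->next = t;	/* ...at the tail: still visited */
			tail = t;
		}
	}
	return 0;	/* visits A, B, C, B-tail */
}

The one place this breaks down is a split at the last entry: the cached terminator predates the append. That is exactly why migrate_pages() re-primes its cursor with list_safe_reset_next() before retrying (see mm/migrate.c below).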

mm/memory_hotplug.c

Lines changed: 1 addition & 1 deletion

@@ -1372,7 +1372,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		if (isolate_huge_page(page, &source))
 			move_pages -= 1 << compound_order(head);
 		continue;
-	} else if (thp_migration_supported() && PageTransHuge(page))
+	} else if (PageTransHuge(page))
 		pfn = page_to_pfn(compound_head(page))
 			+ hpage_nr_pages(page) - 1;
 

mm/mempolicy.c

Lines changed: 3 additions & 28 deletions

@@ -446,15 +446,6 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 		__split_huge_pmd(walk->vma, pmd, addr, false, NULL);
 		goto out;
 	}
-	if (!thp_migration_supported()) {
-		get_page(page);
-		spin_unlock(ptl);
-		lock_page(page);
-		ret = split_huge_page(page);
-		unlock_page(page);
-		put_page(page);
-		goto out;
-	}
 	if (!queue_pages_required(page, qp)) {
 		ret = 1;
 		goto unlock;

@@ -495,7 +486,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
-retry:
+
 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		if (!pte_present(*pte))

@@ -511,22 +502,6 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 			continue;
 		if (!queue_pages_required(page, qp))
 			continue;
-		if (PageTransCompound(page) && !thp_migration_supported()) {
-			get_page(page);
-			pte_unmap_unlock(pte, ptl);
-			lock_page(page);
-			ret = split_huge_page(page);
-			unlock_page(page);
-			put_page(page);
-			/* Failed to split -- skip. */
-			if (ret) {
-				pte = pte_offset_map_lock(walk->mm, pmd,
-						addr, &ptl);
-				continue;
-			}
-			goto retry;
-		}
-
 		migrate_page_add(page, qp->pagelist, flags);
 	}
 	pte_unmap_unlock(pte - 1, ptl);

@@ -948,7 +923,7 @@ struct page *alloc_new_node_page(struct page *page, unsigned long node)
 	if (PageHuge(page))
 		return alloc_huge_page_node(page_hstate(compound_head(page)),
 					node);
-	else if (thp_migration_supported() && PageTransHuge(page)) {
+	else if (PageTransHuge(page)) {
 		struct page *thp;
 
 		thp = alloc_pages_node(node,

@@ -1124,7 +1099,7 @@ static struct page *new_page(struct page *page, unsigned long start)
 	if (PageHuge(page)) {
 		return alloc_huge_page_vma(page_hstate(compound_head(page)),
 				vma, address);
-	} else if (thp_migration_supported() && PageTransHuge(page)) {
+	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
 		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,

mm/migrate.c

Lines changed: 24 additions & 10 deletions

@@ -1139,6 +1139,9 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 	int rc = MIGRATEPAGE_SUCCESS;
 	struct page *newpage;
 
+	if (!thp_migration_supported() && PageTransHuge(page))
+		return -ENOMEM;
+
 	newpage = get_new_page(page, private);
 	if (!newpage)
 		return -ENOMEM;

@@ -1160,14 +1163,6 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 		goto out;
 	}
 
-	if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
-		lock_page(page);
-		rc = split_huge_page(page);
-		unlock_page(page);
-		if (rc)
-			goto out;
-	}
-
 	rc = __unmap_and_move(page, newpage, force, mode);
 	if (rc == MIGRATEPAGE_SUCCESS)
 		set_page_owner_migrate_reason(newpage, reason);

@@ -1381,6 +1376,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 		retry = 0;
 
 		list_for_each_entry_safe(page, page2, from, lru) {
+retry:
 			cond_resched();
 
 			if (PageHuge(page))

@@ -1394,6 +1390,26 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 
 			switch(rc) {
 			case -ENOMEM:
+				/*
+				 * THP migration might be unsupported or the
+				 * allocation could've failed so we should
+				 * retry on the same page with the THP split
+				 * to base pages.
+				 *
+				 * Head page is retried immediately and tail
+				 * pages are added to the tail of the list so
+				 * we encounter them after the rest of the list
+				 * is processed.
+				 */
+				if (PageTransHuge(page)) {
+					lock_page(page);
+					rc = split_huge_page_to_list(page, from);
+					unlock_page(page);
+					if (!rc) {
+						list_safe_reset_next(page, page2, lru);
+						goto retry;
+					}
+				}
 				nr_failed++;
 				goto out;
 			case -EAGAIN:

@@ -1480,8 +1496,6 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
 
 	/* FOLL_DUMP to ignore special (like zero) pages */
 	follflags = FOLL_GET | FOLL_DUMP;
-	if (!thp_migration_supported())
-		follflags |= FOLL_SPLIT;
 	page = follow_page(vma, addr, follflags);
 
 	err = PTR_ERR(page);
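The one helper in the new -ENOMEM branch that may be unfamiliar is list_safe_reset_next(). In <linux/list.h> it is (approximately) a one-line macro that re-reads the successor of the current entry:

/*
 * <linux/list.h> (approximate): re-prime the cached next pointer of a
 * list_for_each_entry_safe() walk after the list has been modified.
 */
#define list_safe_reset_next(pos, n, member) \
	n = list_next_entry(pos, member)

Without it, page2 would still point at whatever followed the THP before the split; in particular, when the THP was the last entry on the list, the walk would terminate at the stale cached terminator and never reach the freshly appended tail pages.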
