Commit 49d2e9c

Christoph Lameter authored and Linus Torvalds committed
[PATCH] Swap Migration V5: migrate_pages() function
This adds the basic page migration function with a minimal implementation that only allows the eviction of pages to swap space.

Page eviction and migration may be useful to migrate pages, to suspend programs, or to remap single pages (useful for faulty pages or pages with soft ECC failures).

The process is as follows:

The function wanting to migrate pages must first build a list of pages to be migrated or evicted and take them off the LRU lists via isolate_lru_page(). isolate_lru_page() determines that a page is freeable based on the LRU bit being set.

Then the actual migration or swapout can happen by calling migrate_pages(). migrate_pages() does its best to migrate or swap out the pages and makes multiple passes over the list. Some pages may only be swappable if they are not dirty, so migrate_pages() may start writing out dirty pages in the initial passes over the pages. However, migrate_pages() may not be able to migrate or evict all pages for a variety of reasons.

The remaining pages may be returned to the LRU lists using putback_lru_pages().

Changelog V4->V5:
- Use the lru caches to return pages to the LRU

Changelog V3->V4:
- Restructure code so that applying patches to support full migration requires only minimal changes. Rename swapout_pages() to migrate_pages().

Changelog V2->V3:
- Extract common code from shrink_list() and swapout_pages()

Signed-off-by: Mike Kravetz <[email protected]>
Signed-off-by: Christoph Lameter <[email protected]>
Cc: "Michael Kerrisk" <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
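To make that protocol concrete, here is a minimal caller-side sketch (editorial, not part of this commit; the candidate page and the page_list name are illustrative), using only the interfaces described above:

	LIST_HEAD(page_list);

	/* Take the candidate page off the LRU lists. */
	if (isolate_lru_page(page))
		list_add(&page->lru, &page_list);

	/*
	 * Try to migrate or evict everything on the list. A NULL target
	 * list means the pages are evicted to swap, the only mode this
	 * minimal implementation supports.
	 */
	migrate_pages(&page_list, NULL);

	/* Return whatever could not be migrated to the LRU lists. */
	putback_lru_pages(&page_list);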
1 parent 930d915 · commit 49d2e9c

2 files changed: +182 additions, -34 deletions

include/linux/swap.h

Lines changed: 2 additions & 0 deletions
@@ -178,6 +178,8 @@ extern int vm_swappiness;
 extern int isolate_lru_page(struct page *p);
 extern int putback_lru_pages(struct list_head *l);
 
+extern int migrate_pages(struct list_head *l, struct list_head *t);
+
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
 extern int shmem_unuse(swp_entry_t entry, struct page *page);

mm/vmscan.c

Lines changed: 180 additions & 34 deletions
@@ -373,6 +373,43 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 	return PAGE_CLEAN;
 }
 
+static int remove_mapping(struct address_space *mapping, struct page *page)
+{
+	if (!mapping)
+		return 0;		/* truncate got there first */
+
+	write_lock_irq(&mapping->tree_lock);
+
+	/*
+	 * The non-racy check for busy page. It is critical to check
+	 * PageDirty _after_ making sure that the page is freeable and
+	 * not in use by anybody. (pagecache + us == 2)
+	 */
+	if (unlikely(page_count(page) != 2))
+		goto cannot_free;
+	smp_rmb();
+	if (unlikely(PageDirty(page)))
+		goto cannot_free;
+
+	if (PageSwapCache(page)) {
+		swp_entry_t swap = { .val = page_private(page) };
+		__delete_from_swap_cache(page);
+		write_unlock_irq(&mapping->tree_lock);
+		swap_free(swap);
+		__put_page(page);	/* The pagecache ref */
+		return 1;
+	}
+
+	__remove_from_page_cache(page);
+	write_unlock_irq(&mapping->tree_lock);
+	__put_page(page);
+	return 1;
+
+cannot_free:
+	write_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
 /*
  * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
  */
@@ -504,36 +541,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 			goto free_it;
 		}
 
-		if (!mapping)
-			goto keep_locked;	/* truncate got there first */
-
-		write_lock_irq(&mapping->tree_lock);
-
-		/*
-		 * The non-racy check for busy page. It is critical to check
-		 * PageDirty _after_ making sure that the page is freeable and
-		 * not in use by anybody. (pagecache + us == 2)
-		 */
-		if (unlikely(page_count(page) != 2))
-			goto cannot_free;
-		smp_rmb();
-		if (unlikely(PageDirty(page)))
-			goto cannot_free;
-
-#ifdef CONFIG_SWAP
-		if (PageSwapCache(page)) {
-			swp_entry_t swap = { .val = page_private(page) };
-			__delete_from_swap_cache(page);
-			write_unlock_irq(&mapping->tree_lock);
-			swap_free(swap);
-			__put_page(page);	/* The pagecache ref */
-			goto free_it;
-		}
-#endif /* CONFIG_SWAP */
-
-		__remove_from_page_cache(page);
-		write_unlock_irq(&mapping->tree_lock);
-		__put_page(page);
+		if (!remove_mapping(mapping, page))
+			goto keep_locked;
 
 free_it:
 		unlock_page(page);
@@ -542,10 +551,6 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 			__pagevec_release_nonlru(&freed_pvec);
 		continue;
 
-cannot_free:
-		write_unlock_irq(&mapping->tree_lock);
-		goto keep_locked;
-
 activate_locked:
 		SetPageActive(page);
 		pgactivate++;
@@ -563,6 +568,147 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 	return reclaimed;
 }
 
+/*
+ * swapout a single page
+ * page is locked upon entry, unlocked on exit
+ *
+ * return codes:
+ *	0 = complete
+ *	1 = retry
+ */
+static int swap_page(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+
+	if (page_mapped(page) && mapping)
+		if (try_to_unmap(page) != SWAP_SUCCESS)
+			goto unlock_retry;
+
+	if (PageDirty(page)) {
+		/* Page is dirty, try to write it out here */
+		switch(pageout(page, mapping)) {
+		case PAGE_KEEP:
+		case PAGE_ACTIVATE:
+			goto unlock_retry;
+
+		case PAGE_SUCCESS:
+			goto retry;
+
+		case PAGE_CLEAN:
+			; /* try to free the page below */
+		}
+	}
+
+	if (PagePrivate(page)) {
+		if (!try_to_release_page(page, GFP_KERNEL) ||
+		    (!mapping && page_count(page) == 1))
+			goto unlock_retry;
+	}
+
+	if (remove_mapping(mapping, page)) {
+		/* Success */
+		unlock_page(page);
+		return 0;
+	}
+
+unlock_retry:
+	unlock_page(page);
+
+retry:
+	return 1;
+}
+/*
+ * migrate_pages
+ *
+ * Two lists are passed to this function. The first list
+ * contains the pages isolated from the LRU to be migrated.
+ * The second list contains new pages that the pages isolated
+ * can be moved to. If the second list is NULL then all
+ * pages are swapped out.
+ *
+ * The function returns after 10 attempts or if no pages
+ * are movable anymore because t has become empty
+ * or no retryable pages exist anymore.
+ *
+ * SIMPLIFIED VERSION: This implementation of migrate_pages
+ * is only swapping out pages and never touches the second
+ * list. The direct migration patchset
+ * extends this function to avoid the use of swap.
+ */
+int migrate_pages(struct list_head *l, struct list_head *t)
+{
+	int retry;
+	LIST_HEAD(failed);
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int swapwrite = current->flags & PF_SWAPWRITE;
+
+	if (!swapwrite)
+		current->flags |= PF_SWAPWRITE;
+
+redo:
+	retry = 0;
+
+	list_for_each_entry_safe(page, page2, l, lru) {
+		cond_resched();
+
+		/*
+		 * Skip locked pages during the first two passes to give the
+		 * functions holding the lock time to release the page. Later we use
+		 * lock_page to have a higher chance of acquiring the lock.
+		 */
+		if (pass > 2)
+			lock_page(page);
+		else
+			if (TestSetPageLocked(page))
+				goto retry_later;
+
+		/*
+		 * Only wait on writeback if we have already done a pass where
+		 * we may have triggered writeouts for lots of pages.
+		 */
+		if (pass > 0)
+			wait_on_page_writeback(page);
+		else
+			if (PageWriteback(page)) {
+				unlock_page(page);
+				goto retry_later;
+			}
+
+#ifdef CONFIG_SWAP
+		if (PageAnon(page) && !PageSwapCache(page)) {
+			if (!add_to_swap(page)) {
+				unlock_page(page);
+				list_move(&page->lru, &failed);
+				nr_failed++;
+				continue;
+			}
+		}
+#endif /* CONFIG_SWAP */
+
+		/*
+		 * Page is properly locked and writeback is complete.
+		 * Try to migrate the page.
+		 */
+		if (swap_page(page)) {
+retry_later:
+			retry++;
+		}
+	}
+	if (retry && pass++ < 10)
+		goto redo;
+
+	if (!swapwrite)
+		current->flags &= ~PF_SWAPWRITE;
+
+	if (!list_empty(&failed))
+		list_splice(&failed, l);
+
+	return nr_failed + retry;
+}
+
 /*
  * zone->lru_lock is heavily contended. Some of the functions that
  * shrink the lists perform better by taking out a batch of pages