
Commit b2c9e2f

x-y-z authored and akpm00 committed
mm: make alloc_contig_range work at pageblock granularity
alloc_contig_range() worked at MAX_ORDER_NR_PAGES granularity to avoid merging pageblocks with different migratetypes. It might unnecessarily convert extra pageblocks at the beginning and at the end of the range. Change alloc_contig_range() to work at pageblock granularity.

Special handling is needed for free pages and in-use pages across the boundaries of the range specified by alloc_contig_range(), because these partially isolated pages cause free page accounting issues. The free pages will be split and freed into separate migratetype lists; the in-use pages will be migrated, and the resulting free pages will then be handled in the aforementioned way.

[[email protected]: fix deadlock/crash]
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Zi Yan <[email protected]>
Reported-by: kernel test robot <[email protected]>
Cc: Christophe Leroy <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Eric Ren <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Oscar Salvador <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 844fbae commit b2c9e2f
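
To make the new isolation flow concrete: only the two boundary pageblocks of a request need the split-or-migrate handling, while interior pageblocks are isolated as before. A minimal userspace sketch, not kernel code; the 512-page pageblock size assumes an x86_64 kernel with 4KB base pages:

#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 512UL	/* x86_64, 4KB pages: 2MB pageblocks */

int main(void)
{
	/* hypothetical pageblock-aligned request, e.g. from CMA */
	unsigned long start_pfn = 0x1000, end_pfn = 0x2000;
	unsigned long pfn;

	/*
	 * The boundary pageblocks may share a free or in-use page with
	 * their neighbours outside [start_pfn, end_pfn), so they get the
	 * special split/migrate treatment first.
	 */
	printf("boundary: [%#lx, %#lx) and [%#lx, %#lx)\n",
	       start_pfn, start_pfn + PAGEBLOCK_NR_PAGES,
	       end_pfn - PAGEBLOCK_NR_PAGES, end_pfn);

	/* interior pageblocks cannot cross the range boundaries */
	for (pfn = start_pfn + PAGEBLOCK_NR_PAGES;
	     pfn < end_pfn - PAGEBLOCK_NR_PAGES; pfn += PAGEBLOCK_NR_PAGES)
		printf("interior: [%#lx, %#lx)\n", pfn, pfn + PAGEBLOCK_NR_PAGES);
	return 0;
}

This mirrors the new start_isolate_page_range() in mm/page_isolation.c below: two isolate_single_pageblock() calls for the ends, then the plain per-pageblock loop over the interior.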

File tree

5 files changed: 242 additions, 18 deletions

include/linux/page-isolation.h
Lines changed: 2 additions & 2 deletions

@@ -42,15 +42,15 @@ int move_freepages_block(struct zone *zone, struct page *page,
  */
 int
 start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
-			 unsigned migratetype, int flags);
+			 int migratetype, int flags, gfp_t gfp_flags);
 
 /*
  * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
  * target range is [start_pfn, end_pfn)
  */
 void
 undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
-			unsigned migratetype);
+			int migratetype);
 
 /*
  * Test all pages in [start_pfn, end_pfn) are isolated or not.

mm/internal.h
Lines changed: 6 additions & 0 deletions

@@ -359,6 +359,9 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
 			  phys_addr_t min_addr,
 			  int nid, bool exact_nid);
 
+void split_free_page(struct page *free_page,
+				int order, unsigned long split_pfn_offset);
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*
@@ -422,6 +425,9 @@ isolate_freepages_range(struct compact_control *cc,
 int
 isolate_migratepages_range(struct compact_control *cc,
 			   unsigned long low_pfn, unsigned long end_pfn);
+
+int __alloc_contig_migrate_range(struct compact_control *cc,
+					unsigned long start, unsigned long end);
 #endif
 int find_suitable_fallback(struct free_area *area, unsigned int order,
 			int migratetype, bool only_stealable, bool *can_steal);

mm/memory_hotplug.c
Lines changed: 2 additions & 1 deletion

@@ -1837,7 +1837,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
 	/* set above range as isolated */
 	ret = start_isolate_page_range(start_pfn, end_pfn,
 				       MIGRATE_MOVABLE,
-				       MEMORY_OFFLINE | REPORT_FAILURE);
+				       MEMORY_OFFLINE | REPORT_FAILURE,
+				       GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL);
 	if (ret) {
 		reason = "failure to isolate range";
 		goto failed_removal_pcplists_disabled;

mm/page_alloc.c
Lines changed: 44 additions & 10 deletions

@@ -1094,6 +1094,43 @@ static inline void __free_one_page(struct page *page,
 	page_reporting_notify_free(order);
 }
 
+/**
+ * split_free_page() -- split a free page at split_pfn_offset
+ * @free_page:		the original free page
+ * @order:		the order of the page
+ * @split_pfn_offset:	split offset within the page
+ *
+ * It is used when the free page crosses two pageblocks with different
+ * migratetypes at split_pfn_offset within the page. The split free page will
+ * be put into separate migratetype lists afterwards. Otherwise, the function
+ * achieves nothing.
+ */
+void split_free_page(struct page *free_page,
+				int order, unsigned long split_pfn_offset)
+{
+	struct zone *zone = page_zone(free_page);
+	unsigned long free_page_pfn = page_to_pfn(free_page);
+	unsigned long pfn;
+	unsigned long flags;
+	int free_page_order;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	del_page_from_free_list(free_page, zone, order);
+	for (pfn = free_page_pfn;
+	     pfn < free_page_pfn + (1UL << order);) {
+		int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
+
+		free_page_order = ffs(split_pfn_offset) - 1;
+		__free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
+				mt, FPI_NONE);
+		pfn += 1UL << free_page_order;
+		split_pfn_offset -= (1UL << free_page_order);
+		/* we have done the first part, now switch to second part */
+		if (split_pfn_offset == 0)
+			split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
 /*
  * A bad page could be due to a number of fields. Instead of multiple branches,
  * try and check multiple fields with one check. The caller must do a detailed
@@ -8951,7 +8988,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list)
 #endif
 
 /* [start, end) must belong to a single zone. */
-static int __alloc_contig_migrate_range(struct compact_control *cc,
+int __alloc_contig_migrate_range(struct compact_control *cc,
 					unsigned long start, unsigned long end)
 {
 	/* This function is based on compact_zone() from compaction.c. */
@@ -9034,7 +9071,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		       unsigned migratetype, gfp_t gfp_mask)
 {
 	unsigned long outer_start, outer_end;
-	unsigned int order;
+	int order;
 	int ret = 0;
 
 	struct compact_control cc = {
@@ -9053,14 +9090,11 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	 * What we do here is we mark all pageblocks in range as
 	 * MIGRATE_ISOLATE. Because pageblock and max order pages may
 	 * have different sizes, and due to the way page allocator
-	 * work, we align the range to biggest of the two pages so
-	 * that page allocator won't try to merge buddies from
-	 * different pageblocks and change MIGRATE_ISOLATE to some
-	 * other migration type.
+	 * work, start_isolate_page_range() has special handlings for this.
 	 *
 	 * Once the pageblocks are marked as MIGRATE_ISOLATE, we
 	 * migrate the pages from an unaligned range (ie. pages that
-	 * we are interested in).  This will put all the pages in
+	 * we are interested in). This will put all the pages in
 	 * range back to page allocator as MIGRATE_ISOLATE.
 	 *
 	 * When this is done, we take the pages in range from page
@@ -9074,9 +9108,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	 */
 
 	ret = start_isolate_page_range(pfn_max_align_down(start),
-				       pfn_max_align_up(end), migratetype, 0);
+				pfn_max_align_up(end), migratetype, 0, gfp_mask);
 	if (ret)
-		return ret;
+		goto done;
 
 	drain_all_pages(cc.zone);
 
@@ -9096,7 +9130,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	ret = 0;
 
 	/*
-	 * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
+	 * Pages from [start, end) are within a pageblock_nr_pages
	 * aligned blocks that are marked as MIGRATE_ISOLATE. What's
 	 * more, all pages in [start, end) are free in page allocator.
 	 * What we are going to do is to allocate all pages from
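
The ffs()-based chunking in split_free_page() above is easiest to follow in isolation. Below is a userspace replay of the loop, a sketch only: printf() stands in for __free_one_page(), there is no zone lock, and split_free_page_demo() is a hypothetical name. It relies on the same implicit contract as the kernel code, namely that split_pfn_offset keeps every emitted chunk buddy-aligned (in the kernel it is a pageblock-aligned offset inside an order-aligned page).

#include <stdio.h>
#include <strings.h>	/* ffs() */

static void split_free_page_demo(unsigned long free_page_pfn, int order,
				 unsigned long split_pfn_offset)
{
	unsigned long pfn;
	int free_page_order;

	for (pfn = free_page_pfn;
	     pfn < free_page_pfn + (1UL << order);) {
		/* largest buddy that does not cross the split point */
		free_page_order = ffs((int)split_pfn_offset) - 1;
		printf("chunk: pfn %lu, order %d\n", pfn, free_page_order);
		pfn += 1UL << free_page_order;
		split_pfn_offset -= (1UL << free_page_order);
		/* first part done; the rest of the page forms the second part */
		if (split_pfn_offset == 0)
			split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
	}
}

int main(void)
{
	/*
	 * An order-10 (MAX_ORDER-1 on x86_64) free page at pfn 1024 spans
	 * pageblocks [1024, 1536) and [1536, 2048); splitting at offset 512
	 * yields one order-9 buddy per pageblock, each of which the kernel
	 * then frees to its own migratetype list.
	 */
	split_free_page_demo(1024, 10, 512);	/* pfn 1024 o9, pfn 1536 o9 */
	return 0;
}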

mm/page_isolation.c
Lines changed: 188 additions & 5 deletions

@@ -203,7 +203,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
 	return -EBUSY;
 }
 
-static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
+static void unset_migratetype_isolate(struct page *page, int migratetype)
 {
 	struct zone *zone;
 	unsigned long flags, nr_pages;
@@ -279,6 +279,166 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
 	return NULL;
 }
 
+/**
+ * isolate_single_pageblock() -- tries to isolate a pageblock that might be
+ * within a free or in-use page.
+ * @boundary_pfn:	pageblock-aligned pfn that a page might cross
+ * @gfp_flags:		GFP flags used for migrating pages
+ * @isolate_before:	isolate the pageblock before the boundary_pfn
+ *
+ * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
+ * pageblock. When not all pageblocks within a page are isolated at the same
+ * time, free page accounting can go wrong. For example, in the case of
+ * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pageblocks.
+ * [         MAX_ORDER-1         ]
+ * [  pageblock0  |  pageblock1  ]
+ * When either pageblock is isolated, if it is a free page, the page is not
+ * split into separate migratetype lists, which is supposed to; if it is an
+ * in-use page and freed later, __free_one_page() does not split the free page
+ * either. The function handles this by splitting the free page or migrating
+ * the in-use page then splitting the free page.
+ */
+static int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
+			bool isolate_before)
+{
+	unsigned char saved_mt;
+	unsigned long start_pfn;
+	unsigned long isolate_pageblock;
+	unsigned long pfn;
+	struct zone *zone;
+
+	VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
+
+	if (isolate_before)
+		isolate_pageblock = boundary_pfn - pageblock_nr_pages;
+	else
+		isolate_pageblock = boundary_pfn;
+
+	/*
+	 * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid
+	 * only isolating a subset of pageblocks from a bigger than pageblock
+	 * free or in-use page. Also make sure all to-be-isolated pageblocks
+	 * are within the same zone.
+	 */
+	zone = page_zone(pfn_to_page(isolate_pageblock));
+	start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
+			zone->zone_start_pfn);
+
+	saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
+	set_pageblock_migratetype(pfn_to_page(isolate_pageblock), MIGRATE_ISOLATE);
+
+	/*
+	 * Bail out early when the to-be-isolated pageblock does not form
+	 * a free or in-use page across boundary_pfn:
+	 *
+	 * 1. isolate before boundary_pfn: the page after is not online
+	 * 2. isolate after boundary_pfn: the page before is not online
+	 *
+	 * This also ensures correctness. Without it, when isolate after
+	 * boundary_pfn and [start_pfn, boundary_pfn) are not online,
+	 * __first_valid_page() will return unexpected NULL in the for loop
+	 * below.
+	 */
+	if (isolate_before) {
+		if (!pfn_to_online_page(boundary_pfn))
+			return 0;
+	} else {
+		if (!pfn_to_online_page(boundary_pfn - 1))
+			return 0;
+	}
+
+	for (pfn = start_pfn; pfn < boundary_pfn;) {
+		struct page *page = __first_valid_page(pfn, boundary_pfn - pfn);
+
+		VM_BUG_ON(!page);
+		pfn = page_to_pfn(page);
+		/*
+		 * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any
+		 * free pages in [start_pfn, boundary_pfn), its head page will
+		 * always be in the range.
+		 */
+		if (PageBuddy(page)) {
+			int order = buddy_order(page);
+
+			if (pfn + (1UL << order) > boundary_pfn)
+				split_free_page(page, order, boundary_pfn - pfn);
+			pfn += (1UL << order);
+			continue;
+		}
+		/*
+		 * migrate compound pages then let the free page handling code
+		 * above do the rest. If migration is not possible, just fail.
+		 */
+		if (PageCompound(page)) {
+			unsigned long nr_pages = compound_nr(page);
+			struct page *head = compound_head(page);
+			unsigned long head_pfn = page_to_pfn(head);
+
+			if (head_pfn + nr_pages < boundary_pfn) {
+				pfn = head_pfn + nr_pages;
+				continue;
+			}
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+			/*
+			 * hugetlb, lru compound (THP), and movable compound pages
+			 * can be migrated. Otherwise, fail the isolation.
+			 */
+			if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) {
+				int order;
+				unsigned long outer_pfn;
+				int ret;
+				struct compact_control cc = {
+					.nr_migratepages = 0,
+					.order = -1,
+					.zone = page_zone(pfn_to_page(head_pfn)),
+					.mode = MIGRATE_SYNC,
+					.ignore_skip_hint = true,
+					.no_set_skip_hint = true,
+					.gfp_mask = gfp_flags,
+					.alloc_contig = true,
+				};
+				INIT_LIST_HEAD(&cc.migratepages);
+
+				ret = __alloc_contig_migrate_range(&cc, head_pfn,
+							head_pfn + nr_pages);
+
+				if (ret)
+					goto failed;
+				/*
+				 * reset pfn to the head of the free page, so
+				 * that the free page handling code above can split
+				 * the free page to the right migratetype list.
+				 *
+				 * head_pfn is not used here as a hugetlb page order
+				 * can be bigger than MAX_ORDER-1, but after it is
+				 * freed, the free page order is not. Use pfn within
+				 * the range to find the head of the free page.
+				 */
+				order = 0;
+				outer_pfn = pfn;
+				while (!PageBuddy(pfn_to_page(outer_pfn))) {
+					if (++order >= MAX_ORDER) {
+						outer_pfn = pfn;
+						break;
+					}
+					outer_pfn &= ~0UL << order;
+				}
+				pfn = outer_pfn;
+				continue;
+			} else
+#endif
+				goto failed;
+		}
+
+		pfn++;
+	}
+	return 0;
+failed:
+	/* restore the original migratetype */
+	set_pageblock_migratetype(pfn_to_page(isolate_pageblock), saved_mt);
+	return -EBUSY;
+}
+
 /**
  * start_isolate_page_range() - make page-allocation-type of range of pages to
  * be MIGRATE_ISOLATE.
@@ -293,6 +453,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  *					 and PageOffline() pages.
  *			REPORT_FAILURE - report details about the failure to
  *			isolate the range
+ * @gfp_flags:		GFP flags used for migrating pages that sit across the
+ *			range boundaries.
  *
  * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
  * the range will never be allocated. Any free pages and pages freed in the
@@ -301,6 +463,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * pages in the range finally, the caller have to free all pages in the range.
  * test_page_isolated() can be used for test it.
  *
+ * The function first tries to isolate the pageblocks at the beginning and end
+ * of the range, since there might be pages across the range boundaries.
+ * Afterwards, it isolates the rest of the range.
+ *
 * There is no high level synchronization mechanism that prevents two threads
 * from trying to isolate overlapping ranges. If this happens, one thread
 * will notice pageblocks in the overlapping range already set to isolate.
@@ -321,21 +487,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
  */
 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
-			     unsigned migratetype, int flags)
+			     int migratetype, int flags, gfp_t gfp_flags)
 {
 	unsigned long pfn;
 	struct page *page;
+	int ret;
 
 	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
 	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
 
-	for (pfn = start_pfn;
-	     pfn < end_pfn;
+	/* isolate [start_pfn, start_pfn + pageblock_nr_pages) pageblock */
+	ret = isolate_single_pageblock(start_pfn, gfp_flags, false);
+	if (ret)
+		return ret;
+
+	/* isolate [end_pfn - pageblock_nr_pages, end_pfn) pageblock */
+	ret = isolate_single_pageblock(end_pfn, gfp_flags, true);
+	if (ret) {
+		unset_migratetype_isolate(pfn_to_page(start_pfn), migratetype);
+		return ret;
+	}
+
+	/* skip isolated pageblocks at the beginning and end */
+	for (pfn = start_pfn + pageblock_nr_pages;
+	     pfn < end_pfn - pageblock_nr_pages;
 	     pfn += pageblock_nr_pages) {
 		page = __first_valid_page(pfn, pageblock_nr_pages);
 		if (page && set_migratetype_isolate(page, migratetype, flags,
 					start_pfn, end_pfn)) {
 			undo_isolate_page_range(start_pfn, pfn, migratetype);
+			unset_migratetype_isolate(
+				pfn_to_page(end_pfn - pageblock_nr_pages),
+				migratetype);
 			return -EBUSY;
 		}
 	}
@@ -346,7 +529,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
  * Make isolated pages available again.
  */
 void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
-			     unsigned migratetype)
+			     int migratetype)
 {
 	unsigned long pfn;
 	struct page *page;
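
One subtle step above is the walk that recovers the head of the free page after migration: it clears one more low-order bit of the pfn each round until PageBuddy() hits, giving up after MAX_ORDER attempts. A userspace replay with a stubbed buddy check (page_buddy(), buddy_head_pfn, and find_buddy_head() are hypothetical names for illustration):

#include <stdbool.h>
#include <stdio.h>

#define MAX_ORDER 11	/* x86_64 default */

/* stub for PageBuddy(): pretend migration freed an order-10 page whose
   head sits at pfn 1024 */
static const unsigned long buddy_head_pfn = 1024;

static bool page_buddy(unsigned long pfn)
{
	return pfn == buddy_head_pfn;
}

static unsigned long find_buddy_head(unsigned long pfn)
{
	unsigned long outer_pfn = pfn;
	int order = 0;

	/* same masking as isolate_single_pageblock(): each round aligns
	   outer_pfn down to the next higher order boundary */
	while (!page_buddy(outer_pfn)) {
		if (++order >= MAX_ORDER)
			return pfn;	/* no buddy head found; keep scanning from pfn */
		outer_pfn &= ~0UL << order;
	}
	return outer_pfn;
}

int main(void)
{
	/* pfn 1309 lies inside the freed range [1024, 2048) */
	printf("head: %lu\n", find_buddy_head(1309));	/* prints 1024 */
	return 0;
}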
