@@ -203,7 +203,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
 		return -EBUSY;
 }
 
-static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
+static void unset_migratetype_isolate(struct page *page, int migratetype)
 {
 	struct zone *zone;
 	unsigned long flags, nr_pages;
@@ -279,6 +279,166 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
 	return NULL;
 }
 
+/**
+ * isolate_single_pageblock() -- tries to isolate a pageblock that might be
+ * within a free or in-use page.
+ * @boundary_pfn:	pageblock-aligned pfn that a page might cross
+ * @gfp_flags:		GFP flags used for migrating pages
+ * @isolate_before:	isolate the pageblock before the boundary_pfn
+ *
+ * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
+ * pageblock. When not all pageblocks within a page are isolated at the same
+ * time, free page accounting can go wrong. For example, in the case of
+ * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pageblocks.
+ * [        MAX_ORDER-1        ]
+ * [ pageblock0 |  pageblock1  ]
+ * When either pageblock is isolated, if it is a free page, the page is not
+ * split into separate migratetype lists, as it is supposed to be; if it is an
+ * in-use page and freed later, __free_one_page() does not split the free page
+ * either. The function handles this by splitting the free page or migrating
+ * the in-use page then splitting the free page.
+ */
+static int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
+			bool isolate_before)
+{
+	unsigned char saved_mt;
+	unsigned long start_pfn;
+	unsigned long isolate_pageblock;
+	unsigned long pfn;
+	struct zone *zone;
+
+	VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
+
+	if (isolate_before)
+		isolate_pageblock = boundary_pfn - pageblock_nr_pages;
+	else
+		isolate_pageblock = boundary_pfn;
+
+	/*
+	 * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid
+	 * only isolating a subset of pageblocks from a bigger than pageblock
+	 * free or in-use page. Also make sure all to-be-isolated pageblocks
+	 * are within the same zone.
+	 */
+	zone = page_zone(pfn_to_page(isolate_pageblock));
+	start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
+			zone->zone_start_pfn);
+
+	saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
+	set_pageblock_migratetype(pfn_to_page(isolate_pageblock), MIGRATE_ISOLATE);
+
+	/*
+	 * Bail out early when the to-be-isolated pageblock does not form
+	 * a free or in-use page across boundary_pfn:
+	 *
+	 * 1. isolate before boundary_pfn: the page after is not online
+	 * 2. isolate after boundary_pfn: the page before is not online
+	 *
+	 * This also ensures correctness. Without it, when isolate after
+	 * boundary_pfn and [start_pfn, boundary_pfn) are not online,
+	 * __first_valid_page() will return unexpected NULL in the for loop
+	 * below.
+	 */
+	if (isolate_before) {
+		if (!pfn_to_online_page(boundary_pfn))
+			return 0;
+	} else {
+		if (!pfn_to_online_page(boundary_pfn - 1))
+			return 0;
+	}
+
+	for (pfn = start_pfn; pfn < boundary_pfn;) {
+		struct page *page = __first_valid_page(pfn, boundary_pfn - pfn);
+
+		VM_BUG_ON(!page);
+		pfn = page_to_pfn(page);
+		/*
+		 * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any
+		 * free pages in [start_pfn, boundary_pfn), its head page will
+		 * always be in the range.
+		 */
+		if (PageBuddy(page)) {
+			int order = buddy_order(page);
+
+			if (pfn + (1UL << order) > boundary_pfn)
+				split_free_page(page, order, boundary_pfn - pfn);
+			pfn += (1UL << order);
+			continue;
+		}
+		/*
+		 * migrate compound pages then let the free page handling code
+		 * above do the rest. If migration is not possible, just fail.
+		 */
+		if (PageCompound(page)) {
+			unsigned long nr_pages = compound_nr(page);
+			struct page *head = compound_head(page);
+			unsigned long head_pfn = page_to_pfn(head);
+
+			if (head_pfn + nr_pages < boundary_pfn) {
+				pfn = head_pfn + nr_pages;
+				continue;
+			}
+#if defined CONFIG_COMPACTION || defined CONFIG_CMA
+			/*
+			 * hugetlb, lru compound (THP), and movable compound pages
+			 * can be migrated. Otherwise, fail the isolation.
+			 */
+			if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) {
+				int order;
+				unsigned long outer_pfn;
+				int ret;
+				struct compact_control cc = {
+					.nr_migratepages = 0,
+					.order = -1,
+					.zone = page_zone(pfn_to_page(head_pfn)),
+					.mode = MIGRATE_SYNC,
+					.ignore_skip_hint = true,
+					.no_set_skip_hint = true,
+					.gfp_mask = gfp_flags,
+					.alloc_contig = true,
+				};
+				INIT_LIST_HEAD(&cc.migratepages);
+
+				ret = __alloc_contig_migrate_range(&cc, head_pfn,
+							head_pfn + nr_pages);
+
+				if (ret)
+					goto failed;
+				/*
+				 * reset pfn to the head of the free page, so
+				 * that the free page handling code above can split
+				 * the free page to the right migratetype list.
+				 *
+				 * head_pfn is not used here as a hugetlb page order
+				 * can be bigger than MAX_ORDER-1, but after it is
+				 * freed, the free page order is not. Use pfn within
+				 * the range to find the head of the free page.
+				 */
+				order = 0;
+				outer_pfn = pfn;
+				while (!PageBuddy(pfn_to_page(outer_pfn))) {
+					if (++order >= MAX_ORDER) {
+						outer_pfn = pfn;
+						break;
+					}
+					outer_pfn &= ~0UL << order;
+				}
+				pfn = outer_pfn;
+				continue;
+			} else
+#endif
+				goto failed;
+		}
+
+		pfn++;
+	}
+	return 0;
+failed:
+	/* restore the original migratetype */
+	set_pageblock_migratetype(pfn_to_page(isolate_pageblock), saved_mt);
+	return -EBUSY;
+}
+
 /**
  * start_isolate_page_range() - make page-allocation-type of range of pages to
  * be MIGRATE_ISOLATE.
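
A note on the while loop near the end of isolate_single_pageblock() above: after a compound page is migrated and freed, the code walks outer_pfn toward lower, progressively more aligned pfns until it lands on a PageBuddy() head. The standalone userspace sketch below mirrors only that "outer_pfn &= ~0UL << order" masking progression; the starting pfn value and the MAX_ORDER constant are assumptions chosen for illustration and are not taken from the patch.

/*
 * Sketch of the candidate-head walk. Clearing the low "order" bits of
 * a pfn yields the only pfn that could be the head of a free page of
 * that order containing it; the kernel loop stops as soon as one of
 * these candidates is actually a buddy head.
 */
#include <stdio.h>

#define MAX_ORDER	11	/* assumed value for illustration */

int main(void)
{
	unsigned long pfn = 0x12345;	/* arbitrary pfn inside a freed page */
	unsigned long outer_pfn = pfn;
	int order;

	for (order = 1; order < MAX_ORDER; order++) {
		outer_pfn &= ~0UL << order;
		printf("order %2d -> candidate head pfn 0x%lx\n",
		       order, outer_pfn);
	}
	return 0;
}
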
@@ -293,6 +453,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  *			and PageOffline() pages.
  *			REPORT_FAILURE - report details about the failure to
  *			isolate the range
+ * @gfp_flags:		GFP flags used for migrating pages that sit across the
+ *			range boundaries.
  *
  * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
  * the range will never be allocated. Any free pages and pages freed in the
@@ -301,6 +463,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * pages in the range finally, the caller have to free all pages in the range.
  * test_page_isolated() can be used for test it.
  *
+ * The function first tries to isolate the pageblocks at the beginning and end
+ * of the range, since there might be pages across the range boundaries.
+ * Afterwards, it isolates the rest of the range.
+ *
  * There is no high level synchronization mechanism that prevents two threads
  * from trying to isolate overlapping ranges. If this happens, one thread
  * will notice pageblocks in the overlapping range already set to isolate.
@@ -321,21 +487,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
  * Return: 0 on success and -EBUSY if any part of range cannot be isolated.
  */
 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
-			     unsigned migratetype, int flags)
+			     int migratetype, int flags, gfp_t gfp_flags)
 {
 	unsigned long pfn;
 	struct page *page;
+	int ret;
 
 	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
 	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
 
-	for (pfn = start_pfn;
-	     pfn < end_pfn;
+	/* isolate [start_pfn, start_pfn + pageblock_nr_pages) pageblock */
+	ret = isolate_single_pageblock(start_pfn, gfp_flags, false);
+	if (ret)
+		return ret;
+
+	/* isolate [end_pfn - pageblock_nr_pages, end_pfn) pageblock */
+	ret = isolate_single_pageblock(end_pfn, gfp_flags, true);
+	if (ret) {
+		unset_migratetype_isolate(pfn_to_page(start_pfn), migratetype);
+		return ret;
+	}
+
+	/* skip isolated pageblocks at the beginning and end */
+	for (pfn = start_pfn + pageblock_nr_pages;
+	     pfn < end_pfn - pageblock_nr_pages;
 	     pfn += pageblock_nr_pages) {
 		page = __first_valid_page(pfn, pageblock_nr_pages);
 		if (page && set_migratetype_isolate(page, migratetype, flags,
 					start_pfn, end_pfn)) {
 			undo_isolate_page_range(start_pfn, pfn, migratetype);
+			unset_migratetype_isolate(
+				pfn_to_page(end_pfn - pageblock_nr_pages),
+				migratetype);
 			return -EBUSY;
 		}
 	}
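
For context, the hunk above changes the signature of start_isolate_page_range(), so existing callers have to pass an extra gfp_flags argument that is forwarded to the boundary-pageblock migration. The sketch below shows how a hypothetical caller might use the updated interfaces; the function name example_isolate_and_release and the choice of MIGRATE_CMA, flags == 0 and GFP_KERNEL are assumptions made for illustration, not something this patch prescribes.

/* Hypothetical caller of the updated start_isolate_page_range() /
 * undo_isolate_page_range() pair; flag and migratetype choices are
 * illustrative assumptions. */
static int example_isolate_and_release(unsigned long start_pfn,
					unsigned long end_pfn)
{
	int ret;

	/* both boundaries must be pageblock aligned, per the BUG_ON()s above */
	ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_CMA,
				       0, GFP_KERNEL);
	if (ret)
		return ret;

	/* ... migrate or allocate pages in [start_pfn, end_pfn) here ... */

	/* make the isolated pageblocks available again */
	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_CMA);
	return 0;
}
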
@@ -346,7 +529,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
  * Make isolated pages available again.
  */
 void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
-			    unsigned migratetype)
+			    int migratetype)
 {
 	unsigned long pfn;
 	struct page *page;