Skip to content

Commit e98337d

Browse files
yuzhao authored and akpm00 committed
mm/contig_alloc: support __GFP_COMP
Patch series "mm/hugetlb: alloc/free gigantic folios", v2.

Using __GFP_COMP for gigantic folios can greatly reduce not only the
amount of code but also the allocation and free time.

Approximate LOC to mm/hugetlb.c: +60, -240

Allocate and free 500 1GB hugeTLB memory without HVO by:
  time echo 500 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
  time echo 0 >/sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages

         Before  After
  Alloc  ~13s    ~10s
  Free   ~15s    <1s

The above magnitude generally holds for multiple x86 and arm64 CPU models.

Perf profile before:
  Alloc
    - 99.99% alloc_pool_huge_folio
       - __alloc_fresh_hugetlb_folio
          - 83.23% alloc_contig_pages_noprof
             - 47.46% alloc_contig_range_noprof
                - 20.96% isolate_freepages_range
                  16.10% split_page
                - 14.10% start_isolate_page_range
                - 12.02% undo_isolate_page_range

  Free
    - update_and_free_pages_bulk
       - 87.71% free_contig_range
          - 76.02% free_unref_page
             - 41.30% free_unref_page_commit
                - 32.58% free_pcppages_bulk
                   - 24.75% __free_one_page
               13.96% _raw_spin_trylock
         12.27% __update_and_free_hugetlb_folio

Perf profile after:
  Alloc
    - 99.99% alloc_pool_huge_folio
         alloc_gigantic_folio
       - alloc_contig_pages_noprof
          - 59.15% alloc_contig_range_noprof
             - 20.72% start_isolate_page_range
               20.64% prep_new_page
             - 17.13% undo_isolate_page_range

  Free
    - update_and_free_pages_bulk
       - __folio_put
          - __free_pages_ok
               7.46% free_tail_page_prepare
             - 1.97% free_one_page
                  1.86% __free_one_page

This patch (of 3):

Support __GFP_COMP in alloc_contig_range().  When the flag is set, upon
success the function returns a large folio prepared by prep_new_page(),
rather than a range of order-0 pages prepared by split_free_pages()
(which is renamed from split_map_pages()).

alloc_contig_range() can be used to allocate folios larger than
MAX_PAGE_ORDER, e.g., gigantic hugeTLB folios.  So on the free path,
free_one_page() needs to handle that by split_large_buddy().
[[email protected]: fix folio_alloc_gigantic_noprof() WARN expression, per Yu Liao]
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Yu Zhao <[email protected]>
Acked-by: Zi Yan <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Muchun Song <[email protected]>
Cc: Frank van der Linden <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent f77f0c7 commit e98337d

File tree

3 files changed

+108
-67
lines changed

3 files changed

+108
-67
lines changed

include/linux/gfp.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,4 +446,27 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_
446446
#endif
447447
void free_contig_range(unsigned long pfn, unsigned long nr_pages);
448448

449+
#ifdef CONFIG_CONTIG_ALLOC
450+
static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
451+
int nid, nodemask_t *node)
452+
{
453+
struct page *page;
454+
455+
if (WARN_ON(!order || !(gfp & __GFP_COMP)))
456+
return NULL;
457+
458+
page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
459+
460+
return page ? page_folio(page) : NULL;
461+
}
462+
#else
463+
static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
464+
int nid, nodemask_t *node)
465+
{
466+
return NULL;
467+
}
468+
#endif
469+
/* This should be paired with folio_put() rather than free_contig_range(). */
470+
#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
471+
449472
#endif /* __LINUX_GFP_H */

mm/compaction.c

Lines changed: 5 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -86,33 +86,6 @@ static struct page *mark_allocated_noprof(struct page *page, unsigned int order,
8686
}
8787
#define mark_allocated(...) alloc_hooks(mark_allocated_noprof(__VA_ARGS__))
8888

89-
static void split_map_pages(struct list_head *freepages)
90-
{
91-
unsigned int i, order;
92-
struct page *page, *next;
93-
LIST_HEAD(tmp_list);
94-
95-
for (order = 0; order < NR_PAGE_ORDERS; order++) {
96-
list_for_each_entry_safe(page, next, &freepages[order], lru) {
97-
unsigned int nr_pages;
98-
99-
list_del(&page->lru);
100-
101-
nr_pages = 1 << order;
102-
103-
mark_allocated(page, order, __GFP_MOVABLE);
104-
if (order)
105-
split_page(page, order);
106-
107-
for (i = 0; i < nr_pages; i++) {
108-
list_add(&page->lru, &tmp_list);
109-
page++;
110-
}
111-
}
112-
list_splice_init(&tmp_list, &freepages[0]);
113-
}
114-
}
115-
11689
static unsigned long release_free_list(struct list_head *freepages)
11790
{
11891
int order;
@@ -742,22 +715,21 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
742715
*
743716
* Non-free pages, invalid PFNs, or zone boundaries within the
744717
* [start_pfn, end_pfn) range are considered errors, cause function to
745-
* undo its actions and return zero.
718+
* undo its actions and return zero. cc->freepages[] are empty.
746719
*
747720
* Otherwise, function returns one-past-the-last PFN of isolated page
748721
* (which may be greater then end_pfn if end fell in a middle of
749-
* a free page).
722+
* a free page). cc->freepages[] contain free pages isolated.
750723
*/
751724
unsigned long
752725
isolate_freepages_range(struct compact_control *cc,
753726
unsigned long start_pfn, unsigned long end_pfn)
754727
{
755728
unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
756729
int order;
757-
struct list_head tmp_freepages[NR_PAGE_ORDERS];
758730

759731
for (order = 0; order < NR_PAGE_ORDERS; order++)
760-
INIT_LIST_HEAD(&tmp_freepages[order]);
732+
INIT_LIST_HEAD(&cc->freepages[order]);
761733

762734
pfn = start_pfn;
763735
block_start_pfn = pageblock_start_pfn(pfn);
@@ -788,7 +760,7 @@ isolate_freepages_range(struct compact_control *cc,
788760
break;
789761

790762
isolated = isolate_freepages_block(cc, &isolate_start_pfn,
791-
block_end_pfn, tmp_freepages, 0, true);
763+
block_end_pfn, cc->freepages, 0, true);
792764

793765
/*
794766
* In strict mode, isolate_freepages_block() returns 0 if
@@ -807,13 +779,10 @@ isolate_freepages_range(struct compact_control *cc,
807779

808780
if (pfn < end_pfn) {
809781
/* Loop terminated early, cleanup. */
810-
release_free_list(tmp_freepages);
782+
release_free_list(cc->freepages);
811783
return 0;
812784
}
813785

814-
/* __isolate_free_page() does not map the pages */
815-
split_map_pages(tmp_freepages);
816-
817786
/* We don't use freelists for anything. */
818787
return pfn;
819788
}

mm/page_alloc.c

Lines changed: 80 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,16 +1196,36 @@ static void free_pcppages_bulk(struct zone *zone, int count,
11961196
spin_unlock_irqrestore(&zone->lock, flags);
11971197
}
11981198

1199+
/* Split a multi-block free page into its individual pageblocks. */
1200+
static void split_large_buddy(struct zone *zone, struct page *page,
1201+
unsigned long pfn, int order, fpi_t fpi)
1202+
{
1203+
unsigned long end = pfn + (1 << order);
1204+
1205+
VM_WARN_ON_ONCE(!IS_ALIGNED(pfn, 1 << order));
1206+
/* Caller removed page from freelist, buddy info cleared! */
1207+
VM_WARN_ON_ONCE(PageBuddy(page));
1208+
1209+
if (order > pageblock_order)
1210+
order = pageblock_order;
1211+
1212+
while (pfn != end) {
1213+
int mt = get_pfnblock_migratetype(page, pfn);
1214+
1215+
__free_one_page(page, pfn, zone, order, mt, fpi);
1216+
pfn += 1 << order;
1217+
page = pfn_to_page(pfn);
1218+
}
1219+
}
1220+
11991221
static void free_one_page(struct zone *zone, struct page *page,
12001222
unsigned long pfn, unsigned int order,
12011223
fpi_t fpi_flags)
12021224
{
12031225
unsigned long flags;
1204-
int migratetype;
12051226

12061227
spin_lock_irqsave(&zone->lock, flags);
1207-
migratetype = get_pfnblock_migratetype(page, pfn);
1208-
__free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
1228+
split_large_buddy(zone, page, pfn, order, fpi_flags);
12091229
spin_unlock_irqrestore(&zone->lock, flags);
12101230
}
12111231

@@ -1697,27 +1717,6 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
16971717
return start_pfn;
16981718
}
16991719

1700-
/* Split a multi-block free page into its individual pageblocks */
1701-
static void split_large_buddy(struct zone *zone, struct page *page,
1702-
unsigned long pfn, int order)
1703-
{
1704-
unsigned long end_pfn = pfn + (1 << order);
1705-
1706-
VM_WARN_ON_ONCE(order <= pageblock_order);
1707-
VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1));
1708-
1709-
/* Caller removed page from freelist, buddy info cleared! */
1710-
VM_WARN_ON_ONCE(PageBuddy(page));
1711-
1712-
while (pfn != end_pfn) {
1713-
int mt = get_pfnblock_migratetype(page, pfn);
1714-
1715-
__free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE);
1716-
pfn += pageblock_nr_pages;
1717-
page = pfn_to_page(pfn);
1718-
}
1719-
}
1720-
17211720
/**
17221721
* move_freepages_block_isolate - move free pages in block for page isolation
17231722
* @zone: the zone
@@ -1758,7 +1757,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
17581757
del_page_from_free_list(buddy, zone, order,
17591758
get_pfnblock_migratetype(buddy, pfn));
17601759
set_pageblock_migratetype(page, migratetype);
1761-
split_large_buddy(zone, buddy, pfn, order);
1760+
split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
17621761
return true;
17631762
}
17641763

@@ -1769,7 +1768,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
17691768
del_page_from_free_list(page, zone, order,
17701769
get_pfnblock_migratetype(page, pfn));
17711770
set_pageblock_migratetype(page, migratetype);
1772-
split_large_buddy(zone, page, pfn, order);
1771+
split_large_buddy(zone, page, pfn, order, FPI_NONE);
17731772
return true;
17741773
}
17751774
move:
@@ -6437,6 +6436,31 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
64376436
return (ret < 0) ? ret : 0;
64386437
}
64396438

6439+
static void split_free_pages(struct list_head *list)
6440+
{
6441+
int order;
6442+
6443+
for (order = 0; order < NR_PAGE_ORDERS; order++) {
6444+
struct page *page, *next;
6445+
int nr_pages = 1 << order;
6446+
6447+
list_for_each_entry_safe(page, next, &list[order], lru) {
6448+
int i;
6449+
6450+
post_alloc_hook(page, order, __GFP_MOVABLE);
6451+
if (!order)
6452+
continue;
6453+
6454+
split_page(page, order);
6455+
6456+
/* Add all subpages to the order-0 head, in sequence. */
6457+
list_del(&page->lru);
6458+
for (i = 0; i < nr_pages; i++)
6459+
list_add_tail(&page[i].lru, &list[0]);
6460+
}
6461+
}
6462+
}
6463+
64406464
/**
64416465
* alloc_contig_range() -- tries to allocate given range of pages
64426466
* @start: start PFN to allocate
@@ -6549,12 +6573,25 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
65496573
goto done;
65506574
}
65516575

6552-
/* Free head and tail (if any) */
6553-
if (start != outer_start)
6554-
free_contig_range(outer_start, start - outer_start);
6555-
if (end != outer_end)
6556-
free_contig_range(end, outer_end - end);
6576+
if (!(gfp_mask & __GFP_COMP)) {
6577+
split_free_pages(cc.freepages);
65576578

6579+
/* Free head and tail (if any) */
6580+
if (start != outer_start)
6581+
free_contig_range(outer_start, start - outer_start);
6582+
if (end != outer_end)
6583+
free_contig_range(end, outer_end - end);
6584+
} else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
6585+
struct page *head = pfn_to_page(start);
6586+
int order = ilog2(end - start);
6587+
6588+
check_new_pages(head, order);
6589+
prep_new_page(head, order, gfp_mask, 0);
6590+
} else {
6591+
ret = -EINVAL;
6592+
WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
6593+
start, end, outer_start, outer_end);
6594+
}
65586595
done:
65596596
undo_isolate_page_range(start, end, migratetype);
65606597
return ret;
@@ -6663,6 +6700,18 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
66636700
void free_contig_range(unsigned long pfn, unsigned long nr_pages)
66646701
{
66656702
unsigned long count = 0;
6703+
struct folio *folio = pfn_folio(pfn);
6704+
6705+
if (folio_test_large(folio)) {
6706+
int expected = folio_nr_pages(folio);
6707+
6708+
if (nr_pages == expected)
6709+
folio_put(folio);
6710+
else
6711+
WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
6712+
pfn, nr_pages, expected);
6713+
return;
6714+
}
66666715

66676716
for (; nr_pages--; pfn++) {
66686717
struct page *page = pfn_to_page(pfn);

0 commit comments

Comments
 (0)