Commit 3a80a7f

Mel Gorman authored and torvalds committed
mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set
This patch initialises all low memory struct pages and 2G of the highest
zone on each node during memory initialisation if
CONFIG_DEFERRED_STRUCT_PAGE_INIT is set. That config option cannot be set
yet but will be made available in a later patch. Parallel initialisation
of struct page depends on some features from memory hotplug, and it is
necessary to alter section annotations.

Signed-off-by: Mel Gorman <[email protected]>
Tested-by: Nate Zimmer <[email protected]>
Tested-by: Waiman Long <[email protected]>
Tested-by: Daniel J Blueman <[email protected]>
Acked-by: Pekka Enberg <[email protected]>
Cc: Robin Holt <[email protected]>
Cc: Nate Zimmer <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Waiman Long <[email protected]>
Cc: Scott Norton <[email protected]>
Cc: "Luck, Tony" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 75a592a commit 3a80a7f

5 files changed: +124 -4 lines changed
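The "2G" in the changelog is a page count in the code below: update_defer_init in mm/page_alloc.c compares against 2UL << (30 - PAGE_SHIFT) pages of the node's highest zone. A minimal standalone sketch of that arithmetic, assuming 4 KiB pages (PAGE_SHIFT == 12 here is an assumption; the kernel's value is per-architecture):

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption: 4 KiB pages */

int main(void)
{
	/* The deferral threshold used by update_defer_init below. */
	unsigned long nr_pages = 2UL << (30 - PAGE_SHIFT);

	/* 524288 pages * 4 KiB = 2 GiB of the node's highest zone. */
	printf("%lu pages = %lu MiB\n",
	       nr_pages, (nr_pages << PAGE_SHIFT) >> 20);
	return 0;
}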

drivers/base/node.c

Lines changed: 5 additions & 1 deletion

@@ -359,12 +359,16 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 #define page_initialized(page)  (page->lru.next)
 
-static int get_nid_for_pfn(unsigned long pfn)
+static int __init_refok get_nid_for_pfn(unsigned long pfn)
 {
	struct page *page;
 
	if (!pfn_valid_within(pfn))
		return -1;
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+	if (system_state == SYSTEM_BOOTING)
+		return early_pfn_to_nid(pfn);
+#endif
	page = pfn_to_page(pfn);
	if (!page_initialized(page))
		return -1;

include/linux/mmzone.h

Lines changed: 8 additions & 0 deletions

@@ -762,6 +762,14 @@ typedef struct pglist_data {
	/* Number of pages migrated during the rate limiting time interval */
	unsigned long numabalancing_migrate_nr_pages;
 #endif
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+	/*
+	 * If memory initialisation on large machines is deferred then this
+	 * is the first PFN that needs to be initialised.
+	 */
+	unsigned long first_deferred_pfn;
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
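reset_deferred_meminit (added to mm/page_alloc.c below) initialises this field to ULONG_MAX, so while nothing is deferred the test pfn >= first_deferred_pfn is false for every attainable pfn. A small standalone sketch of that sentinel check (the function and pfn value are hypothetical):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Sentinel semantics: ULONG_MAX means "nothing on this node is deferred". */
static bool page_is_deferred(unsigned long pfn, unsigned long first_deferred_pfn)
{
	return pfn >= first_deferred_pfn;
}

int main(void)
{
	unsigned long first_deferred_pfn = ULONG_MAX;	/* as reset_deferred_meminit() sets it */

	/* No attainable pfn reaches ULONG_MAX, so this prints 0. */
	printf("deferred: %d\n", page_is_deferred(0x100000UL, first_deferred_pfn));
	return 0;
}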

mm/Kconfig

Lines changed: 18 additions & 0 deletions

@@ -636,3 +636,21 @@ config MAX_STACK_SIZE_MB
	  changed to a smaller value in which case that is used.
 
	  A sane initial value is 80 MB.
+
+# For architectures that support deferred memory initialisation
+config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	bool
+
+config DEFERRED_STRUCT_PAGE_INIT
+	bool "Defer initialisation of struct pages to kswapd"
+	default n
+	depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	depends on MEMORY_HOTPLUG
+	help
+	  Ordinarily all struct pages are initialised during early boot in a
+	  single thread. On very large machines this can take a considerable
+	  amount of time. If this option is set, large machines will bring up
+	  a subset of memmap at boot and then initialise the rest in parallel
+	  when kswapd starts. This has a potential performance impact on
+	  processes running early in the lifetime of the system until kswapd
+	  finishes the initialisation.

mm/internal.h

Lines changed: 18 additions & 0 deletions

@@ -387,6 +387,24 @@ static inline void mminit_verify_zonelist(void)
 }
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
 
+/*
+ * Deferred struct page initialisation requires init functions that are freed
+ * before kswapd is available. Reuse the memory hotplug section annotation
+ * to mark the required code.
+ *
+ * __defermem_init is code that always exists but is annotated __meminit to
+ * avoid section warnings.
+ * __defer_init code gets marked __meminit when deferring struct page
+ * initialisation but is otherwise in the init section.
+ */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+#define __defermem_init __meminit
+#define __defer_init __meminit
+#else
+#define __defermem_init
+#define __defer_init __init
+#endif
+
 /* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
 #if defined(CONFIG_SPARSEMEM)
 extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
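A compile-standalone sketch of how the two annotations are intended to be applied; the function names are hypothetical and not part of this patch. Code that kswapd calls after boot takes __defermem_init so it is never discarded with .init memory, while boot-only paths take __defer_init:

/* Stub expansions so this sketch compiles outside the kernel; in-tree,
 * these come from the mm/internal.h hunk above. */
#define __defermem_init		/* kernel: __meminit when deferral is enabled, else empty */
#define __defer_init		/* kernel: __meminit when deferring, else __init */

/* Hypothetical: called from kswapd context after boot, once .init memory
 * has been freed, so it must not be placed in a plain init section. */
static void __defermem_init example_deferred_meminit(void)
{
}

/* Hypothetical: a boot-time free path; discardable with the init sections
 * when CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set. */
static void __defer_init example_boot_free_path(void)
{
	example_deferred_meminit();
}

int main(void)
{
	example_boot_free_path();
	return 0;
}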

mm/page_alloc.c

Lines changed: 75 additions & 3 deletions

@@ -235,6 +235,64 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+	pgdat->first_deferred_pfn = ULONG_MAX;
+}
+
+/* Returns true if the struct page for the pfn is uninitialised */
+static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
+{
+	int nid = early_pfn_to_nid(pfn);
+
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
+/*
+ * Returns false when the remaining initialisation should be deferred until
+ * later in the boot cycle when it can be parallelised.
+ */
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	/* Always populate low zones for address-constrained allocations */
+	if (zone_end < pgdat_end_pfn(pgdat))
+		return true;
+
+	/* Initialise at least 2G of the highest zone */
+	(*nr_initialised)++;
+	if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+		pgdat->first_deferred_pfn = pfn;
+		return false;
+	}
+
+	return true;
+}
+#else
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+}
+
+static inline bool early_page_uninitialised(unsigned long pfn)
+{
+	return false;
+}
+
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	return true;
+}
+#endif
+
+
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
	if (unlikely(page_group_by_mobility_disabled &&
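Note the alignment condition in update_defer_init above: the cut-off is only recorded when pfn is section-aligned ((pfn & (PAGES_PER_SECTION - 1)) == 0), so the deferred region always starts on a sparsemem section boundary. A small sketch of that power-of-two alignment test, assuming 128 MiB sections with 4 KiB pages (PAGES_PER_SECTION == 32768, as on x86-64; both values are assumptions here):

#include <stdbool.h>
#include <stdio.h>

#define PAGES_PER_SECTION 32768UL	/* assumption: 128 MiB sections / 4 KiB pages */

static bool section_aligned(unsigned long pfn)
{
	/* Equivalent to pfn % PAGES_PER_SECTION == 0 for power-of-two sizes. */
	return (pfn & (PAGES_PER_SECTION - 1)) == 0;
}

int main(void)
{
	/* Prints "1 0": 32768 is on a section boundary, 32769 is not. */
	printf("%d %d\n", section_aligned(32768UL), section_aligned(32769UL));
	return 0;
}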
@@ -878,8 +936,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
	local_irq_restore(flags);
 }
 
-void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
-				unsigned int order)
+static void __defer_init __free_pages_boot_core(struct page *page,
+				unsigned long pfn, unsigned int order)
 {
	unsigned int nr_pages = 1 << order;
	struct page *p = page;

@@ -951,6 +1009,14 @@ static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
 #endif
 
 
+void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
+				unsigned int order)
+{
+	if (early_page_uninitialised(pfn))
+		return;
+	return __free_pages_boot_core(page, pfn, order);
+}
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)

@@ -4325,14 +4391,16 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
		unsigned long start_pfn, enum memmap_context context)
 {
+	pg_data_t *pgdat = NODE_DATA(nid);
	unsigned long end_pfn = start_pfn + size;
	unsigned long pfn;
	struct zone *z;
+	unsigned long nr_initialised = 0;
 
	if (highest_memmap_pfn < end_pfn - 1)
		highest_memmap_pfn = end_pfn - 1;
 
-	z = &NODE_DATA(nid)->node_zones[zone];
+	z = &pgdat->node_zones[zone];
	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		/*
		 * There can be holes in boot-time mem_map[]s

@@ -4344,6 +4412,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
				continue;
			if (!early_pfn_in_nid(pfn, nid))
				continue;
+			if (!update_defer_init(pgdat, pfn, end_pfn,
+						&nr_initialised))
+				break;
		}
		__init_single_pfn(pfn, zone, nid);
	}
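Taken together, the memmap_init_zone changes make the early pfn walk stop as soon as update_defer_init trips, with pgdat->first_deferred_pfn recording where a later, parallelised pass should resume. A minimal userspace model of that stop-and-record pattern (function name, pfn range, and budget are all hypothetical):

#include <limits.h>
#include <stdio.h>

/* Initialise pfns until a budget is spent, then return the first pfn
 * left uninitialised so a later pass knows where to resume. */
static unsigned long init_until_budget(unsigned long start_pfn,
				       unsigned long end_pfn,
				       unsigned long budget)
{
	unsigned long pfn, nr_initialised = 0;

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		if (++nr_initialised > budget)
			return pfn;	/* resume point for the deferred pass */
		/* ... initialise the struct page for pfn here ... */
	}
	return ULONG_MAX;		/* nothing deferred */
}

int main(void)
{
	/* With a 524288-page (2 GiB at 4 KiB pages) budget, pfn 524288 is
	 * the first one handed to the deferred pass. */
	unsigned long resume = init_until_budget(0, 1000000UL, 524288UL);

	printf("first_deferred_pfn = %lu\n", resume);
	return 0;
}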
@@ -5144,6 +5215,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
	/* pg_data_t should be reset to zero when it's allocated */
	WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
 
+	reset_deferred_meminit(pgdat);
	pgdat->node_id = nid;
	pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
