
Commit 6ae11b2

Lee Schermerhorn authored and torvalds committed
hugetlb: add nodemask arg to huge page alloc, free and surplus adjust functions
In preparation for constraining huge page allocation and freeing by the
controlling task's numa mempolicy, add a "nodes_allowed" nodemask pointer
to the allocate, free and surplus adjustment functions.  For now, pass
NULL to indicate default behavior--i.e., use node_online_map.  A
subsequent patch will derive a non-default mask from the controlling
task's numa mempolicy.

Note that this method of updating the global hstate nr_hugepages under
the constraint of a nodemask simplifies keeping the global state
consistent--especially the number of persistent and surplus pages
relative to reservations and overcommit limits.  There are undoubtedly
other ways to do this, but this works for both interfaces: mempolicy
and per node attributes.

[[email protected]: fix HIGHMEM compile error]
Signed-off-by: Lee Schermerhorn <[email protected]>
Reviewed-by: Mel Gorman <[email protected]>
Acked-by: David Rientjes <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
Cc: KAMEZAWA Hiroyuki <[email protected]>
Cc: Randy Dunlap <[email protected]>
Cc: Nishanth Aravamudan <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Adam Litke <[email protected]>
Cc: Andy Whitcroft <[email protected]>
Cc: Eric Whitney <[email protected]>
Cc: Christoph Lameter <[email protected]>
Signed-off-by: David Rientjes <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
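The round-robin walk over an allowed-node mask that this patch introduces
(next_node_allowed() / get_valid_node_allowed()) can be illustrated with a
small standalone sketch.  This is not kernel code: MAX_NODES, the unsigned
bitmask, and the helper names next_allowed()/valid_allowed() are assumptions
chosen for the example only; the kernel uses nodemask_t, next_node(),
first_node() and the saved h->next_nid_to_{alloc|free} pointer instead.

/*
 * Minimal userspace sketch of the "next allowed node" round-robin logic.
 * Assumes at least one node is set in the allowed mask.
 */
#include <stdio.h>

#define MAX_NODES 8

/* advance to the next node set in "allowed", wrapping at the end */
static int next_allowed(int nid, unsigned allowed)
{
        do {
                nid = (nid + 1) % MAX_NODES;
        } while (!(allowed & (1u << nid)));
        return nid;
}

/* if "nid" itself is not allowed, move it to the next allowed node */
static int valid_allowed(int nid, unsigned allowed)
{
        if (!(allowed & (1u << nid)))
                nid = next_allowed(nid, allowed);
        return nid;
}

int main(void)
{
        unsigned allowed = 0x0b;        /* nodes 0, 1 and 3 allowed */
        int next_nid = 2;               /* saved pointer may lie outside the mask */
        int i;

        for (i = 0; i < 6; i++) {
                int nid = valid_allowed(next_nid, allowed);
                next_nid = next_allowed(nid, allowed);  /* advance for the next caller */
                printf("allocate/free on node %d\n", nid);
        }
        return 0;                       /* prints nodes 3 0 1 3 0 1 */
}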
1 parent 9a76db0 commit 6ae11b2

File tree

1 file changed: +72 −53 lines changed


mm/hugetlb.c

Lines changed: 72 additions & 53 deletions
@@ -622,48 +622,56 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 }
 
 /*
- * common helper function for hstate_next_node_to_{alloc|free}.
- * return next node in node_online_map, wrapping at end.
+ * common helper functions for hstate_next_node_to_{alloc|free}.
+ * We may have allocated or freed a huge page based on a different
+ * nodes_allowed previously, so h->next_node_to_{alloc|free} might
+ * be outside of *nodes_allowed.  Ensure that we use an allowed
+ * node for alloc or free.
  */
-static int next_node_allowed(int nid)
+static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
 {
-        nid = next_node(nid, node_online_map);
+        nid = next_node(nid, *nodes_allowed);
         if (nid == MAX_NUMNODES)
-                nid = first_node(node_online_map);
+                nid = first_node(*nodes_allowed);
         VM_BUG_ON(nid >= MAX_NUMNODES);
 
         return nid;
 }
 
+static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+        if (!node_isset(nid, *nodes_allowed))
+                nid = next_node_allowed(nid, nodes_allowed);
+        return nid;
+}
+
 /*
- * Use a helper variable to find the next node and then
- * copy it back to next_nid_to_alloc afterwards:
- * otherwise there's a window in which a racer might
- * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
- * But we don't need to use a spin_lock here: it really
- * doesn't matter if occasionally a racer chooses the
- * same nid as we do.  Move nid forward in the mask even
- * if we just successfully allocated a hugepage so that
- * the next caller gets hugepages on the next node.
+ * returns the previously saved node ["this node"] from which to
+ * allocate a persistent huge page for the pool and advance the
+ * next node from which to allocate, handling wrap at end of node
+ * mask.
  */
-static int hstate_next_node_to_alloc(struct hstate *h)
+static int hstate_next_node_to_alloc(struct hstate *h,
+                                        nodemask_t *nodes_allowed)
 {
-        int nid, next_nid;
+        int nid;
+
+        VM_BUG_ON(!nodes_allowed);
+
+        nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
+        h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
 
-        nid = h->next_nid_to_alloc;
-        next_nid = next_node_allowed(nid);
-        h->next_nid_to_alloc = next_nid;
         return nid;
 }
 
-static int alloc_fresh_huge_page(struct hstate *h)
+static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 {
         struct page *page;
         int start_nid;
         int next_nid;
         int ret = 0;
 
-        start_nid = hstate_next_node_to_alloc(h);
+        start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
         next_nid = start_nid;
 
         do {
@@ -672,7 +680,7 @@ static int alloc_fresh_huge_page(struct hstate *h)
                         ret = 1;
                         break;
                 }
-                next_nid = hstate_next_node_to_alloc(h);
+                next_nid = hstate_next_node_to_alloc(h, nodes_allowed);
         } while (next_nid != start_nid);
 
         if (ret)
@@ -684,18 +692,20 @@ static int alloc_fresh_huge_page(struct hstate *h)
 }
 
 /*
- * helper for free_pool_huge_page() - return the next node
- * from which to free a huge page.  Advance the next node id
- * whether or not we find a free huge page to free so that the
- * next attempt to free addresses the next node.
+ * helper for free_pool_huge_page() - return the previously saved
+ * node ["this node"] from which to free a huge page.  Advance the
+ * next node id whether or not we find a free huge page to free so
+ * that the next attempt to free addresses the next node.
  */
-static int hstate_next_node_to_free(struct hstate *h)
+static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 {
-        int nid, next_nid;
+        int nid;
+
+        VM_BUG_ON(!nodes_allowed);
+
+        nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
+        h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
 
-        nid = h->next_nid_to_free;
-        next_nid = next_node_allowed(nid);
-        h->next_nid_to_free = next_nid;
         return nid;
 }
 
@@ -705,13 +715,14 @@ static int hstate_next_node_to_free(struct hstate *h)
  * balanced over allowed nodes.
  * Called with hugetlb_lock locked.
  */
-static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
+static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
+                                                 bool acct_surplus)
 {
         int start_nid;
         int next_nid;
         int ret = 0;
 
-        start_nid = hstate_next_node_to_free(h);
+        start_nid = hstate_next_node_to_free(h, nodes_allowed);
         next_nid = start_nid;
 
         do {
@@ -735,7 +746,7 @@ static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
                         ret = 1;
                         break;
                 }
-                next_nid = hstate_next_node_to_free(h);
+                next_nid = hstate_next_node_to_free(h, nodes_allowed);
         } while (next_nid != start_nid);
 
         return ret;
@@ -937,7 +948,7 @@ static void return_unused_surplus_pages(struct hstate *h,
          * on-line nodes for us and will handle the hstate accounting.
          */
         while (nr_pages--) {
-                if (!free_pool_huge_page(h, 1))
+                if (!free_pool_huge_page(h, &node_online_map, 1))
                         break;
         }
 }
@@ -1047,7 +1058,8 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
                 void *addr;
 
                 addr = __alloc_bootmem_node_nopanic(
-                                NODE_DATA(hstate_next_node_to_alloc(h)),
+                                NODE_DATA(hstate_next_node_to_alloc(h,
+                                                                &node_online_map)),
                                 huge_page_size(h), huge_page_size(h), 0);
 
                 if (addr) {
@@ -1102,7 +1114,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
                 if (h->order >= MAX_ORDER) {
                         if (!alloc_bootmem_huge_page(h))
                                 break;
-                } else if (!alloc_fresh_huge_page(h))
+                } else if (!alloc_fresh_huge_page(h, &node_online_map))
                         break;
         }
         h->max_huge_pages = i;
@@ -1144,14 +1156,15 @@ static void __init report_hugepages(void)
 }
 
 #ifdef CONFIG_HIGHMEM
-static void try_to_free_low(struct hstate *h, unsigned long count)
+static void try_to_free_low(struct hstate *h, unsigned long count,
+                                                nodemask_t *nodes_allowed)
 {
         int i;
 
         if (h->order >= MAX_ORDER)
                 return;
 
-        for (i = 0; i < MAX_NUMNODES; ++i) {
+        for_each_node_mask(i, *nodes_allowed) {
                 struct page *page, *next;
                 struct list_head *freel = &h->hugepage_freelists[i];
                 list_for_each_entry_safe(page, next, freel, lru) {
@@ -1167,7 +1180,8 @@ static void try_to_free_low(struct hstate *h, unsigned long count)
         }
 }
 #else
-static inline void try_to_free_low(struct hstate *h, unsigned long count)
+static inline void try_to_free_low(struct hstate *h, unsigned long count,
+                                                nodemask_t *nodes_allowed)
 {
 }
 #endif
@@ -1177,17 +1191,18 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
  * balanced by operating on them in a round-robin fashion.
  * Returns 1 if an adjustment was made.
  */
-static int adjust_pool_surplus(struct hstate *h, int delta)
+static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
+                                int delta)
 {
         int start_nid, next_nid;
         int ret = 0;
 
         VM_BUG_ON(delta != -1 && delta != 1);
 
         if (delta < 0)
-                start_nid = hstate_next_node_to_alloc(h);
+                start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
         else
-                start_nid = hstate_next_node_to_free(h);
+                start_nid = hstate_next_node_to_free(h, nodes_allowed);
         next_nid = start_nid;
 
         do {
@@ -1197,7 +1212,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
                          * To shrink on this node, there must be a surplus page
                          */
                         if (!h->surplus_huge_pages_node[nid]) {
-                                next_nid = hstate_next_node_to_alloc(h);
+                                next_nid = hstate_next_node_to_alloc(h,
+                                                                nodes_allowed);
                                 continue;
                         }
                 }
@@ -1207,7 +1223,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
                          */
                         if (h->surplus_huge_pages_node[nid] >=
                                                 h->nr_huge_pages_node[nid]) {
-                                next_nid = hstate_next_node_to_free(h);
+                                next_nid = hstate_next_node_to_free(h,
+                                                                nodes_allowed);
                                 continue;
                         }
                 }
@@ -1222,7 +1239,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
 }
 
 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
-static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
+static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
+                                                nodemask_t *nodes_allowed)
 {
         unsigned long min_count, ret;
 
@@ -1242,7 +1260,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
          */
         spin_lock(&hugetlb_lock);
         while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
-                if (!adjust_pool_surplus(h, -1))
+                if (!adjust_pool_surplus(h, nodes_allowed, -1))
                         break;
         }
 
@@ -1253,7 +1271,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
                  * and reducing the surplus.
                  */
                 spin_unlock(&hugetlb_lock);
-                ret = alloc_fresh_huge_page(h);
+                ret = alloc_fresh_huge_page(h, nodes_allowed);
                 spin_lock(&hugetlb_lock);
                 if (!ret)
                         goto out;
@@ -1277,13 +1295,13 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
          */
         min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
         min_count = max(count, min_count);
-        try_to_free_low(h, min_count);
+        try_to_free_low(h, min_count, nodes_allowed);
         while (min_count < persistent_huge_pages(h)) {
-                if (!free_pool_huge_page(h, 0))
+                if (!free_pool_huge_page(h, nodes_allowed, 0))
                         break;
         }
         while (count < persistent_huge_pages(h)) {
-                if (!adjust_pool_surplus(h, 1))
+                if (!adjust_pool_surplus(h, nodes_allowed, 1))
                         break;
         }
 out:
@@ -1329,7 +1347,7 @@ static ssize_t nr_hugepages_store(struct kobject *kobj,
         if (err)
                 return 0;
 
-        h->max_huge_pages = set_max_huge_pages(h, input);
+        h->max_huge_pages = set_max_huge_pages(h, input, &node_online_map);
 
         return count;
 }
@@ -1571,7 +1589,8 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
         proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
         if (write)
-                h->max_huge_pages = set_max_huge_pages(h, tmp);
+                h->max_huge_pages = set_max_huge_pages(h, tmp,
+                                                        &node_online_map);
 
         return 0;
 }
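For orientation, the call sites touched by this patch all follow the same
pattern.  The fragment below is an illustrative sketch only (it will not
compile on its own) of how a caller supplies the constraining mask, using
only identifiers that appear in the diff; a follow-up patch, per the commit
message, would substitute a mempolicy-derived mask for &node_online_map.

        /* illustrative caller fragment, assuming a struct hstate *h and a count */
        nodemask_t *nodes_allowed = &node_online_map;   /* default: all online nodes */

        /* grow or shrink the persistent pool, constrained to *nodes_allowed */
        h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);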
