@@ -622,48 +622,56 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 }
 
 /*
- * common helper function for hstate_next_node_to_{alloc|free}.
- * return next node in node_online_map, wrapping at end.
+ * common helper functions for hstate_next_node_to_{alloc|free}.
+ * We may have allocated or freed a huge page based on a different
+ * nodes_allowed previously, so h->next_node_to_{alloc|free} might
+ * be outside of *nodes_allowed.  Ensure that we use an allowed
+ * node for alloc or free.
  */
-static int next_node_allowed(int nid)
+static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
 {
-	nid = next_node(nid, node_online_map);
+	nid = next_node(nid, *nodes_allowed);
 	if (nid == MAX_NUMNODES)
-		nid = first_node(node_online_map);
+		nid = first_node(*nodes_allowed);
 	VM_BUG_ON(nid >= MAX_NUMNODES);
 
 	return nid;
 }
 
+static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+	if (!node_isset(nid, *nodes_allowed))
+		nid = next_node_allowed(nid, nodes_allowed);
+	return nid;
+}
+
 /*
- * Use a helper variable to find the next node and then
- * copy it back to next_nid_to_alloc afterwards:
- * otherwise there's a window in which a racer might
- * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
- * But we don't need to use a spin_lock here: it really
- * doesn't matter if occasionally a racer chooses the
- * same nid as we do.  Move nid forward in the mask even
- * if we just successfully allocated a hugepage so that
- * the next caller gets hugepages on the next node.
+ * returns the previously saved node ["this node"] from which to
+ * allocate a persistent huge page for the pool and advance the
+ * next node from which to allocate, handling wrap at end of node
+ * mask.
  */
-static int hstate_next_node_to_alloc(struct hstate *h)
+static int hstate_next_node_to_alloc(struct hstate *h,
+					nodemask_t *nodes_allowed)
 {
-	int nid, next_nid;
+	int nid;
+
+	VM_BUG_ON(!nodes_allowed);
+
+	nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
+	h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
 
-	nid = h->next_nid_to_alloc;
-	next_nid = next_node_allowed(nid);
-	h->next_nid_to_alloc = next_nid;
 	return nid;
 }
 
-static int alloc_fresh_huge_page(struct hstate *h)
+static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 {
 	struct page *page;
 	int start_nid;
 	int next_nid;
 	int ret = 0;
 
-	start_nid = hstate_next_node_to_alloc(h);
+	start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
 	next_nid = start_nid;
 
 	do {
@@ -672,7 +680,7 @@ static int alloc_fresh_huge_page(struct hstate *h)
 			ret = 1;
 			break;
 		}
-		next_nid = hstate_next_node_to_alloc(h);
+		next_nid = hstate_next_node_to_alloc(h, nodes_allowed);
 	} while (next_nid != start_nid);
 
 	if (ret)
@@ -684,18 +692,20 @@ static int alloc_fresh_huge_page(struct hstate *h)
 }
 
 /*
- * helper for free_pool_huge_page() - return the next node
- * from which to free a huge page.  Advance the next node id
- * whether or not we find a free huge page to free so that the
- * next attempt to free addresses the next node.
+ * helper for free_pool_huge_page() - return the previously saved
+ * node ["this node"] from which to free a huge page.  Advance the
+ * next node id whether or not we find a free huge page to free so
+ * that the next attempt to free addresses the next node.
  */
-static int hstate_next_node_to_free(struct hstate *h)
+static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 {
-	int nid, next_nid;
+	int nid;
+
+	VM_BUG_ON(!nodes_allowed);
+
+	nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
+	h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
 
-	nid = h->next_nid_to_free;
-	next_nid = next_node_allowed(nid);
-	h->next_nid_to_free = next_nid;
 	return nid;
 }
 
@@ -705,13 +715,14 @@ static int hstate_next_node_to_free(struct hstate *h)
  * balanced over allowed nodes.
  * Called with hugetlb_lock locked.
  */
-static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
+static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
+						 bool acct_surplus)
 {
 	int start_nid;
 	int next_nid;
 	int ret = 0;
 
-	start_nid = hstate_next_node_to_free(h);
+	start_nid = hstate_next_node_to_free(h, nodes_allowed);
 	next_nid = start_nid;
 
 	do {
@@ -735,7 +746,7 @@ static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
 			ret = 1;
 			break;
 		}
-		next_nid = hstate_next_node_to_free(h);
+		next_nid = hstate_next_node_to_free(h, nodes_allowed);
 	} while (next_nid != start_nid);
 
 	return ret;
@@ -937,7 +948,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 	 * on-line nodes for us and will handle the hstate accounting.
 	 */
 	while (nr_pages--) {
-		if (!free_pool_huge_page(h, 1))
+		if (!free_pool_huge_page(h, &node_online_map, 1))
 			break;
 	}
 }
@@ -1047,7 +1058,8 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 	void *addr;
 
 	addr = __alloc_bootmem_node_nopanic(
-			NODE_DATA(hstate_next_node_to_alloc(h)),
+			NODE_DATA(hstate_next_node_to_alloc(h,
+							&node_online_map)),
 			huge_page_size(h), huge_page_size(h), 0);
 
 	if (addr) {
@@ -1102,7 +1114,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 		if (h->order >= MAX_ORDER) {
 			if (!alloc_bootmem_huge_page(h))
 				break;
-		} else if (!alloc_fresh_huge_page(h))
+		} else if (!alloc_fresh_huge_page(h, &node_online_map))
 			break;
 	}
 	h->max_huge_pages = i;
@@ -1144,14 +1156,15 @@ static void __init report_hugepages(void)
 }
 
 #ifdef CONFIG_HIGHMEM
-static void try_to_free_low(struct hstate *h, unsigned long count)
+static void try_to_free_low(struct hstate *h, unsigned long count,
+						nodemask_t *nodes_allowed)
 {
 	int i;
 
 	if (h->order >= MAX_ORDER)
 		return;
 
-	for (i = 0; i < MAX_NUMNODES; ++i) {
+	for_each_node_mask(i, *nodes_allowed) {
 		struct page *page, *next;
 		struct list_head *freel = &h->hugepage_freelists[i];
 		list_for_each_entry_safe(page, next, freel, lru) {
@@ -1167,7 +1180,8 @@ static void try_to_free_low(struct hstate *h, unsigned long count)
 	}
 }
 #else
-static inline void try_to_free_low(struct hstate *h, unsigned long count)
+static inline void try_to_free_low(struct hstate *h, unsigned long count,
+						nodemask_t *nodes_allowed)
 {
 }
 #endif
@@ -1177,17 +1191,18 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
  * balanced by operating on them in a round-robin fashion.
  * Returns 1 if an adjustment was made.
  */
-static int adjust_pool_surplus(struct hstate *h, int delta)
+static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
+				int delta)
 {
 	int start_nid, next_nid;
 	int ret = 0;
 
 	VM_BUG_ON(delta != -1 && delta != 1);
 
 	if (delta < 0)
-		start_nid = hstate_next_node_to_alloc(h);
+		start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
 	else
-		start_nid = hstate_next_node_to_free(h);
+		start_nid = hstate_next_node_to_free(h, nodes_allowed);
 	next_nid = start_nid;
 
 	do {
@@ -1197,7 +1212,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
 			 * To shrink on this node, there must be a surplus page
 			 */
 			if (!h->surplus_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_alloc(h);
+				next_nid = hstate_next_node_to_alloc(h,
+								nodes_allowed);
 				continue;
 			}
 		}
@@ -1207,7 +1223,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
 			 */
 			if (h->surplus_huge_pages_node[nid] >=
 						h->nr_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_free(h);
+				next_nid = hstate_next_node_to_free(h,
+								nodes_allowed);
 				continue;
 			}
 		}
@@ -1222,7 +1239,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
 }
 
 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
-static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
+static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
+						nodemask_t *nodes_allowed)
 {
 	unsigned long min_count, ret;
 
@@ -1242,7 +1260,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 	 */
 	spin_lock(&hugetlb_lock);
 	while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
-		if (!adjust_pool_surplus(h, -1))
+		if (!adjust_pool_surplus(h, nodes_allowed, -1))
 			break;
 	}
 
@@ -1253,7 +1271,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 		 * and reducing the surplus.
 		 */
 		spin_unlock(&hugetlb_lock);
-		ret = alloc_fresh_huge_page(h);
+		ret = alloc_fresh_huge_page(h, nodes_allowed);
 		spin_lock(&hugetlb_lock);
 		if (!ret)
 			goto out;
@@ -1277,13 +1295,13 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 	 */
 	min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
 	min_count = max(count, min_count);
-	try_to_free_low(h, min_count);
+	try_to_free_low(h, min_count, nodes_allowed);
 	while (min_count < persistent_huge_pages(h)) {
-		if (!free_pool_huge_page(h, 0))
+		if (!free_pool_huge_page(h, nodes_allowed, 0))
 			break;
 	}
 	while (count < persistent_huge_pages(h)) {
-		if (!adjust_pool_surplus(h, 1))
+		if (!adjust_pool_surplus(h, nodes_allowed, 1))
 			break;
 	}
 out:
@@ -1329,7 +1347,7 @@ static ssize_t nr_hugepages_store(struct kobject *kobj,
 	if (err)
 		return 0;
 
-	h->max_huge_pages = set_max_huge_pages(h, input);
+	h->max_huge_pages = set_max_huge_pages(h, input, &node_online_map);
 
 	return count;
 }
@@ -1571,7 +1589,8 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 	proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
 	if (write)
-		h->max_huge_pages = set_max_huge_pages(h, tmp);
+		h->max_huge_pages = set_max_huge_pages(h, tmp,
+							&node_online_map);
 
 	return 0;
 }
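
For readers outside the kernel tree, here is a minimal standalone userspace sketch of the wrap-around cursor pattern that next_node_allowed()/get_valid_node_allowed() implement above: keep a per-pool cursor, clamp it onto the allowed mask if the mask has changed, and advance it with wrap-around so successive calls round-robin over allowed nodes. MAX_NODES and the plain unsigned-long bitmask are hypothetical stand-ins for MAX_NUMNODES and nodemask_t; this is an illustration of the pattern, not the kernel implementation.

/*
 * Standalone sketch (not kernel code) of the round-robin node cursor.
 * Assumes the mask has at least one bit set.
 */
#include <assert.h>
#include <stdio.h>

#define MAX_NODES 8

/* next allowed node after nid, wrapping at the end of the mask */
static int next_node_allowed(int nid, unsigned long allowed)
{
	do {
		nid = (nid + 1) % MAX_NODES;
	} while (!(allowed & (1UL << nid)));
	return nid;
}

/* clamp a possibly stale cursor onto a node in the allowed mask */
static int get_valid_node_allowed(int nid, unsigned long allowed)
{
	if (!(allowed & (1UL << nid)))
		nid = next_node_allowed(nid, allowed);
	return nid;
}

int main(void)
{
	unsigned long allowed = 0x0b;	/* nodes 0, 1 and 3 allowed */
	int cursor = 2;			/* stale cursor from an older mask */
	int i;

	assert(allowed != 0);
	for (i = 0; i < 6; i++) {
		int nid = get_valid_node_allowed(cursor, allowed);

		cursor = next_node_allowed(nid, allowed);
		printf("allocate/free on node %d\n", nid);	/* 3 0 1 3 0 1 */
	}
	return 0;
}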