@@ -198,40 +198,73 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
 	return end;
 }
 
+static u64 uniform_size(u64 max_addr, u64 base, u64 hole, int nr_nodes)
+{
+	unsigned long max_pfn = PHYS_PFN(max_addr);
+	unsigned long base_pfn = PHYS_PFN(base);
+	unsigned long hole_pfns = PHYS_PFN(hole);
+
+	return PFN_PHYS((max_pfn - base_pfn - hole_pfns) / nr_nodes);
+}
+
 /*
  * Sets up fake nodes of `size' interleaved over physical nodes ranging from
  * `addr' to `max_addr'.
  *
  * Returns zero on success or negative on error.
  */
-static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
+static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei,
 					      struct numa_meminfo *pi,
-					      u64 addr, u64 max_addr, u64 size)
+					      u64 addr, u64 max_addr, u64 size,
+					      int nr_nodes, struct numa_memblk *pblk,
+					      int nid)
 {
 	nodemask_t physnode_mask = numa_nodes_parsed;
+	int i, ret, uniform = 0;
 	u64 min_size;
-	int nid = 0;
-	int i, ret;
 
-	if (!size)
+	if ((!size && !nr_nodes) || (nr_nodes && !pblk))
 		return -1;
+
 	/*
-	 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
-	 * increased accordingly if the requested size is too small. This
-	 * creates a uniform distribution of node sizes across the entire
-	 * machine (but not necessarily over physical nodes).
+	 * In the 'uniform' case split the passed in physical node by
+	 * nr_nodes, in the non-uniform case, ignore the passed in
+	 * physical block and try to create nodes of at least size
+	 * @size.
+	 *
+	 * In the uniform case, split the nodes strictly by physical
+	 * capacity, i.e. ignore holes. In the non-uniform case account
+	 * for holes and treat @size as a minimum floor.
 	 */
-	min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES;
-	min_size = max(min_size, FAKE_NODE_MIN_SIZE);
-	if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
-		min_size = (min_size + FAKE_NODE_MIN_SIZE) &
-			FAKE_NODE_MIN_HASH_MASK;
+	if (!nr_nodes)
+		nr_nodes = MAX_NUMNODES;
+	else {
+		nodes_clear(physnode_mask);
+		node_set(pblk->nid, physnode_mask);
+		uniform = 1;
+	}
+
+	if (uniform) {
+		min_size = uniform_size(max_addr, addr, 0, nr_nodes);
+		size = min_size;
+	} else {
+		/*
+		 * The limit on emulated nodes is MAX_NUMNODES, so the
+		 * size per node is increased accordingly if the
+		 * requested size is too small. This creates a uniform
+		 * distribution of node sizes across the entire machine
+		 * (but not necessarily over physical nodes).
+		 */
+		min_size = uniform_size(max_addr, addr,
+				mem_hole_size(addr, max_addr), nr_nodes);
+	}
+	min_size = ALIGN(max(min_size, FAKE_NODE_MIN_SIZE), FAKE_NODE_MIN_SIZE);
 	if (size < min_size) {
 		pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
 			size >> 20, min_size >> 20);
 		size = min_size;
 	}
-	size &= FAKE_NODE_MIN_HASH_MASK;
+	size = ALIGN_DOWN(size, FAKE_NODE_MIN_SIZE);
 
 	/*
 	 * Fill physical nodes with fake nodes of size until there is no memory
@@ -248,10 +281,14 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 			node_clear(i, physnode_mask);
 			continue;
 		}
+
 		start = pi->blk[phys_blk].start;
 		limit = pi->blk[phys_blk].end;
 
-		end = find_end_of_node(start, limit, size);
+		if (uniform)
+			end = start + size;
+		else
+			end = find_end_of_node(start, limit, size);
 		/*
 		 * If there won't be at least FAKE_NODE_MIN_SIZE of
 		 * non-reserved memory in ZONE_DMA32 for the next node,
@@ -266,7 +303,8 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 		 * next node, this one must extend to the end of the
 		 * physical node.
 		 */
-		if (limit - end - mem_hole_size(end, limit) < size)
+		if ((limit - end - mem_hole_size(end, limit) < size)
+				&& !uniform)
 			end = limit;
 
 		ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
@@ -276,7 +314,15 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 				return ret;
 		}
 	}
-	return 0;
+	return nid;
+}
+
+static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
+					      struct numa_meminfo *pi,
+					      u64 addr, u64 max_addr, u64 size)
+{
+	return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size,
+			0, NULL, NUMA_NO_NODE);
 }
 
 int __init setup_emu2phys_nid(int *dfl_phys_nid)
@@ -346,7 +392,28 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	 * the fixed node size. Otherwise, if it is just a single number N,
 	 * split the system RAM into N fake nodes.
 	 */
-	if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
+	if (strchr(emu_cmdline, 'U')) {
+		nodemask_t physnode_mask = numa_nodes_parsed;
+		unsigned long n;
+		int nid = 0;
+
+		n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
+		ret = -1;
+		for_each_node_mask(i, physnode_mask) {
+			ret = split_nodes_size_interleave_uniform(&ei, &pi,
+					pi.blk[i].start, pi.blk[i].end, 0,
+					n, &pi.blk[i], nid);
+			if (ret < 0)
+				break;
+			if (ret < n) {
+				pr_info("%s: phys: %d only got %d of %ld nodes, failing\n",
+						__func__, i, ret, n);
+				ret = -1;
+				break;
+			}
+			nid = ret;
+		}
+	} else if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
 		u64 size;
 
 		size = memparse(emu_cmdline, &emu_cmdline);
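
For orientation, here is a small userspace sketch of the per-node arithmetic the new uniform path performs. The 4 KiB page size, the 32 MiB FAKE_NODE_MIN_SIZE granularity, and the 16 GiB example node are assumptions for illustration, not values taken from this diff.

/*
 * Standalone sketch (not kernel code) of the uniform split arithmetic used
 * by uniform_size() and the size rounding in the diff above.  PAGE_SHIFT,
 * FAKE_NODE_MIN_SIZE and the example node layout are assumed values.
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT		12
#define PHYS_PFN(x)		((x) >> PAGE_SHIFT)
#define PFN_PHYS(x)		((uint64_t)(x) << PAGE_SHIFT)
#define FAKE_NODE_MIN_SIZE	((uint64_t)32 << 20)
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

/* Mirror of uniform_size(): capacity between base and max_addr / nr_nodes. */
static uint64_t uniform_size(uint64_t max_addr, uint64_t base, uint64_t hole,
			     int nr_nodes)
{
	return PFN_PHYS((PHYS_PFN(max_addr) - PHYS_PFN(base) - PHYS_PFN(hole))
			/ nr_nodes);
}

int main(void)
{
	/* Hypothetical physical node: 16 GiB starting at 4 GiB. */
	uint64_t start = (uint64_t)4 << 30, end = (uint64_t)20 << 30;
	int nr_nodes = 4;

	/* The uniform case ignores holes, so hole is passed as 0. */
	uint64_t size = uniform_size(end, start, 0, nr_nodes);

	/* Round down to the emulation granularity, as the diff does. */
	size = ALIGN_DOWN(size, FAKE_NODE_MIN_SIZE);

	/* Prints 4096 MiB per emulated node for this example. */
	printf("each of %d emulated nodes gets %llu MiB\n",
	       nr_nodes, (unsigned long long)(size >> 20));
	return 0;
}

In terms of the command line handling added to numa_emulation() above, such a four-way split of every physical node corresponds to booting with something like numa=fake=4U, while the existing numa=fake=<N> and numa=fake=<size>[MG] forms keep their current behavior.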