Skip to content

Commit 41eab6f

Browse files
antonblanchard authored and ozbenh committed
powerpc/numa: Use form 1 affinity to setup node distance
Form 1 affinity allows multiple entries in ibm,associativity-reference-points, which represent affinity domains in decreasing order of importance. The Linux concept of a node is always the first entry, but using the other values as an input to node_distance() allows the memory allocator to make better decisions about which node to try first when local memory has been exhausted. We keep things simple and create an array indexed by NUMA node, capped at 4 entries. Each time we look up an associativity property we initialise the array, which is overkill, but since we should only hit this path during boot it didn't seem worth adding a per-node valid bit. Signed-off-by: Anton Blanchard <[email protected]> Signed-off-by: Benjamin Herrenschmidt <[email protected]>
1 parent a591f6b commit 41eab6f

File tree

2 files changed

+92
-33
lines changed

2 files changed

+92
-33
lines changed

arch/powerpc/include/asm/topology.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ static inline int pcibus_to_node(struct pci_bus *bus)
8787
.balance_interval = 1, \
8888
}
8989

90+
extern int __node_distance(int, int);
91+
#define node_distance(a, b) __node_distance(a, b)
92+
9093
extern void __init dump_numa_cpu_topology(void);
9194

9295
extern int sysfs_add_device_to_node(struct sys_device *dev, int nid);

arch/powerpc/mm/numa.c

Lines changed: 89 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ EXPORT_SYMBOL(node_data);
4242

4343
static int min_common_depth;
4444
static int n_mem_addr_cells, n_mem_size_cells;
45+
static int form1_affinity;
46+
47+
#define MAX_DISTANCE_REF_POINTS 4
48+
static int distance_ref_points_depth;
49+
static const unsigned int *distance_ref_points;
50+
static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
4551

4652
/*
4753
* Allocate node_to_cpumask_map based on number of available nodes
@@ -204,6 +210,39 @@ static const u32 *of_get_usable_memory(struct device_node *memory)
204210
return prop;
205211
}
206212

213+
int __node_distance(int a, int b)
214+
{
215+
int i;
216+
int distance = LOCAL_DISTANCE;
217+
218+
if (!form1_affinity)
219+
return distance;
220+
221+
for (i = 0; i < distance_ref_points_depth; i++) {
222+
if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
223+
break;
224+
225+
/* Double the distance for each NUMA level */
226+
distance *= 2;
227+
}
228+
229+
return distance;
230+
}
231+
232+
static void initialize_distance_lookup_table(int nid,
233+
const unsigned int *associativity)
234+
{
235+
int i;
236+
237+
if (!form1_affinity)
238+
return;
239+
240+
for (i = 0; i < distance_ref_points_depth; i++) {
241+
distance_lookup_table[nid][i] =
242+
associativity[distance_ref_points[i]];
243+
}
244+
}
245+
207246
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
208247
* info is found.
209248
*/
@@ -225,6 +264,10 @@ static int of_node_to_nid_single(struct device_node *device)
225264
/* POWER4 LPAR uses 0xffff as invalid node */
226265
if (nid == 0xffff || nid >= MAX_NUMNODES)
227266
nid = -1;
267+
268+
if (nid > 0 && tmp[0] >= distance_ref_points_depth)
269+
initialize_distance_lookup_table(nid, tmp);
270+
228271
out:
229272
return nid;
230273
}
@@ -251,26 +294,10 @@ int of_node_to_nid(struct device_node *device)
251294
}
252295
EXPORT_SYMBOL_GPL(of_node_to_nid);
253296

254-
/*
255-
* In theory, the "ibm,associativity" property may contain multiple
256-
* associativity lists because a resource may be multiply connected
257-
* into the machine. This resource then has different associativity
258-
* characteristics relative to its multiple connections. We ignore
259-
* this for now. We also assume that all cpu and memory sets have
260-
* their distances represented at a common level. This won't be
261-
* true for hierarchical NUMA.
262-
*
263-
* In any case the ibm,associativity-reference-points should give
264-
* the correct depth for a normal NUMA system.
265-
*
266-
* - Dave Hansen <[email protected]>
267-
*/
268297
static int __init find_min_common_depth(void)
269298
{
270-
int depth, index;
271-
const unsigned int *ref_points;
299+
int depth;
272300
struct device_node *rtas_root;
273-
unsigned int len;
274301
struct device_node *chosen;
275302
const char *vec5;
276303

@@ -280,38 +307,67 @@ static int __init find_min_common_depth(void)
280307
return -1;
281308

282309
/*
283-
* this property is 2 32-bit integers, each representing a level of
284-
* depth in the associativity nodes. The first is for an SMP
285-
* configuration (should be all 0's) and the second is for a normal
286-
* NUMA configuration.
310+
* This property is a set of 32-bit integers, each representing
311+
* an index into the ibm,associativity nodes.
312+
*
313+
* With form 0 affinity the first integer is for an SMP configuration
314+
* (should be all 0's) and the second is for a normal NUMA
315+
* configuration. We have only one level of NUMA.
316+
*
317+
* With form 1 affinity the first integer is the most significant
318+
* NUMA boundary and the following are progressively less significant
319+
* boundaries. There can be more than one level of NUMA.
287320
*/
288-
index = 1;
289-
ref_points = of_get_property(rtas_root,
290-
"ibm,associativity-reference-points", &len);
321+
distance_ref_points = of_get_property(rtas_root,
322+
"ibm,associativity-reference-points",
323+
&distance_ref_points_depth);
324+
325+
if (!distance_ref_points) {
326+
dbg("NUMA: ibm,associativity-reference-points not found.\n");
327+
goto err;
328+
}
329+
330+
distance_ref_points_depth /= sizeof(int);
291331

292-
/*
293-
* For form 1 affinity information we want the first field
294-
*/
295332
#define VEC5_AFFINITY_BYTE 5
296333
#define VEC5_AFFINITY 0x80
297334
chosen = of_find_node_by_path("/chosen");
298335
if (chosen) {
299336
vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL);
300337
if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) {
301338
dbg("Using form 1 affinity\n");
302-
index = 0;
339+
form1_affinity = 1;
303340
}
304341
}
305342

306-
if ((len >= 2 * sizeof(unsigned int)) && ref_points) {
307-
depth = ref_points[index];
343+
if (form1_affinity) {
344+
depth = distance_ref_points[0];
308345
} else {
309-
dbg("NUMA: ibm,associativity-reference-points not found.\n");
310-
depth = -1;
346+
if (distance_ref_points_depth < 2) {
347+
printk(KERN_WARNING "NUMA: "
348+
"short ibm,associativity-reference-points\n");
349+
goto err;
350+
}
351+
352+
depth = distance_ref_points[1];
311353
}
312-
of_node_put(rtas_root);
313354

355+
/*
356+
* Warn and cap if the hardware supports more than
357+
* MAX_DISTANCE_REF_POINTS domains.
358+
*/
359+
if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
360+
printk(KERN_WARNING "NUMA: distance array capped at "
361+
"%d entries\n", MAX_DISTANCE_REF_POINTS);
362+
distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
363+
}
364+
365+
of_node_put(rtas_root);
314366
return depth;
367+
368+
err:
369+
of_node_put(rtas_root);
370+
return -1;
315371
}
316372

317373
static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)

0 commit comments

Comments
 (0)