Skip to content

Commit 37c3ec2

Browse files
jlintonarm authored and ctmarinas committed
arm64: topology: divorce MC scheduling domain from core_siblings
Now that we have an accurate view of the physical topology, we need to represent it correctly to the scheduler. Generally MC should equal the LLC in the system, but there are a number of special cases that need to be dealt with.

In the case of NUMA-in-socket, we need to ensure that the sched domain we build for the MC layer isn't larger than the DIE above it. Similarly, for LLCs that might exist in cross-socket interconnect or directory hardware, we need to ensure that MC is shrunk to the socket or NUMA node.

This patch builds a sibling mask for the LLC, and then picks the smallest of LLC, socket siblings, or NUMA node siblings, which gives us the behavior described above. This is ever so slightly different from the similar alternative where we look for a cache layer less than or equal to the socket/NUMA siblings.

The logic to pick the MC layer affects all arm64 machines, but only changes the behavior for DT/MPIDR systems if the NUMA domain is smaller than the core siblings (generally set to the cluster). Potentially this fixes a possible bug in DT systems, but really it only affects ACPI systems where the core siblings mask is correctly set to the socket siblings. Thus all currently available ACPI systems should have MC equal to LLC, including the NUMA-in-socket machines where the LLC is partitioned between the NUMA nodes.

Tested-by: Ard Biesheuvel <[email protected]>
Tested-by: Vijaya Kumar K <[email protected]>
Tested-by: Xiongfeng Wang <[email protected]>
Tested-by: Tomasz Nowicki <[email protected]>
Acked-by: Sudeep Holla <[email protected]>
Acked-by: Ard Biesheuvel <[email protected]>
Acked-by: Morten Rasmussen <[email protected]>
Signed-off-by: Jeremy Linton <[email protected]>
Signed-off-by: Catalin Marinas <[email protected]>
1 parent bce1a65 commit 37c3ec2

File tree

2 files changed

+37
-1
lines changed

2 files changed

+37
-1
lines changed

arch/arm64/include/asm/topology.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@ struct cpu_topology {
88
int thread_id;
99
int core_id;
1010
int package_id;
11+
int llc_id;
1112
cpumask_t thread_sibling;
1213
cpumask_t core_sibling;
14+
cpumask_t llc_siblings;
1315
};
1416

1517
extern struct cpu_topology cpu_topology[NR_CPUS];

arch/arm64/kernel/topology.c

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include <linux/acpi.h>
1515
#include <linux/arch_topology.h>
16+
#include <linux/cacheinfo.h>
1617
#include <linux/cpu.h>
1718
#include <linux/cpumask.h>
1819
#include <linux/init.h>
@@ -214,7 +215,19 @@ EXPORT_SYMBOL_GPL(cpu_topology);
214215

215216
const struct cpumask *cpu_coregroup_mask(int cpu)
216217
{
217-
return &cpu_topology[cpu].core_sibling;
218+
const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
219+
220+
/* Find the smaller of NUMA, core or LLC siblings */
221+
if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
222+
/* not numa in package, lets use the package siblings */
223+
core_mask = &cpu_topology[cpu].core_sibling;
224+
}
225+
if (cpu_topology[cpu].llc_id != -1) {
226+
if (cpumask_subset(&cpu_topology[cpu].llc_siblings, core_mask))
227+
core_mask = &cpu_topology[cpu].llc_siblings;
228+
}
229+
230+
return core_mask;
218231
}
219232

220233
static void update_siblings_masks(unsigned int cpuid)
@@ -226,6 +239,9 @@ static void update_siblings_masks(unsigned int cpuid)
226239
for_each_possible_cpu(cpu) {
227240
cpu_topo = &cpu_topology[cpu];
228241

242+
if (cpuid_topo->llc_id == cpu_topo->llc_id)
243+
cpumask_set_cpu(cpu, &cpuid_topo->llc_siblings);
244+
229245
if (cpuid_topo->package_id != cpu_topo->package_id)
230246
continue;
231247

@@ -291,6 +307,10 @@ static void __init reset_cpu_topology(void)
291307
cpu_topo->core_id = 0;
292308
cpu_topo->package_id = -1;
293309

310+
cpu_topo->llc_id = -1;
311+
cpumask_clear(&cpu_topo->llc_siblings);
312+
cpumask_set_cpu(cpu, &cpu_topo->llc_siblings);
313+
294314
cpumask_clear(&cpu_topo->core_sibling);
295315
cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
296316
cpumask_clear(&cpu_topo->thread_sibling);
@@ -311,6 +331,8 @@ static int __init parse_acpi_topology(void)
311331
is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;
312332

313333
for_each_possible_cpu(cpu) {
334+
int i, cache_id;
335+
314336
topology_id = find_acpi_cpu_topology(cpu, 0);
315337
if (topology_id < 0)
316338
return topology_id;
@@ -325,6 +347,18 @@ static int __init parse_acpi_topology(void)
325347
}
326348
topology_id = find_acpi_cpu_topology_package(cpu);
327349
cpu_topology[cpu].package_id = topology_id;
350+
351+
i = acpi_find_last_cache_level(cpu);
352+
353+
if (i > 0) {
354+
/*
355+
* this is the only part of cpu_topology that has
356+
* a direct relationship with the cache topology
357+
*/
358+
cache_id = find_acpi_cpu_cache_topology(cpu, i);
359+
if (cache_id > 0)
360+
cpu_topology[cpu].llc_id = cache_id;
361+
}
328362
}
329363

330364
return 0;

0 commit comments

Comments
 (0)