Skip to content

Commit c1e5336

Browse files
srikard authored and mpe committed
powerpc/smp: Cache CPU to chip lookup
On systems with a large number of CPUs per node, even with the filtered matching of related CPUs, there can be a large number of calls to cpu_to_chip_id() for the same CPU. For example, with a 4096 vCPU, 1 node QEMU configuration with 4 threads per core, the system could see up to 1024 calls to cpu_to_chip_id() for the same CPU. On a given system, cpu_to_chip_id() for a given CPU always returns the same value. Hence cache the result in a lookup table for use in subsequent calls. Since all CPUs sharing the same core will belong to the same chip, the lookup table has an entry for one CPU per core. chip_id_lookup_table is never freed, so it is reused when a CPU is brought back online after being offlined. Reported-by: Daniel Henrique Barboza <[email protected]> Suggested-by: Michael Ellerman <[email protected]> Signed-off-by: Srikar Dronamraju <[email protected]> Tested-by: Daniel Henrique Barboza <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 131c82b commit c1e5336

File tree

3 files changed

+35
-6
lines changed

3 files changed

+35
-6
lines changed

arch/powerpc/include/asm/smp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id;
3131
extern bool coregroup_enabled;
3232

3333
extern int cpu_to_chip_id(int cpu);
34+
extern int *chip_id_lookup_table;
3435

3536
#ifdef CONFIG_SMP
3637

arch/powerpc/kernel/prom.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@
6565
#define DBG(fmt...)
6666
#endif
6767

68+
int *chip_id_lookup_table;
69+
6870
#ifdef CONFIG_PPC64
6971
int __initdata iommu_is_off;
7072
int __initdata iommu_force_on;
@@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id);
914916
int cpu_to_chip_id(int cpu)
915917
{
916918
struct device_node *np;
919+
int ret = -1, idx;
920+
921+
idx = cpu / threads_per_core;
922+
if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1)
923+
return chip_id_lookup_table[idx];
917924

918925
np = of_get_cpu_node(cpu, NULL);
919-
if (!np)
920-
return -1;
926+
if (np) {
927+
ret = of_get_ibm_chip_id(np);
928+
of_node_put(np);
929+
930+
if (chip_id_lookup_table)
931+
chip_id_lookup_table[idx] = ret;
932+
}
921933

922-
of_node_put(np);
923-
return of_get_ibm_chip_id(np);
934+
return ret;
924935
}
925936
EXPORT_SYMBOL(cpu_to_chip_id);
926937

arch/powerpc/kernel/smp.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
10731073
cpu_smallcore_mask(boot_cpuid));
10741074
}
10751075

1076+
if (cpu_to_chip_id(boot_cpuid) != -1) {
1077+
int idx = num_possible_cpus() / threads_per_core;
1078+
1079+
/*
1080+
* All threads of a core belong to the same chip, so
1081+
* chip_id_lookup_table will have one entry per core.
1082+
* Assumption: if boot_cpuid doesn't have a chip-id, then no
1083+
* other CPU will have a chip-id either.
1084+
*/
1085+
chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
1086+
if (chip_id_lookup_table)
1087+
memset(chip_id_lookup_table, -1, sizeof(int) * idx);
1088+
}
1089+
10761090
if (smp_ops && smp_ops->probe)
10771091
smp_ops->probe();
10781092
}
@@ -1468,8 +1482,8 @@ static void add_cpu_to_masks(int cpu)
14681482
{
14691483
struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
14701484
int first_thread = cpu_first_thread_sibling(cpu);
1471-
int chip_id = cpu_to_chip_id(cpu);
14721485
cpumask_var_t mask;
1486+
int chip_id = -1;
14731487
bool ret;
14741488
int i;
14751489

@@ -1492,7 +1506,10 @@ static void add_cpu_to_masks(int cpu)
14921506
if (has_coregroup_support())
14931507
update_coregroup_mask(cpu, &mask);
14941508

1495-
if (chip_id == -1 || !ret) {
1509+
if (chip_id_lookup_table && ret)
1510+
chip_id = cpu_to_chip_id(cpu);
1511+
1512+
if (chip_id == -1) {
14961513
cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
14971514
goto out;
14981515
}

0 commit comments

Comments
 (0)