Skip to content

Commit 79a8b9a

Browse files
suryasaimadhu authored and Ingo Molnar committed
x86/CPU/AMD: Bring back Compute Unit ID
Commit: a33d331 ("x86/CPU/AMD: Fix Bulldozer topology") restored the initial approach we had with the Fam15h topology of enumerating CU (Compute Unit) threads as cores. And this is still correct - they're beefier than HT threads but still have some shared functionality.

Our current approach has a problem with the Mad Max Steam game, for example. Yves Dionne reported a certain "choppiness" while playing on v4.9.5.

That problem stems most likely from the fact that the CU threads share resources within one CU and when we schedule to a thread of a different compute unit, this incurs latency due to migrating the working set to a different CU through the caches.

When the thread siblings mask mirrors that aspect of the CUs and threads, the scheduler pays attention to it and tries to schedule within one CU first. Which takes care of the latency, of course.

Reported-by: Yves Dionne <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: <[email protected]> # 4.9
Cc: Brice Goglin <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Yazen Ghannam <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent a0a2864 commit 79a8b9a

File tree

4 files changed

+19
-4
lines changed

4 files changed

+19
-4
lines changed

arch/x86/include/asm/processor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ struct cpuinfo_x86 {
104104
__u8 x86_phys_bits;
105105
/* CPUID returned core id bits: */
106106
__u8 x86_coreid_bits;
107+
__u8 cu_id;
107108
/* Max extended CPUID function supported: */
108109
__u32 extended_cpuid_level;
109110
/* Maximum supported CPUID level, -1=no CPUID: */

arch/x86/kernel/cpu/amd.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,8 +309,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
309309

310310
/* get information required for multi-node processors */
311311
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
312+
u32 eax, ebx, ecx, edx;
312313

313-
node_id = cpuid_ecx(0x8000001e) & 7;
314+
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
315+
316+
node_id = ecx & 0xff;
317+
smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
318+
319+
if (c->x86 == 0x15)
320+
c->cu_id = ebx & 0xff;
314321

315322
/*
316323
* We may have multiple LLCs if L3 caches exist, so check if we

arch/x86/kernel/cpu/common.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,6 +1015,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
10151015
c->x86_model_id[0] = '\0'; /* Unset */
10161016
c->x86_max_cores = 1;
10171017
c->x86_coreid_bits = 0;
1018+
c->cu_id = 0xff;
10181019
#ifdef CONFIG_X86_64
10191020
c->x86_clflush_size = 64;
10201021
c->x86_phys_bits = 36;

arch/x86/kernel/smpboot.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -433,9 +433,15 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
433433
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
434434

435435
if (c->phys_proc_id == o->phys_proc_id &&
436-
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
437-
c->cpu_core_id == o->cpu_core_id)
438-
return topology_sane(c, o, "smt");
436+
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
437+
if (c->cpu_core_id == o->cpu_core_id)
438+
return topology_sane(c, o, "smt");
439+
440+
if ((c->cu_id != 0xff) &&
441+
(o->cu_id != 0xff) &&
442+
(c->cu_id == o->cu_id))
443+
return topology_sane(c, o, "smt");
444+
}
439445

440446
} else if (c->phys_proc_id == o->phys_proc_id &&
441447
c->cpu_core_id == o->cpu_core_id) {

0 commit comments

Comments
 (0)