
Commit e5a710d

Merge tag 'sched-urgent-2023-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:

 "Fix a performance regression on large SMT systems, an Intel SMT4
  balancing bug, and a topology setup bug on (Intel) hybrid processors"

* tag 'sched-urgent-2023-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/sched: Restore the SD_ASYM_PACKING flag in the DIE domain
  sched/fair: Fix SMT4 group_smt_balance handling
  sched/fair: Optimize should_we_balance() for large SMT systems
2 parents: e54ca3c + 108af4b

2 files changed, +34 -5 lines


arch/x86/kernel/smpboot.c

Lines changed: 9 additions & 3 deletions
@@ -579,7 +579,6 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 }
 
 
-#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC)
 static inline int x86_sched_itmt_flags(void)
 {
 	return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
@@ -603,7 +602,14 @@ static int x86_cluster_flags(void)
 	return cpu_cluster_flags() | x86_sched_itmt_flags();
 }
 #endif
-#endif
+
+static int x86_die_flags(void)
+{
+	if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+		return x86_sched_itmt_flags();
+
+	return 0;
+}
 
 /*
  * Set if a package/die has multiple NUMA nodes inside.
@@ -640,7 +646,7 @@ static void __init build_sched_topology(void)
 	 */
 	if (!x86_has_numa_in_package) {
 		x86_topology[i++] = (struct sched_domain_topology_level){
-			cpu_cpu_mask, SD_INIT_NAME(DIE)
+			cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(DIE)
 		};
 	}
 
kernel/sched/fair.c

Lines changed: 25 additions & 2 deletions
@@ -6619,6 +6619,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 /* Working cpumask for: load_balance, load_balance_newidle. */
 static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
 static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
+static DEFINE_PER_CPU(cpumask_var_t, should_we_balance_tmpmask);
 
 #ifdef CONFIG_NO_HZ_COMMON
 
@@ -9579,7 +9580,7 @@ static inline long sibling_imbalance(struct lb_env *env,
 	imbalance /= ncores_local + ncores_busiest;
 
 	/* Take advantage of resource in an empty sched group */
-	if (imbalance == 0 && local->sum_nr_running == 0 &&
+	if (imbalance <= 1 && local->sum_nr_running == 0 &&
 	    busiest->sum_nr_running > 1)
 		imbalance = 2;
 
@@ -9767,6 +9768,15 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 		break;
 
 	case group_smt_balance:
+		/*
+		 * Check if we have spare CPUs on either SMT group to
+		 * choose has spare or fully busy handling.
+		 */
+		if (sgs->idle_cpus != 0 || busiest->idle_cpus != 0)
+			goto has_spare;
+
+		fallthrough;
+
 	case group_fully_busy:
 		/*
 		 * Select the fully busy group with highest avg_load. In
@@ -9806,6 +9816,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 		else
 			return true;
 	}
+has_spare:
 
 	/*
 	 * Select not overloaded group with lowest number of idle cpus
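
Taken together, the two hunks above change how two group_smt_balance candidates are compared: as long as either group still has an idle CPU, the comparison jumps to the has-spare path (lowest number of idle CPUs wins as busiest), and only when both are saturated does it fall through to the fully-busy path (highest avg_load wins). Here is a simplified, self-contained model of that decision; the struct and field names are pared-down stand-ins, and real ties on idle_cpus are further broken by the number of running tasks:

#include <stdbool.h>
#include <stdio.h>

struct group_stats {
	unsigned int idle_cpus;		/* idle CPUs left in the group */
	unsigned long avg_load;		/* average load of the group */
};

/* Should 'candidate' replace 'busiest' as the busiest group? */
static bool pick_busiest_smt_balance(const struct group_stats *candidate,
				     const struct group_stats *busiest)
{
	if (candidate->idle_cpus != 0 || busiest->idle_cpus != 0)
		/* has-spare handling: fewer idle CPUs means busier */
		return candidate->idle_cpus < busiest->idle_cpus;

	/* fully-busy handling: higher average load means busier */
	return candidate->avg_load > busiest->avg_load;
}

int main(void)
{
	struct group_stats current_busiest = { .idle_cpus = 1, .avg_load = 800 };
	struct group_stats candidate       = { .idle_cpus = 0, .avg_load = 600 };

	/* The candidate has no idle CPU left, so it wins despite its lower load. */
	printf("replace busiest: %s\n",
	       pick_busiest_smt_balance(&candidate, &current_busiest) ? "yes" : "no");
	return 0;
}
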
@@ -10917,6 +10928,7 @@ static int active_load_balance_cpu_stop(void *data);
 
 static int should_we_balance(struct lb_env *env)
 {
+	struct cpumask *swb_cpus = this_cpu_cpumask_var_ptr(should_we_balance_tmpmask);
 	struct sched_group *sg = env->sd->groups;
 	int cpu, idle_smt = -1;
 
@@ -10940,8 +10952,9 @@ static int should_we_balance(struct lb_env *env)
 		return 1;
 	}
 
+	cpumask_copy(swb_cpus, group_balance_mask(sg));
 	/* Try to find first idle CPU */
-	for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
+	for_each_cpu_and(cpu, swb_cpus, env->cpus) {
 		if (!idle_cpu(cpu))
 			continue;
 
@@ -10953,6 +10966,14 @@ static int should_we_balance(struct lb_env *env)
 		if (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && !is_core_idle(cpu)) {
 			if (idle_smt == -1)
 				idle_smt = cpu;
+			/*
+			 * If the core is not idle, and first SMT sibling which is
+			 * idle has been found, then its not needed to check other
+			 * SMT siblings for idleness:
+			 */
+#ifdef CONFIG_SCHED_SMT
+			cpumask_andnot(swb_cpus, swb_cpus, cpu_smt_mask(cpu));
+#endif
 			continue;
 		}
 
@@ -12918,6 +12939,8 @@ __init void init_sched_fair_class(void)
 	for_each_possible_cpu(i) {
 		zalloc_cpumask_var_node(&per_cpu(load_balance_mask, i), GFP_KERNEL, cpu_to_node(i));
 		zalloc_cpumask_var_node(&per_cpu(select_rq_mask, i), GFP_KERNEL, cpu_to_node(i));
+		zalloc_cpumask_var_node(&per_cpu(should_we_balance_tmpmask, i),
+					GFP_KERNEL, cpu_to_node(i));
 
 #ifdef CONFIG_CFS_BANDWIDTH
 		INIT_CSD(&cpu_rq(i)->cfsb_csd, __cfsb_csd_unthrottle, cpu_rq(i));
