
Commit c722f35

Rik van Riel authored and Peter Zijlstra committed
sched/fair: Bring back select_idle_smt(), but differently
Mel Gorman did some nice work in 9fe1f12 ("sched/fair: Merge select_idle_core/cpu()"), resulting in the kernel being more efficient at finding an idle CPU, and in tasks spending less time waiting to be run, both according to the schedstats run_delay numbers, and according to measured application latencies. Yay.

The flip side of this is that we see more task migrations (about 30% more), higher cache misses, higher memory bandwidth utilization, and higher CPU use, for the same number of requests/second.

This is most pronounced on a memcache type workload, which saw a consistent 1-3% increase in total CPU use on the system, due to those increased task migrations leading to higher L2 cache miss numbers, and higher memory utilization. The exclusive L3 cache on Skylake does us no favors there.

On our web serving workload, that effect is usually negligible.

It appears that the increased number of CPU migrations is generally a good thing, since it leads to lower cpu_delay numbers, reflecting the fact that tasks get to run faster. However, the reduced locality and the corresponding increase in L2 cache misses hurts a little.

The patch below appears to fix the regression, while keeping the benefit of the lower cpu_delay numbers, by reintroducing select_idle_smt with a twist: when a socket has no idle cores, check to see if the sibling of "prev" is idle, before searching all the other CPUs.

This fixes both the occasional 9% regression on the web serving workload, and the continuous 2% CPU use regression on the memcache type workload.

With Mel's patches and this patch together, task migrations are still high, but L2 cache misses, memory bandwidth, and CPU time used are back down to what they were before. The p95 and p99 response times for the memcache type application improve by about 10% over what they were before Mel's patches got merged.

Signed-off-by: Rik van Riel <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Mel Gorman <[email protected]>
Acked-by: Vincent Guittot <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 6db12ee commit c722f35
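
To make the ordering easier to follow outside the full diff below, here is a minimal user-space sketch of the decision flow the commit message describes. The helper names (smt_active, llc_has_idle_core, shares_llc, idle_smt_sibling, scan_llc) are hypothetical stand-ins, not kernel APIs; only the order of the checks is meant to mirror the patch.

/*
 * Minimal user-space sketch (not kernel code) of the wakeup-path ordering
 * described in the commit message. The stub helpers stand in for the
 * kernel's sched_smt_active(), test_idle_cores(), cpus_share_cache(),
 * select_idle_smt() and select_idle_cpu().
 */
#include <stdbool.h>
#include <stdio.h>

#define CPUS_PER_LLC 8

static bool smt_active(void)            { return true; }
static bool llc_has_idle_core(int cpu)  { (void)cpu; return false; }
static bool shares_llc(int a, int b)    { return a / CPUS_PER_LLC == b / CPUS_PER_LLC; }

/* Pretend the SMT sibling of prev is idle and return it (-1 if not). */
static int idle_smt_sibling(int prev)
{
	return (prev % 2) ? prev - 1 : prev + 1;
}

/* Stand-in for the LLC-wide scan; just returns the target here. */
static int scan_llc(int target, bool has_idle_core)
{
	(void)has_idle_core;
	return target;
}

/* Mirrors the order of checks the patch adds to select_idle_sibling(). */
static int pick_idle_cpu(int prev, int target)
{
	bool has_idle_core = false;
	int i;

	if (smt_active()) {
		has_idle_core = llc_has_idle_core(target);

		/*
		 * No fully idle core in the LLC: before paying for the
		 * LLC-wide scan, try the SMT sibling of the CPU the task
		 * last ran on, which keeps its L2 cache warm.
		 */
		if (!has_idle_core && shares_llc(prev, target)) {
			i = idle_smt_sibling(prev);
			if (i >= 0)
				return i;
		}
	}

	/* Fall back to the regular scan of the whole LLC domain. */
	return scan_llc(target, has_idle_core);
}

int main(void)
{
	printf("picked CPU %d\n", pick_idle_cpu(3, 5));
	return 0;
}

The key design point, per the commit message, is that the cheap locality-preserving sibling check is attempted only when the LLC reports no fully idle core, so it never delays placing the task on an idle core when one is available.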

File tree

1 file changed: +43 -12 lines changed

kernel/sched/fair.c

Lines changed: 43 additions & 12 deletions
@@ -6038,11 +6038,9 @@ static inline bool test_idle_cores(int cpu, bool def)
 {
 	struct sched_domain_shared *sds;
 
-	if (static_branch_likely(&sched_smt_present)) {
-		sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-		if (sds)
-			return READ_ONCE(sds->has_idle_cores);
-	}
+	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (sds)
+		return READ_ONCE(sds->has_idle_cores);
 
 	return def;
 }
@@ -6112,6 +6110,24 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 	return -1;
 }
 
+/*
+ * Scan the local SMT mask for idle CPUs.
+ */
+static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	int cpu;
+
+	for_each_cpu(cpu, cpu_smt_mask(target)) {
+		if (!cpumask_test_cpu(cpu, p->cpus_ptr) ||
+		    !cpumask_test_cpu(cpu, sched_domain_span(sd)))
+			continue;
+		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+			return cpu;
+	}
+
+	return -1;
+}
+
 #else /* CONFIG_SCHED_SMT */
 
 static inline void set_idle_cores(int cpu, int val)
@@ -6128,18 +6144,22 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
 	return __select_idle_cpu(core);
 }
 
+static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	return -1;
+}
+
 #endif /* CONFIG_SCHED_SMT */
 
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
  * average idle time for this rq (as found in rq->avg_idle).
  */
-static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	int i, cpu, idle_cpu = -1, nr = INT_MAX;
-	bool smt = test_idle_cores(target, false);
 	int this = smp_processor_id();
 	struct sched_domain *this_sd;
 	u64 time;
@@ -6150,7 +6170,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
 	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
-	if (sched_feat(SIS_PROP) && !smt) {
+	if (sched_feat(SIS_PROP) && !has_idle_core) {
 		u64 avg_cost, avg_idle, span_avg;
 
 		/*
@@ -6170,7 +6190,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	}
 
 	for_each_cpu_wrap(cpu, cpus, target) {
-		if (smt) {
+		if (has_idle_core) {
 			i = select_idle_core(p, cpu, cpus, &idle_cpu);
 			if ((unsigned int)i < nr_cpumask_bits)
 				return i;
@@ -6184,10 +6204,10 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 		}
 	}
 
-	if (smt)
+	if (has_idle_core)
 		set_idle_cores(this, false);
 
-	if (sched_feat(SIS_PROP) && !smt) {
+	if (sched_feat(SIS_PROP) && !has_idle_core) {
 		time = cpu_clock(this) - time;
 		update_avg(&this_sd->avg_scan_cost, time);
 	}
@@ -6242,6 +6262,7 @@ static inline bool asym_fits_capacity(int task_util, int cpu)
  */
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
+	bool has_idle_core = false;
 	struct sched_domain *sd;
 	unsigned long task_util;
 	int i, recent_used_cpu;
@@ -6321,7 +6342,17 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (!sd)
 		return target;
 
-	i = select_idle_cpu(p, sd, target);
+	if (sched_smt_active()) {
+		has_idle_core = test_idle_cores(target, false);
+
+		if (!has_idle_core && cpus_share_cache(prev, target)) {
+			i = select_idle_smt(p, sd, prev);
+			if ((unsigned int)i < nr_cpumask_bits)
+				return i;
+		}
+	}
+
+	i = select_idle_cpu(p, sd, has_idle_core, target);
 	if ((unsigned)i < nr_cpumask_bits)
 		return i;
 