
Commit b4c9c9f

vingu-linaro authored and Peter Zijlstra committed
sched/fair: Prefer prev cpu in asymmetric wakeup path
In the fast wakeup path, the scheduler always checks whether the local or prev CPU is a good candidate for the task before looking for other CPUs in the domain. With commit b7a3316 ("sched/fair: Add asymmetric CPU capacity wakeup scan"), heterogeneous systems gained a dedicated path, but it does not try to reuse the prev CPU whenever possible. If the previous CPU is idle and belongs to the LLC domain, we should check it first before looking for another CPU, because it remains one of the best candidates and doing so also stabilizes task placement on the system.

This change aligns the asymmetric path's behavior with the symmetric one and reduces the cases where the task migrates across all CPUs of the sd_asym_cpucapacity domain at wakeup. It does not impact the normal EAS mode, only the overloaded case or configurations where EAS is not used.

- On hikey960 with the performance governor (EAS disabled):

  ./perf bench sched pipe -T -l 50000

                 mainline            w/ patch
  # migrations   999364              0
  ops/sec        149313 (+/-0.28%)   182587 (+/-0.40%)   +22%

- On hikey with the performance governor:

  ./perf bench sched pipe -T -l 50000

                 mainline            w/ patch
  # migrations   0                   0
  ops/sec        47721 (+/-0.76%)    47899 (+/-0.56%)    +0.4%

According to the test on hikey, the patch does not impact symmetric systems compared to the current implementation (only tested on arm64).

Also, read the uclamped value of the task's utilization at most twice, instead of each time the task's utilization is compared with a CPU's capacity.

Fixes: b7a3316 ("sched/fair: Add asymmetric CPU capacity wakeup scan")
Signed-off-by: Vincent Guittot <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Dietmar Eggemann <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
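For context on the "fits with cpu's capacity" check used below: fits_capacity() accepts a CPU only if the task's utilization leaves roughly 20% headroom below that CPU's capacity. The following is a minimal standalone sketch of that margin, not kernel code; the 1280/1024 constants mirror the kernel's fits_capacity() macro at the time of this commit, and the example capacities are hypothetical big.LITTLE values.

#include <stdbool.h>
#include <stdio.h>

/*
 * Sketch of the fits_capacity() margin: utilization "fits" a CPU only if it
 * stays below ~80% of that CPU's capacity (a 1280/1024 margin). Values use
 * the kernel's capacity units, where 1024 is the biggest CPU in the system.
 */
static bool fits_capacity(unsigned long util, unsigned long capacity)
{
        return util * 1280 < capacity * 1024;
}

int main(void)
{
        /* Hypothetical capacities: little CPU ~462, big CPU = 1024. */
        unsigned long task_util = 400;

        printf("fits little: %d\n", fits_capacity(task_util, 462));  /* 0 */
        printf("fits big:    %d\n", fits_capacity(task_util, 1024)); /* 1 */
        return 0;
}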
1 parent 16b0a7a commit b4c9c9f

File tree

1 file changed: +43 -24 lines

kernel/sched/fair.c

Lines changed: 43 additions & 24 deletions
@@ -6172,21 +6172,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 static int
 select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 {
-        unsigned long best_cap = 0;
+        unsigned long task_util, best_cap = 0;
         int cpu, best_cpu = -1;
         struct cpumask *cpus;
 
-        sync_entity_load_avg(&p->se);
-
         cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
         cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
+        task_util = uclamp_task_util(p);
+
         for_each_cpu_wrap(cpu, cpus, target) {
                 unsigned long cpu_cap = capacity_of(cpu);
 
                 if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
                         continue;
-                if (task_fits_capacity(p, cpu_cap))
+                if (fits_capacity(task_util, cpu_cap))
                         return cpu;
 
                 if (cpu_cap > best_cap) {
@@ -6198,44 +6198,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
         return best_cpu;
 }
 
+static inline bool asym_fits_capacity(int task_util, int cpu)
+{
+        if (static_branch_unlikely(&sched_asym_cpucapacity))
+                return fits_capacity(task_util, capacity_of(cpu));
+
+        return true;
+}
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
         struct sched_domain *sd;
+        unsigned long task_util;
         int i, recent_used_cpu;
 
         /*
-         * For asymmetric CPU capacity systems, our domain of interest is
-         * sd_asym_cpucapacity rather than sd_llc.
+         * On asymmetric system, update task utilization because we will check
+         * that the task fits with cpu's capacity.
          */
         if (static_branch_unlikely(&sched_asym_cpucapacity)) {
-                sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
-                /*
-                 * On an asymmetric CPU capacity system where an exclusive
-                 * cpuset defines a symmetric island (i.e. one unique
-                 * capacity_orig value through the cpuset), the key will be set
-                 * but the CPUs within that cpuset will not have a domain with
-                 * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
-                 * capacity path.
-                 */
-                if (!sd)
-                        goto symmetric;
-
-                i = select_idle_capacity(p, sd, target);
-                return ((unsigned)i < nr_cpumask_bits) ? i : target;
+                sync_entity_load_avg(&p->se);
+                task_util = uclamp_task_util(p);
         }
 
-symmetric:
-        if (available_idle_cpu(target) || sched_idle_cpu(target))
+        if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+            asym_fits_capacity(task_util, target))
                 return target;
 
         /*
          * If the previous CPU is cache affine and idle, don't be stupid:
          */
         if (prev != target && cpus_share_cache(prev, target) &&
-            (available_idle_cpu(prev) || sched_idle_cpu(prev)))
+            (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+            asym_fits_capacity(task_util, prev))
                 return prev;
 
         /*
@@ -6258,7 +6256,8 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
             recent_used_cpu != target &&
             cpus_share_cache(recent_used_cpu, target) &&
             (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
-            cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
+            cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
+            asym_fits_capacity(task_util, recent_used_cpu)) {
                 /*
                  * Replace recent_used_cpu with prev as it is a potential
                  * candidate for the next wake:
@@ -6267,6 +6266,26 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
                 return recent_used_cpu;
         }
 
+        /*
+         * For asymmetric CPU capacity systems, our domain of interest is
+         * sd_asym_cpucapacity rather than sd_llc.
+         */
+        if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+                sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
+                /*
+                 * On an asymmetric CPU capacity system where an exclusive
+                 * cpuset defines a symmetric island (i.e. one unique
+                 * capacity_orig value through the cpuset), the key will be set
+                 * but the CPUs within that cpuset will not have a domain with
+                 * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
+                 * capacity path.
+                 */
+                if (sd) {
+                        i = select_idle_capacity(p, sd, target);
+                        return ((unsigned)i < nr_cpumask_bits) ? i : target;
+                }
+        }
+
         sd = rcu_dereference(per_cpu(sd_llc, target));
         if (!sd)
                 return target;
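To make the resulting control flow easier to follow, below is a standalone model, illustrative only and not kernel code, of the candidate ordering select_idle_sibling() ends up with on an asymmetric-capacity system after this patch: target, prev and recent_used_cpu are each tried first, gated by the capacity-fit check, and only then does the code fall back to scanning the wider asymmetric-capacity domain. The 4-CPU topology, the cpu_idle[] array and the scan_asym_domain() helper are hypothetical stand-ins for the real scheduler state.

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

/* Hypothetical big.LITTLE topology: CPUs 0-1 little, CPUs 2-3 big. */
static const unsigned long capacity[NR_CPUS] = { 462, 462, 1024, 1024 };
static bool cpu_idle[NR_CPUS] = { true, true, true, true };

/* Same ~20% headroom margin as in the sketch above. */
static bool fits_capacity(unsigned long util, unsigned long cap)
{
        return util * 1280 < cap * 1024;
}

/* Models the asymmetric case, where the fit check is always applied. */
static bool asym_fits_capacity(unsigned long task_util, int cpu)
{
        return fits_capacity(task_util, capacity[cpu]);
}

/* Hypothetical stand-in for the select_idle_capacity() fallback scan. */
static int scan_asym_domain(unsigned long task_util, int target)
{
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                if (cpu_idle[cpu] && fits_capacity(task_util, capacity[cpu]))
                        return cpu;
        return target;
}

static int pick_cpu(unsigned long task_util, int prev, int target, int recent)
{
        /* 1) target first, if idle and big enough. */
        if (cpu_idle[target] && asym_fits_capacity(task_util, target))
                return target;
        /* 2) then prev: the preference this patch restores on asymmetric systems. */
        if (prev != target && cpu_idle[prev] && asym_fits_capacity(task_util, prev))
                return prev;
        /* 3) then the recently used CPU. */
        if (recent != prev && recent != target && cpu_idle[recent] &&
            asym_fits_capacity(task_util, recent))
                return recent;
        /* 4) only now scan the whole asymmetric-capacity domain. */
        return scan_asym_domain(task_util, target);
}

int main(void)
{
        /* A ~600-unit task wakes with target = little CPU 0, prev = big CPU 2. */
        printf("picked CPU %d\n", pick_cpu(600, 2, 0, 1)); /* prints 2: prev wins */
        return 0;
}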
