
Commit 7ff1693

pdxChen authored and Peter Zijlstra committed
sched/fair: Implement prefer sibling imbalance calculation between asymmetric groups
In the current prefer sibling load balancing code, there is an implicit assumption that the busiest sched group and the local sched group are equivalent, hence the number of tasks to be moved is simply the difference in the number of tasks between the two groups (i.e. the imbalance) divided by two. However, the cluster groups may have different numbers of cores, say when we take a CPU offline or when we have hybrid groups. In that case, we should balance between the two groups such that the #tasks/#cores ratio is the same between both groups, and the computed imbalance needs to reflect this.

Adjust the sibling imbalance computation to take the above considerations into account.

Signed-off-by: Tim Chen <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lore.kernel.org/r/4eacbaa236e680687dae2958378a6173654113df.1688770494.git.tim.c.chen@linux.intel.com
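For illustration (numbers are hypothetical, not from the commit): if the busiest group has 4 cores running 9 tasks and the local group has 2 cores running none, equalizing the ratio means moving k tasks where (9 - k)/4 = k/2, i.e. k = 3, leaving 1.5 tasks per core on both sides. The old busiest-minus-local computation would have asked for (9 - 0)/2, i.e. roughly 4 tasks, overshooting to 1.25 vs. 2 tasks per core.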
1 parent d24cb0d commit 7ff1693

File tree

1 file changed: +37 −4 lines changed


kernel/sched/fair.c

Lines changed: 37 additions & 4 deletions
@@ -9535,6 +9535,41 @@ static inline bool smt_balance(struct lb_env *env, struct sg_lb_stats *sgs,
 	return false;
 }
 
+static inline long sibling_imbalance(struct lb_env *env,
+				    struct sd_lb_stats *sds,
+				    struct sg_lb_stats *busiest,
+				    struct sg_lb_stats *local)
+{
+	int ncores_busiest, ncores_local;
+	long imbalance;
+
+	if (env->idle == CPU_NOT_IDLE || !busiest->sum_nr_running)
+		return 0;
+
+	ncores_busiest = sds->busiest->cores;
+	ncores_local = sds->local->cores;
+
+	if (ncores_busiest == ncores_local) {
+		imbalance = busiest->sum_nr_running;
+		lsub_positive(&imbalance, local->sum_nr_running);
+		return imbalance;
+	}
+
+	/* Balance such that nr_running/ncores ratio are same on both groups */
+	imbalance = ncores_local * busiest->sum_nr_running;
+	lsub_positive(&imbalance, ncores_busiest * local->sum_nr_running);
+	/* Normalize imbalance and do rounding on normalization */
+	imbalance = 2 * imbalance + ncores_local + ncores_busiest;
+	imbalance /= ncores_local + ncores_busiest;
+
+	/* Take advantage of resource in an empty sched group */
+	if (imbalance == 0 && local->sum_nr_running == 0 &&
+	    busiest->sum_nr_running > 1)
+		imbalance = 2;
+
+	return imbalance;
+}
+
 static inline bool
 sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
 {
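The rounding in the normalization step is easiest to see with concrete numbers. Below is a minimal userspace sketch of the same arithmetic, with lsub_positive() re-implemented and the lb_env/sd_lb_stats plumbing replaced by plain integers; it illustrates the math above and is not kernel code (the idle check is omitted).

	#include <stdio.h>

	/* Userspace stand-in for the kernel's lsub_positive():
	 * subtract, but clamp the result at zero. */
	static void lsub_positive(long *v, long sub)
	{
		if (*v > sub)
			*v -= sub;
		else
			*v = 0;
	}

	/* Same arithmetic as sibling_imbalance() above, with the group
	 * statistics passed as plain integers and the idle check dropped. */
	static long sibling_imbalance(int ncores_busiest, long busiest_nr,
				      int ncores_local, long local_nr)
	{
		long imbalance;

		if (!busiest_nr)
			return 0;

		if (ncores_busiest == ncores_local) {
			imbalance = busiest_nr;
			lsub_positive(&imbalance, local_nr);
			return imbalance;
		}

		/* Balance such that nr_running/ncores ratio are same on both groups */
		imbalance = ncores_local * busiest_nr;
		lsub_positive(&imbalance, ncores_busiest * local_nr);
		/* Normalize imbalance and do rounding on normalization */
		imbalance = 2 * imbalance + ncores_local + ncores_busiest;
		imbalance /= ncores_local + ncores_busiest;

		/* Take advantage of resource in an empty sched group */
		if (imbalance == 0 && local_nr == 0 && busiest_nr > 1)
			imbalance = 2;

		return imbalance;
	}

	int main(void)
	{
		/* asymmetric: 4 cores/9 tasks vs. 2 cores/0 tasks -> 7 */
		printf("%ld\n", sibling_imbalance(4, 9, 2, 0));
		/* symmetric: 4 cores/6 tasks vs. 4 cores/2 tasks -> 4 */
		printf("%ld\n", sibling_imbalance(4, 6, 4, 2));
		return 0;
	}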
@@ -10393,14 +10428,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	}
 
 	if (busiest->group_weight == 1 || sds->prefer_sibling) {
-		unsigned int nr_diff = busiest->sum_nr_running;
 		/*
 		 * When prefer sibling, evenly spread running tasks on
 		 * groups.
 		 */
 		env->migration_type = migrate_task;
-		lsub_positive(&nr_diff, local->sum_nr_running);
-		env->imbalance = nr_diff;
+		env->imbalance = sibling_imbalance(env, sds, busiest, local);
 	} else {
 
 		/*
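Note that when the core counts match, sibling_imbalance() returns exactly the busiest->sum_nr_running minus local->sum_nr_running (clamped at zero) that the removed nr_diff code computed; for example, two 4-core groups running 6 and 2 tasks yield an imbalance of 4 either way. Behavior only changes for asymmetric groups.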
@@ -10597,7 +10630,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	 * group's child domain.
 	 */
 	if (sds.prefer_sibling && local->group_type == group_has_spare &&
-	    busiest->sum_nr_running > local->sum_nr_running + 1)
+	    sibling_imbalance(env, &sds, busiest, local) > 1)
 		goto force_balance;
 
 	if (busiest->group_type != group_overloaded) {
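To see why the new gate matters, take illustrative numbers: a 2-core busiest group and a 4-core local group each running 3 tasks. The old test (3 > 3 + 1) never forces a balance, even though the busiest group runs 1.5 tasks per core against the local group's 0.75. With the new test, sibling_imbalance() returns (2 * (4*3 - 2*3) + 6) / 6 = 3 > 1, so the balance is forced (on an idle balance; the helper returns 0 when env->idle == CPU_NOT_IDLE, leaving busy balancing untouched).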
