
Commit a7b359f

odinuge authored and Peter Zijlstra committed
sched/fair: Correctly insert cfs_rq's to list on unthrottle
Fix an issue where fairness is decreased since cfs_rq's can end up not being decayed properly. For two sibling control groups with the same priority, this can often lead to a load ratio of 99/1 (!!).

This happens because when a cfs_rq is throttled, all the descendant cfs_rq's will be removed from the leaf list. When the initial cfs_rq is unthrottled, it will currently only re-add descendant cfs_rq's if they have one or more entities enqueued. This is not a perfect heuristic.

Instead, insert all cfs_rq's that contain one or more enqueued entities, or whose load is not completely decayed.

For equally weighted control groups, the current behaviour can often lead to situations like this:

$ ps u -C stress
USER       PID %CPU %MEM    VSZ   RSS TTY   STAT START   TIME COMMAND
root     10009 88.8  0.0   3676   100 pts/1 R+   11:04   0:13 stress --cpu 1
root     10023  3.0  0.0   3676   104 pts/1 R+   11:04   0:00 stress --cpu 1

Fixes: 31bc6ae ("sched/fair: Optimize update_blocked_averages()")
[vingo: !SMP build fix]
Signed-off-by: Odin Ugedal <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Vincent Guittot <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 009c9aa commit a7b359f
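
As a side note on the mechanism described in the commit message above: the periodic blocked-load update only decays the cfs_rq's it can reach through the per-rq leaf list, so a cfs_rq left off that list keeps whatever load it had when it was throttled, and that stale load keeps skewing fairness between siblings. The following is a small, self-contained C model of that effect; it is not kernel code, the struct and the 0.5 decay factor are made up for illustration, and the names (toy_cfs_rq, decay_listed) are hypothetical.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a cfs_rq: just a blocked load value and list membership. */
struct toy_cfs_rq {
	double load_avg;    /* models cfs_rq->avg.load_avg */
	bool on_leaf_list;  /* models membership in rq->leaf_cfs_rq_list */
};

/* Models the periodic blocked-load update: only listed groups get decayed. */
static void decay_listed(struct toy_cfs_rq *rqs, int n)
{
	for (int i = 0; i < n; i++)
		if (rqs[i].on_leaf_list)
			rqs[i].load_avg *= 0.5;  /* arbitrary decay factor */
}

int main(void)
{
	struct toy_cfs_rq groups[2] = {
		{ .load_avg = 1024.0, .on_leaf_list = true  },
		/* Removed from the list on throttle and never re-added: */
		{ .load_avg = 1024.0, .on_leaf_list = false },
	};

	for (int tick = 0; tick < 10; tick++)
		decay_listed(groups, 2);

	/* The off-list group keeps its stale, undecayed load. */
	printf("listed group load:   %7.2f\n", groups[0].load_avg);
	printf("unlisted group load: %7.2f\n", groups[1].load_avg);
	return 0;
}

With the fix below, the unthrottle path re-links any cfs_rq that still has enqueued entities or load that is not fully decayed, so the real blocked-average update can decay it back down and fairness recovers.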

File tree: 1 file changed (+25 −19 lines)


kernel/sched/fair.c

Lines changed: 25 additions & 19 deletions
@@ -3298,6 +3298,24 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_FAIR_GROUP_SCHED
+
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	if (cfs_rq->load.weight)
+		return false;
+
+	if (cfs_rq->avg.load_sum)
+		return false;
+
+	if (cfs_rq->avg.util_sum)
+		return false;
+
+	if (cfs_rq->avg.runnable_sum)
+		return false;
+
+	return true;
+}
+
 /**
  * update_tg_load_avg - update the tg's load avg
  * @cfs_rq: the cfs_rq whose avg changed
@@ -4091,6 +4109,11 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 
 #else /* CONFIG_SMP */
 
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	return true;
+}
+
 #define UPDATE_TG	0x0
 #define SKIP_AGE_LOAD	0x0
 #define DO_ATTACH	0x0
@@ -4749,8 +4772,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 		cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
 					     cfs_rq->throttled_clock_task;
 
-		/* Add cfs_rq with already running entity in the list */
-		if (cfs_rq->nr_running >= 1)
+		/* Add cfs_rq with load or one or more already running entities to the list */
+		if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
 			list_add_leaf_cfs_rq(cfs_rq);
 	}
 
@@ -7996,23 +8019,6 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
-{
-	if (cfs_rq->load.weight)
-		return false;
-
-	if (cfs_rq->avg.load_sum)
-		return false;
-
-	if (cfs_rq->avg.util_sum)
-		return false;
-
-	if (cfs_rq->avg.runnable_sum)
-		return false;
-
-	return true;
-}
-
 static bool __update_blocked_fair(struct rq *rq, bool *done)
 {
 	struct cfs_rq *cfs_rq, *pos;
