
Commit 95d6859

vingu-linaro authored and Peter Zijlstra committed
sched/pelt: Sync util/runnable_sum with PELT window when propagating
update_tg_cfs_*() propagate the impact of the attach/detach of an entity down into the cfs_rq hierarchy and must keep it in sync with the current PELT window.

Even if we can't sync a child cfs_rq and its group se, we can sync the group se and its parent cfs_rq with the current position in the PELT window. In fact, we must keep them in sync in order to stay synced with the other entities and group entities that are already attached to the cfs_rq.

Signed-off-by: Vincent Guittot <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 12aa258 commit 95d6859
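
For a concrete sense of what staying in sync with the PELT window means, here is a minimal standalone sketch (plain C with hypothetical values, not kernel code; it only assumes the kernel's LOAD_AVG_MAX value of 47742 and the 1024us PELT segment length). It contrasts a util_sum written as if the whole segment had elapsed, which is what the old scaling did, with one written against the current window position:

#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_MAX    47742   /* maximum possible *_sum over a fully elapsed window */

int main(void)
{
        /* Hypothetical values, chosen only for illustration. */
        uint32_t util_avg = 300;        /* avg the group se inherits from its gcfs_rq */
        uint32_t period_contrib = 512;  /* elapsed part of the current 1024us segment */

        /* Old scaling: pretends the whole segment has already elapsed. */
        uint32_t old_sum = util_avg * LOAD_AVG_MAX;

        /* New scaling: synced with the current position in the PELT window. */
        uint32_t divider = LOAD_AVG_MAX - 1024 + period_contrib;
        uint32_t new_sum = util_avg * divider;

        printf("old util_sum = %u\n", old_sum);
        printf("new util_sum = %u (smaller by avg * (1024 - period_contrib) = %u)\n",
               new_sum, util_avg * (1024 - period_contrib));
        return 0;
}

The overshoot of the old scaling grows the earlier in the segment the propagation happens, which is exactly the window misalignment the patch removes.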

File tree

2 files changed, +51 -22 lines


kernel/sched/fair.c

Lines changed: 27 additions & 22 deletions
@@ -3441,52 +3441,46 @@ static inline void
 update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
         long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
+        /*
+         * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+         * See ___update_load_avg() for details.
+         */
+        u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
         /* Nothing to update */
         if (!delta)
                 return;
 
-        /*
-         * The relation between sum and avg is:
-         *
-         *   LOAD_AVG_MAX - 1024 + sa->period_contrib
-         *
-         * however, the PELT windows are not aligned between grq and gse.
-         */
-
         /* Set new sched_entity's utilization */
         se->avg.util_avg = gcfs_rq->avg.util_avg;
-        se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
+        se->avg.util_sum = se->avg.util_avg * divider;
 
         /* Update parent cfs_rq utilization */
         add_positive(&cfs_rq->avg.util_avg, delta);
-        cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
+        cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
 }
 
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
         long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+        /*
+         * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+         * See ___update_load_avg() for details.
+         */
+        u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
         /* Nothing to update */
         if (!delta)
                 return;
 
-        /*
-         * The relation between sum and avg is:
-         *
-         *   LOAD_AVG_MAX - 1024 + sa->period_contrib
-         *
-         * however, the PELT windows are not aligned between grq and gse.
-         */
-
         /* Set new sched_entity's runnable */
         se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
-        se->avg.runnable_sum = se->avg.runnable_avg * LOAD_AVG_MAX;
+        se->avg.runnable_sum = se->avg.runnable_avg * divider;
 
         /* Update parent cfs_rq runnable */
         add_positive(&cfs_rq->avg.runnable_avg, delta);
-        cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * LOAD_AVG_MAX;
+        cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
 }
 
 static inline void
@@ -3496,19 +3490,26 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
         unsigned long load_avg;
         u64 load_sum = 0;
         s64 delta_sum;
+        u32 divider;
 
         if (!runnable_sum)
                 return;
 
         gcfs_rq->prop_runnable_sum = 0;
 
+        /*
+         * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+         * See ___update_load_avg() for details.
+         */
+        divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
+
         if (runnable_sum >= 0) {
                 /*
                  * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
                  * the CPU is saturated running == runnable.
                  */
                 runnable_sum += se->avg.load_sum;
-                runnable_sum = min(runnable_sum, (long)LOAD_AVG_MAX);
+                runnable_sum = min_t(long, runnable_sum, divider);
         } else {
                 /*
                  * Estimate the new unweighted runnable_sum of the gcfs_rq by
@@ -3533,7 +3534,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
         runnable_sum = max(runnable_sum, running_sum);
 
         load_sum = (s64)se_weight(se) * runnable_sum;
-        load_avg = div_s64(load_sum, LOAD_AVG_MAX);
+        load_avg = div_s64(load_sum, divider);
 
         delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
         delta_avg = load_avg - se->avg.load_avg;
@@ -3697,6 +3698,10 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
  */
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+        /*
+         * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+         * See ___update_load_avg() for details.
+         */
         u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
         /*
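
The reason the divider shows up in every rescaling above is that ___update_load_avg() derives each *_avg from its *_sum with the same LOAD_AVG_MAX - 1024 + period_contrib factor (the relation the removed comments referred to). A sum written with that factor therefore round-trips to the same avg, while one written with a bare LOAD_AVG_MAX inflates on the next recomputation. A minimal sketch of that round trip, assuming hypothetical values and the kernel's LOAD_AVG_MAX of 47742 (not the kernel code itself):

#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_MAX    47742

/* Same divider formula as the patch uses; sketch only. */
static uint32_t pelt_divider(uint32_t period_contrib)
{
        return LOAD_AVG_MAX - 1024 + period_contrib;
}

int main(void)
{
        uint32_t period_contrib = 200;  /* hypothetical position in the window */
        uint32_t divider = pelt_divider(period_contrib);
        uint32_t avg = 500;             /* hypothetical util_avg being propagated */

        /* Old: the sum assumes a fully elapsed window, so avg = sum / divider drifts up. */
        uint32_t old_sum = avg * LOAD_AVG_MAX;
        printf("old round-trip avg = %u\n", old_sum / divider); /* 508, not 500 */

        /* New: sum and divider share the window position, so the avg round-trips. */
        uint32_t new_sum = avg * divider;
        printf("new round-trip avg = %u\n", new_sum / divider); /* 500 */
        return 0;
}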

kernel/sched/pelt.c

Lines changed: 24 additions & 0 deletions
@@ -237,6 +237,30 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
         return 1;
 }
 
+/*
+ * When syncing *_avg with *_sum, we must take into account the current
+ * position in the PELT segment otherwise the remaining part of the segment
+ * will be considered as idle time whereas it's not yet elapsed and this will
+ * generate unwanted oscillation in the range [1002..1024[.
+ *
+ * The max value of *_sum varies with the position in the time segment and is
+ * equals to :
+ *
+ *   LOAD_AVG_MAX*y + sa->period_contrib
+ *
+ * which can be simplified into:
+ *
+ *   LOAD_AVG_MAX - 1024 + sa->period_contrib
+ *
+ * because LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024
+ *
+ * The same care must be taken when a sched entity is added, updated or
+ * removed from a cfs_rq and we need to update sched_avg. Scheduler entities
+ * and the cfs rq, to which they are attached, have the same position in the
+ * time segment because they use the same clock. This means that we can use
+ * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity
+ * if it's more convenient.
+ */
 static __always_inline void
 ___update_load_avg(struct sched_avg *sa, unsigned long load)
 {
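
The "LOAD_AVG_MAX*y == LOAD_AVG_MAX-1024" step above uses the PELT decay factor y, chosen so that y^32 == 0.5: LOAD_AVG_MAX approximates the fixed point of the per-window update sum = sum*y + 1024, and decaying a value at that fixed point by y removes exactly one full 1024 contribution. A quick standalone floating-point check, assuming the kernel's LOAD_AVG_MAX value of 47742 (not kernel code):

#include <stdio.h>
#include <math.h>

#define LOAD_AVG_MAX    47742   /* value used by the kernel's PELT code */

int main(void)
{
        double y = pow(0.5, 1.0 / 32.0);        /* PELT decay factor: y^32 == 0.5 */

        printf("fixed point 1024/(1-y) = %.1f\n", 1024.0 / (1.0 - y));  /* ~47788 */
        printf("LOAD_AVG_MAX * y       = %.1f\n", LOAD_AVG_MAX * y);    /* ~46719 */
        printf("LOAD_AVG_MAX - 1024    = %d\n", LOAD_AVG_MAX - 1024);   /* 46718 */
        return 0;
}

The last two values differ by about one unit, presumably because LOAD_AVG_MAX reflects the kernel's integer accumulation rather than the exact real-valued limit; for the purpose of the divider that difference is negligible.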
