Skip to content
This repository was archived by the owner on Nov 8, 2023. It is now read-only.

Commit 6d71a9c

Browse files
Peter ZijlstraIngo Molnar
authored andcommitted
sched/fair: Fix EEVDF entity placement bug causing scheduling lag
I noticed this in my traces today: turbostat-1222 [006] d..2. 311.935649: reweight_entity: (ffff888108f13e00-ffff88885ef38440-6) { weight: 1048576 avg_vruntime: 3184159639071 vruntime: 3184159640194 (-1123) deadline: 3184162621107 } -> { weight: 2 avg_vruntime: 3184177463330 vruntime: 3184748414495 (-570951165) deadline: 4747605329439 } turbostat-1222 [006] d..2. 311.935651: reweight_entity: (ffff888108f13e00-ffff88885ef38440-6) { weight: 2 avg_vruntime: 3184177463330 vruntime: 3184748414495 (-570951165) deadline: 4747605329439 } -> { weight: 1048576 avg_vruntime: 3184176414812 vruntime: 3184177464419 (-1049607) deadline: 3184180445332 } Which is a weight transition: 1048576 -> 2 -> 1048576. One would expect the lag to shoot out *AND* come back, notably: -1123*1048576/2 = -588775424 -588775424*2/1048576 = -1123 Except the trace shows it is all off. Worse, subsequent cycles shoot it out further and further. This made me have a very hard look at reweight_entity(), and specifically the ->on_rq case, which is more prominent with DELAY_DEQUEUE. And indeed, it is all sorts of broken. While the computation of the new lag is correct, the computation for the new vruntime, using the new lag is broken for it does not consider the logic set out in place_entity(). With the below patch, I now see things like: migration/12-55 [012] d..3. 309.006650: reweight_entity: (ffff8881e0e6f600-ffff88885f235f40-12) { weight: 977582 avg_vruntime: 4860513347366 vruntime: 4860513347908 (-542) deadline: 4860516552475 } -> { weight: 2 avg_vruntime: 4860528915984 vruntime: 4860793840706 (-264924722) deadline: 6427157349203 } migration/14-62 [014] d..3. 309.006698: reweight_entity: (ffff8881e0e6cc00-ffff88885f3b5f40-15) { weight: 2 avg_vruntime: 4874472992283 vruntime: 4939833828823 (-65360836540) deadline: 6316614641111 } -> { weight: 967149 avg_vruntime: 4874217684324 vruntime: 4874217688559 (-4235) deadline: 4874220535650 } Which isn't perfect yet, but much closer. Reported-by: Doug Smythies <[email protected]> Reported-by: Ingo Molnar <[email protected]> Tested-by: Ingo Molnar <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Signed-off-by: Ingo Molnar <[email protected]> Fixes: eab03c2 ("sched/eevdf: Fix vruntime adjustment on reweight") Link: https://lore.kernel.org/r/[email protected]
1 parent eea6e4b commit 6d71a9c

File tree

1 file changed

+18
-127
lines changed

1 file changed

+18
-127
lines changed

kernel/sched/fair.c

Lines changed: 18 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -689,21 +689,16 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
689689
*
690690
* XXX could add max_slice to the augmented data to track this.
691691
*/
692-
static s64 entity_lag(u64 avruntime, struct sched_entity *se)
692+
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
693693
{
694694
s64 vlag, limit;
695695

696-
vlag = avruntime - se->vruntime;
697-
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
698-
699-
return clamp(vlag, -limit, limit);
700-
}
701-
702-
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
703-
{
704696
SCHED_WARN_ON(!se->on_rq);
705697

706-
se->vlag = entity_lag(avg_vruntime(cfs_rq), se);
698+
vlag = avg_vruntime(cfs_rq) - se->vruntime;
699+
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
700+
701+
se->vlag = clamp(vlag, -limit, limit);
707702
}
708703

709704
/*
@@ -3774,137 +3769,32 @@ static inline void
37743769
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
37753770
#endif
37763771

3777-
static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
3778-
unsigned long weight)
3779-
{
3780-
unsigned long old_weight = se->load.weight;
3781-
s64 vlag, vslice;
3782-
3783-
/*
3784-
* VRUNTIME
3785-
* --------
3786-
*
3787-
* COROLLARY #1: The virtual runtime of the entity needs to be
3788-
* adjusted if re-weight at !0-lag point.
3789-
*
3790-
* Proof: For contradiction assume this is not true, so we can
3791-
* re-weight without changing vruntime at !0-lag point.
3792-
*
3793-
* Weight VRuntime Avg-VRuntime
3794-
* before w v V
3795-
* after w' v' V'
3796-
*
3797-
* Since lag needs to be preserved through re-weight:
3798-
*
3799-
* lag = (V - v)*w = (V'- v')*w', where v = v'
3800-
* ==> V' = (V - v)*w/w' + v (1)
3801-
*
3802-
* Let W be the total weight of the entities before reweight,
3803-
* since V' is the new weighted average of entities:
3804-
*
3805-
* V' = (WV + w'v - wv) / (W + w' - w) (2)
3806-
*
3807-
* by using (1) & (2) we obtain:
3808-
*
3809-
* (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
3810-
* ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
3811-
* ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
3812-
* ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
3813-
*
3814-
* Since we are doing at !0-lag point which means V != v, we
3815-
* can simplify (3):
3816-
*
3817-
* ==> W / (W + w' - w) = w / w'
3818-
* ==> Ww' = Ww + ww' - ww
3819-
* ==> W * (w' - w) = w * (w' - w)
3820-
* ==> W = w (re-weight indicates w' != w)
3821-
*
3822-
* So the cfs_rq contains only one entity, hence vruntime of
3823-
* the entity @v should always equal to the cfs_rq's weighted
3824-
* average vruntime @V, which means we will always re-weight
3825-
* at 0-lag point, thus breach assumption. Proof completed.
3826-
*
3827-
*
3828-
* COROLLARY #2: Re-weight does NOT affect weighted average
3829-
* vruntime of all the entities.
3830-
*
3831-
* Proof: According to corollary #1, Eq. (1) should be:
3832-
*
3833-
* (V - v)*w = (V' - v')*w'
3834-
* ==> v' = V' - (V - v)*w/w' (4)
3835-
*
3836-
* According to the weighted average formula, we have:
3837-
*
3838-
* V' = (WV - wv + w'v') / (W - w + w')
3839-
* = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
3840-
* = (WV - wv + w'V' - Vw + wv) / (W - w + w')
3841-
* = (WV + w'V' - Vw) / (W - w + w')
3842-
*
3843-
* ==> V'*(W - w + w') = WV + w'V' - Vw
3844-
* ==> V' * (W - w) = (W - w) * V (5)
3845-
*
3846-
* If the entity is the only one in the cfs_rq, then reweight
3847-
* always occurs at 0-lag point, so V won't change. Or else
3848-
* there are other entities, hence W != w, then Eq. (5) turns
3849-
* into V' = V. So V won't change in either case, proof done.
3850-
*
3851-
*
3852-
* So according to corollary #1 & #2, the effect of re-weight
3853-
* on vruntime should be:
3854-
*
3855-
* v' = V' - (V - v) * w / w' (4)
3856-
* = V - (V - v) * w / w'
3857-
* = V - vl * w / w'
3858-
* = V - vl'
3859-
*/
3860-
if (avruntime != se->vruntime) {
3861-
vlag = entity_lag(avruntime, se);
3862-
vlag = div_s64(vlag * old_weight, weight);
3863-
se->vruntime = avruntime - vlag;
3864-
}
3865-
3866-
/*
3867-
* DEADLINE
3868-
* --------
3869-
*
3870-
* When the weight changes, the virtual time slope changes and
3871-
* we should adjust the relative virtual deadline accordingly.
3872-
*
3873-
* d' = v' + (d - v)*w/w'
3874-
* = V' - (V - v)*w/w' + (d - v)*w/w'
3875-
* = V - (V - v)*w/w' + (d - v)*w/w'
3876-
* = V + (d - V)*w/w'
3877-
*/
3878-
vslice = (s64)(se->deadline - avruntime);
3879-
vslice = div_s64(vslice * old_weight, weight);
3880-
se->deadline = avruntime + vslice;
3881-
}
3772+
static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags);
38823773

38833774
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
38843775
unsigned long weight)
38853776
{
38863777
bool curr = cfs_rq->curr == se;
3887-
u64 avruntime;
38883778

38893779
if (se->on_rq) {
38903780
/* commit outstanding execution time */
38913781
update_curr(cfs_rq);
3892-
avruntime = avg_vruntime(cfs_rq);
3782+
update_entity_lag(cfs_rq, se);
3783+
se->deadline -= se->vruntime;
3784+
se->rel_deadline = 1;
38933785
if (!curr)
38943786
__dequeue_entity(cfs_rq, se);
38953787
update_load_sub(&cfs_rq->load, se->load.weight);
38963788
}
38973789
dequeue_load_avg(cfs_rq, se);
38983790

3899-
if (se->on_rq) {
3900-
reweight_eevdf(se, avruntime, weight);
3901-
} else {
3902-
/*
3903-
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
3904-
* we need to scale se->vlag when w_i changes.
3905-
*/
3906-
se->vlag = div_s64(se->vlag * se->load.weight, weight);
3907-
}
3791+
/*
3792+
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
3793+
* we need to scale se->vlag when w_i changes.
3794+
*/
3795+
se->vlag = div_s64(se->vlag * se->load.weight, weight);
3796+
if (se->rel_deadline)
3797+
se->deadline = div_s64(se->deadline * se->load.weight, weight);
39083798

39093799
update_load_set(&se->load, weight);
39103800

@@ -3919,6 +3809,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
39193809
enqueue_load_avg(cfs_rq, se);
39203810
if (se->on_rq) {
39213811
update_load_add(&cfs_rq->load, se->load.weight);
3812+
place_entity(cfs_rq, se, 0);
39223813
if (!curr)
39233814
__enqueue_entity(cfs_rq, se);
39243815

@@ -5359,7 +5250,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
53595250

53605251
se->vruntime = vruntime - lag;
53615252

5362-
if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
5253+
if (se->rel_deadline) {
53635254
se->deadline += se->vruntime;
53645255
se->rel_deadline = 0;
53655256
return;

0 commit comments

Comments
 (0)