Skip to content

Commit 8ff6d47

Browse files
committed
Merge tag 'sched_urgent_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Borislav Petkov: - Do not adjust the weight of empty group entities and avoid scheduling artifacts - Avoid scheduling lag by computing lag properly and thus address an EEVDF entity placement issue * tag 'sched_urgent_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/fair: Fix update_cfs_group() vs DELAY_DEQUEUE sched/fair: Fix EEVDF entity placement bug causing scheduling lag
2 parents fda5e3f + 66951e4 commit 8ff6d47

File tree

1 file changed

+23
-128
lines changed

1 file changed

+23
-128
lines changed

kernel/sched/fair.c

Lines changed: 23 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -689,21 +689,16 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
689689
*
690690
* XXX could add max_slice to the augmented data to track this.
691691
*/
692-
static s64 entity_lag(u64 avruntime, struct sched_entity *se)
692+
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
693693
{
694694
s64 vlag, limit;
695695

696-
vlag = avruntime - se->vruntime;
697-
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
698-
699-
return clamp(vlag, -limit, limit);
700-
}
701-
702-
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
703-
{
704696
SCHED_WARN_ON(!se->on_rq);
705697

706-
se->vlag = entity_lag(avg_vruntime(cfs_rq), se);
698+
vlag = avg_vruntime(cfs_rq) - se->vruntime;
699+
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
700+
701+
se->vlag = clamp(vlag, -limit, limit);
707702
}
708703

709704
/*
@@ -3774,137 +3769,32 @@ static inline void
37743769
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
37753770
#endif
37763771

3777-
static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
3778-
unsigned long weight)
3779-
{
3780-
unsigned long old_weight = se->load.weight;
3781-
s64 vlag, vslice;
3782-
3783-
/*
3784-
* VRUNTIME
3785-
* --------
3786-
*
3787-
* COROLLARY #1: The virtual runtime of the entity needs to be
3788-
* adjusted if re-weight at !0-lag point.
3789-
*
3790-
* Proof: For contradiction assume this is not true, so we can
3791-
* re-weight without changing vruntime at !0-lag point.
3792-
*
3793-
* Weight VRuntime Avg-VRuntime
3794-
* before w v V
3795-
* after w' v' V'
3796-
*
3797-
* Since lag needs to be preserved through re-weight:
3798-
*
3799-
* lag = (V - v)*w = (V'- v')*w', where v = v'
3800-
* ==> V' = (V - v)*w/w' + v (1)
3801-
*
3802-
* Let W be the total weight of the entities before reweight,
3803-
* since V' is the new weighted average of entities:
3804-
*
3805-
* V' = (WV + w'v - wv) / (W + w' - w) (2)
3806-
*
3807-
* by using (1) & (2) we obtain:
3808-
*
3809-
* (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
3810-
* ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
3811-
* ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
3812-
* ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
3813-
*
3814-
* Since we are doing at !0-lag point which means V != v, we
3815-
* can simplify (3):
3816-
*
3817-
* ==> W / (W + w' - w) = w / w'
3818-
* ==> Ww' = Ww + ww' - ww
3819-
* ==> W * (w' - w) = w * (w' - w)
3820-
* ==> W = w (re-weight indicates w' != w)
3821-
*
3822-
* So the cfs_rq contains only one entity, hence vruntime of
3823-
* the entity @v should always equal to the cfs_rq's weighted
3824-
* average vruntime @V, which means we will always re-weight
3825-
* at 0-lag point, thus breach assumption. Proof completed.
3826-
*
3827-
*
3828-
* COROLLARY #2: Re-weight does NOT affect weighted average
3829-
* vruntime of all the entities.
3830-
*
3831-
* Proof: According to corollary #1, Eq. (1) should be:
3832-
*
3833-
* (V - v)*w = (V' - v')*w'
3834-
* ==> v' = V' - (V - v)*w/w' (4)
3835-
*
3836-
* According to the weighted average formula, we have:
3837-
*
3838-
* V' = (WV - wv + w'v') / (W - w + w')
3839-
* = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
3840-
* = (WV - wv + w'V' - Vw + wv) / (W - w + w')
3841-
* = (WV + w'V' - Vw) / (W - w + w')
3842-
*
3843-
* ==> V'*(W - w + w') = WV + w'V' - Vw
3844-
* ==> V' * (W - w) = (W - w) * V (5)
3845-
*
3846-
* If the entity is the only one in the cfs_rq, then reweight
3847-
* always occurs at 0-lag point, so V won't change. Or else
3848-
* there are other entities, hence W != w, then Eq. (5) turns
3849-
* into V' = V. So V won't change in either case, proof done.
3850-
*
3851-
*
3852-
* So according to corollary #1 & #2, the effect of re-weight
3853-
* on vruntime should be:
3854-
*
3855-
* v' = V' - (V - v) * w / w' (4)
3856-
* = V - (V - v) * w / w'
3857-
* = V - vl * w / w'
3858-
* = V - vl'
3859-
*/
3860-
if (avruntime != se->vruntime) {
3861-
vlag = entity_lag(avruntime, se);
3862-
vlag = div_s64(vlag * old_weight, weight);
3863-
se->vruntime = avruntime - vlag;
3864-
}
3865-
3866-
/*
3867-
* DEADLINE
3868-
* --------
3869-
*
3870-
* When the weight changes, the virtual time slope changes and
3871-
* we should adjust the relative virtual deadline accordingly.
3872-
*
3873-
* d' = v' + (d - v)*w/w'
3874-
* = V' - (V - v)*w/w' + (d - v)*w/w'
3875-
* = V - (V - v)*w/w' + (d - v)*w/w'
3876-
* = V + (d - V)*w/w'
3877-
*/
3878-
vslice = (s64)(se->deadline - avruntime);
3879-
vslice = div_s64(vslice * old_weight, weight);
3880-
se->deadline = avruntime + vslice;
3881-
}
3772+
static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags);
38823773

38833774
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
38843775
unsigned long weight)
38853776
{
38863777
bool curr = cfs_rq->curr == se;
3887-
u64 avruntime;
38883778

38893779
if (se->on_rq) {
38903780
/* commit outstanding execution time */
38913781
update_curr(cfs_rq);
3892-
avruntime = avg_vruntime(cfs_rq);
3782+
update_entity_lag(cfs_rq, se);
3783+
se->deadline -= se->vruntime;
3784+
se->rel_deadline = 1;
38933785
if (!curr)
38943786
__dequeue_entity(cfs_rq, se);
38953787
update_load_sub(&cfs_rq->load, se->load.weight);
38963788
}
38973789
dequeue_load_avg(cfs_rq, se);
38983790

3899-
if (se->on_rq) {
3900-
reweight_eevdf(se, avruntime, weight);
3901-
} else {
3902-
/*
3903-
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
3904-
* we need to scale se->vlag when w_i changes.
3905-
*/
3906-
se->vlag = div_s64(se->vlag * se->load.weight, weight);
3907-
}
3791+
/*
3792+
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
3793+
* we need to scale se->vlag when w_i changes.
3794+
*/
3795+
se->vlag = div_s64(se->vlag * se->load.weight, weight);
3796+
if (se->rel_deadline)
3797+
se->deadline = div_s64(se->deadline * se->load.weight, weight);
39083798

39093799
update_load_set(&se->load, weight);
39103800

@@ -3919,6 +3809,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
39193809
enqueue_load_avg(cfs_rq, se);
39203810
if (se->on_rq) {
39213811
update_load_add(&cfs_rq->load, se->load.weight);
3812+
place_entity(cfs_rq, se, 0);
39223813
if (!curr)
39233814
__enqueue_entity(cfs_rq, se);
39243815

@@ -4065,7 +3956,11 @@ static void update_cfs_group(struct sched_entity *se)
40653956
struct cfs_rq *gcfs_rq = group_cfs_rq(se);
40663957
long shares;
40673958

4068-
if (!gcfs_rq)
3959+
/*
3960+
* When a group becomes empty, preserve its weight. This matters for
3961+
* DELAY_DEQUEUE.
3962+
*/
3963+
if (!gcfs_rq || !gcfs_rq->load.weight)
40693964
return;
40703965

40713966
if (throttled_hierarchy(gcfs_rq))
@@ -5359,7 +5254,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
53595254

53605255
se->vruntime = vruntime - lag;
53615256

5362-
if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
5257+
if (se->rel_deadline) {
53635258
se->deadline += se->vruntime;
53645259
se->rel_deadline = 0;
53655260
return;

0 commit comments

Comments
 (0)