Skip to content

Commit 89ee048

Browse files
vingu-linaro authored and Ingo Molnar committed
sched/core: Fix group_entity's share update
The update of the share of a cfs_rq is done when its load_avg is updated but before the group_entity's load_avg has been updated for the past time slot. This generates wrong load_avg accounting which can be significant when small tasks are involved in the scheduling.

Let's take the example of a task a that is dequeued from its task group A:

       root
      (cfs_rq)
        \
        (se)
         A
        (cfs_rq)
          \
          (se)
           a

Task "a" was the only task in task group A which becomes idle when a is dequeued.

We have the sequence:

- dequeue_entity a->se
    - update_load_avg(a->se)
    - dequeue_entity_load_avg(A->cfs_rq, a->se)
    - update_cfs_shares(A->cfs_rq)
        A->cfs_rq->load.weight == 0
        A->se->load.weight is updated with the new share (0 in this case)
- dequeue_entity A->se
    - update_load_avg(A->se) but its weight is now zero so the last time slot (up to a tick) will be accounted with a weight of 0 instead of its real weight during the time slot. The last time slot will be accounted as an idle one whereas it was a running one.

If the running time of task a is short enough that no tick happens when it runs, all running time of group entity A->se will be accounted as idle time.

Instead, we should update the share of a cfs_rq (in fact the weight of its group entity) only after having updated the load_avg of the group_entity.

update_cfs_shares() now takes the sched_entity as a parameter instead of the cfs_rq, and the weight of the group_entity is updated only once its load_avg has been synced with current time.

Signed-off-by: Vincent Guittot <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent da9647e commit 89ee048

File tree

1 file changed

+37
-13
lines changed

1 file changed

+37
-13
lines changed

kernel/sched/fair.c

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2689,16 +2689,20 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
26892689

26902690
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
26912691

2692-
static void update_cfs_shares(struct cfs_rq *cfs_rq)
2692+
static void update_cfs_shares(struct sched_entity *se)
26932693
{
2694+
struct cfs_rq *cfs_rq = group_cfs_rq(se);
26942695
struct task_group *tg;
2695-
struct sched_entity *se;
26962696
long shares;
26972697

2698-
tg = cfs_rq->tg;
2699-
se = tg->se[cpu_of(rq_of(cfs_rq))];
2700-
if (!se || throttled_hierarchy(cfs_rq))
2698+
if (!cfs_rq)
2699+
return;
2700+
2701+
if (throttled_hierarchy(cfs_rq))
27012702
return;
2703+
2704+
tg = cfs_rq->tg;
2705+
27022706
#ifndef CONFIG_SMP
27032707
if (likely(se->load.weight == tg->shares))
27042708
return;
@@ -2707,8 +2711,9 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
27072711

27082712
reweight_entity(cfs_rq_of(se), se, shares);
27092713
}
2714+
27102715
#else /* CONFIG_FAIR_GROUP_SCHED */
2711-
static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
2716+
static inline void update_cfs_shares(struct sched_entity *se)
27122717
{
27132718
}
27142719
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -3582,10 +3587,18 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
35823587
if (renorm && !curr)
35833588
se->vruntime += cfs_rq->min_vruntime;
35843589

3590+
/*
3591+
* When enqueuing a sched_entity, we must:
3592+
* - Update loads to have both entity and cfs_rq synced with now.
3593+
* - Add its load to cfs_rq->runnable_avg
3594+
* - For group_entity, update its weight to reflect the new share of
3595+
* its group cfs_rq
3596+
* - Add its new weight to cfs_rq->load.weight
3597+
*/
35853598
update_load_avg(se, UPDATE_TG);
35863599
enqueue_entity_load_avg(cfs_rq, se);
3600+
update_cfs_shares(se);
35873601
account_entity_enqueue(cfs_rq, se);
3588-
update_cfs_shares(cfs_rq);
35893602

35903603
if (flags & ENQUEUE_WAKEUP)
35913604
place_entity(cfs_rq, se, 0);
@@ -3657,6 +3670,15 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
36573670
* Update run-time statistics of the 'current'.
36583671
*/
36593672
update_curr(cfs_rq);
3673+
3674+
/*
3675+
* When dequeuing a sched_entity, we must:
3676+
* - Update loads to have both entity and cfs_rq synced with now.
3677+
* - Subtract its load from the cfs_rq->runnable_avg.
3678+
* - Subtract its previous weight from cfs_rq->load.weight.
3679+
* - For group entity, update its weight to reflect the new share
3680+
* of its group cfs_rq.
3681+
*/
36603682
update_load_avg(se, UPDATE_TG);
36613683
dequeue_entity_load_avg(cfs_rq, se);
36623684

@@ -3681,7 +3703,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
36813703
/* return excess runtime on last dequeue */
36823704
return_cfs_rq_runtime(cfs_rq);
36833705

3684-
update_cfs_shares(cfs_rq);
3706+
update_cfs_shares(se);
36853707

36863708
/*
36873709
* Now advance min_vruntime if @se was the entity holding it back,
@@ -3864,7 +3886,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
38643886
* Ensure that runnable average is periodically updated.
38653887
*/
38663888
update_load_avg(curr, UPDATE_TG);
3867-
update_cfs_shares(cfs_rq);
3889+
update_cfs_shares(curr);
38683890

38693891
#ifdef CONFIG_SCHED_HRTICK
38703892
/*
@@ -4761,7 +4783,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
47614783
break;
47624784

47634785
update_load_avg(se, UPDATE_TG);
4764-
update_cfs_shares(cfs_rq);
4786+
update_cfs_shares(se);
47654787
}
47664788

47674789
if (!se)
@@ -4820,7 +4842,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
48204842
break;
48214843

48224844
update_load_avg(se, UPDATE_TG);
4823-
update_cfs_shares(cfs_rq);
4845+
update_cfs_shares(se);
48244846
}
48254847

48264848
if (!se)
@@ -9362,8 +9384,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
93629384

93639385
/* Possible calls to update_curr() need rq clock */
93649386
update_rq_clock(rq);
9365-
for_each_sched_entity(se)
9366-
update_cfs_shares(group_cfs_rq(se));
9387+
for_each_sched_entity(se) {
9388+
update_load_avg(se, UPDATE_TG);
9389+
update_cfs_shares(se);
9390+
}
93679391
raw_spin_unlock_irqrestore(&rq->lock, flags);
93689392
}
93699393

0 commit comments

Comments
 (0)