Commit 556061b

Peter Zijlstra authored and Ingo Molnar committed
sched/nohz: Fix rq->cpu_load[] calculations
While investigating why the load-balancer was doing funny things, I found
that the rq->cpu_load[] tables were completely screwy; a bit more digging
revealed that the updates that got through were missing ticks, followed
by a catch-up of 2 ticks.

The catch-up assumes the cpu was idle during that time (since only nohz
can cause missed ticks and the machine is idle, etc.), which means that
especially the higher indices were significantly lower than they ought
to be.

The reason for this is that it is not correct to compare against jiffies
on every jiffy on any other cpu than the cpu that updates jiffies.

This patch kludges around it by only doing the catch-up from
nohz_idle_balance() and doing the regular update unconditionally from
the tick.

Signed-off-by: Peter Zijlstra <[email protected]>
Cc: [email protected]
Cc: Venkatesh Pallipadi <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 870a0bb commit 556061b
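
To see why a catch-up that assumes idle deflates the longer-horizon averages, here is a minimal standalone sketch (plain userspace C, not the kernel code; IDX_MAX, one_update() and the 1024 load value are illustrative stand-ins, not kernel identifiers). Each index i keeps a decaying average with factor (2^i - 1)/2^i per update; the buggy path is modelled as every other tick being "missed" and then caught up with a zero-load (idle) update before the real one.

#include <stdio.h>

#define IDX_MAX 5

/* one decaying-average step: load[i] = ((2^i - 1) * old + new) / 2^i */
static void one_update(unsigned long *load, unsigned long new_load)
{
        unsigned long scale = 2;
        int i;

        load[0] = new_load;
        for (i = 1; i < IDX_MAX; i++, scale += scale)
                load[i] = (load[i] * (scale - 1) + new_load) / scale;
}

int main(void)
{
        unsigned long good[IDX_MAX] = {0}, bad[IDX_MAX] = {0};
        int tick, i;

        for (tick = 0; tick < 64; tick++) {
                /* correct: one update per tick at the cpu's real load */
                one_update(good, 1024);

                /*
                 * buggy: every other tick is "missed", then caught up
                 * as if the cpu had been idle (a zero-load decay)
                 * before the real update is applied
                 */
                if (tick & 1) {
                        one_update(bad, 0);     /* bogus idle catch-up */
                        one_update(bad, 1024);
                }
        }

        for (i = 0; i < IDX_MAX; i++)
                printf("idx %d: correct %lu, buggy %lu\n", i, good[i], bad[i]);

        return 0;
}

On a fully busy cpu the correct sequence converges towards 1024 at every index, while the bogus idle catch-up leaves the higher indices at roughly half their true value: the "significantly lower" higher indices the changelog describes.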

File tree

3 files changed: +41 −16 lines

kernel/sched/core.c

Lines changed: 39 additions & 14 deletions

@@ -692,8 +692,6 @@ int tg_nop(struct task_group *tg, void *data)
 }
 #endif
 
-void update_cpu_load(struct rq *this_rq);
-
 static void set_load_weight(struct task_struct *p)
 {
         int prio = p->static_prio - MAX_RT_PRIO;
@@ -2486,22 +2484,13 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
  * scheduler tick (TICK_NSEC). With tickless idle this will not be called
  * every tick. We fix it up based on jiffies.
  */
-void update_cpu_load(struct rq *this_rq)
+static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
+                              unsigned long pending_updates)
 {
-        unsigned long this_load = this_rq->load.weight;
-        unsigned long curr_jiffies = jiffies;
-        unsigned long pending_updates;
         int i, scale;
 
         this_rq->nr_load_updates++;
 
-        /* Avoid repeated calls on same jiffy, when moving in and out of idle */
-        if (curr_jiffies == this_rq->last_load_update_tick)
-                return;
-
-        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
-        this_rq->last_load_update_tick = curr_jiffies;
-
         /* Update our load: */
         this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
         for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
@@ -2526,9 +2515,45 @@ void update_cpu_load(struct rq *this_rq)
         sched_avg_update(this_rq);
 }
 
+/*
+ * Called from nohz_idle_balance() to update the load ratings before doing the
+ * idle balance.
+ */
+void update_idle_cpu_load(struct rq *this_rq)
+{
+        unsigned long curr_jiffies = jiffies;
+        unsigned long load = this_rq->load.weight;
+        unsigned long pending_updates;
+
+        /*
+         * Bloody broken means of dealing with nohz, but better than nothing..
+         * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
+         * update and see 0 difference the one time and 2 the next, even though
+         * we ticked at roughtly the same rate.
+         *
+         * Hence we only use this from nohz_idle_balance() and skip this
+         * nonsense when called from the scheduler_tick() since that's
+         * guaranteed a stable rate.
+         */
+        if (load || curr_jiffies == this_rq->last_load_update_tick)
+                return;
+
+        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+        this_rq->last_load_update_tick = curr_jiffies;
+
+        __update_cpu_load(this_rq, load, pending_updates);
+}
+
+/*
+ * Called from scheduler_tick()
+ */
 static void update_cpu_load_active(struct rq *this_rq)
 {
-        update_cpu_load(this_rq);
+        /*
+         * See the mess in update_idle_cpu_load().
+         */
+        this_rq->last_load_update_tick = jiffies;
+        __update_cpu_load(this_rq, this_rq->load.weight, 1);
 
         calc_load_account_active(this_rq);
 }
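
The comment block added above is the heart of the fix, and the drift it describes is easy to reproduce in isolation. Below is a standalone sketch (illustrative plain C, nothing here is kernel API; the 1000us jiffy period and the 30us jitter are made-up numbers): one cpu publishes jiffies on exact tick boundaries, while a second cpu ticks at the same nominal rate but slightly out of phase, so the per-tick delta it observes oscillates between 0 and 2 even though no tick was ever missed.

#include <stdio.h>

int main(void)
{
        long last_seen = 1;     /* jiffies observed at the previous tick */
        int k;

        for (k = 2; k <= 9; k++) {
                /* this cpu's tick lands 30us before or after each boundary */
                long tick_us = 1000L * k + ((k & 1) ? 30 : -30);
                long jiffies = tick_us / 1000;  /* value the updater published */

                printf("tick %d: jiffies=%ld delta=%ld\n",
                       k, jiffies, jiffies - last_seen);
                last_seen = jiffies;
        }
        return 0;
}

A delta of 2 here is exactly what the old update_cpu_load() treated as a missed (and therefore supposedly idle) jiffy, which is why the patch confines the jiffies comparison to the genuinely idle nohz path and has the tick path update unconditionally.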

kernel/sched/fair.c

Lines changed: 1 addition & 1 deletion

@@ -5012,7 +5012,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 
                 raw_spin_lock_irq(&this_rq->lock);
                 update_rq_clock(this_rq);
-                update_cpu_load(this_rq);
+                update_idle_cpu_load(this_rq);
                 raw_spin_unlock_irq(&this_rq->lock);
 
                 rebalance_domains(balance_cpu, CPU_IDLE);

kernel/sched/sched.h

Lines changed: 1 addition & 1 deletion

@@ -876,7 +876,7 @@ extern void resched_cpu(int cpu);
 extern struct rt_bandwidth def_rt_bandwidth;
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
 
-extern void update_cpu_load(struct rq *this_rq);
+extern void update_idle_cpu_load(struct rq *this_rq);
 
 #ifdef CONFIG_CGROUP_CPUACCT
 #include <linux/cgroup.h>
