
Commit 3fed382

Rik van Riel authored and Ingo Molnar committed:

sched/numa: Implement NUMA node level wake_affine()
Since select_idle_sibling() can place a task anywhere on a socket, comparing
loads between individual CPU cores makes no real sense for deciding whether
to do an affine wakeup across sockets, either.

Instead, compare the load between the sockets in a similar way the load
balancer and the numa balancing code do.

Signed-off-by: Rik van Riel <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Mike Galbraith <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
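The heart of the new numa_wake_affine() below is the same effective-load comparison the load balancer uses, only applied at node granularity: each node's post-move load is weighted by the other node's compute capacity, and the previous node gets extra headroom of half of sd->imbalance_pct. A minimal standalone sketch of that arithmetic follows; the node_stats struct, the helper name and the example numbers are illustrative placeholders, not kernel code or kernel defaults.

/*
 * Standalone sketch of the node-level load comparison performed by the
 * numa_wake_affine() introduced in this patch. The node_stats struct,
 * the helper name and the example numbers are illustrative placeholders,
 * not kernel data structures or kernel defaults.
 */
#include <stdbool.h>
#include <stdio.h>

struct node_stats {
	long long load;			/* summed runnable load on the node */
	long long compute_capacity;	/* summed CPU capacity on the node */
};

/* Would waking the task on "this" node keep the two nodes balanced? */
static bool node_wake_affine(struct node_stats this_node,
			     struct node_stats prev_node,
			     long long task_load, int imbalance_pct)
{
	long long this_eff_load, prev_eff_load;

	/* An idle destination node can always take the task. */
	if (this_node.load == 0)
		return true;

	/* Weight each side by the other node's compute capacity... */
	this_eff_load = 100 * prev_node.compute_capacity;
	/* ...and give the previous node half of imbalance_pct as headroom. */
	prev_eff_load = (100 + (imbalance_pct - 100) / 2) *
			this_node.compute_capacity;

	/* Loads as they would look after the move. */
	this_eff_load *= this_node.load + task_load;
	prev_eff_load *= prev_node.load - task_load;

	return this_eff_load <= prev_eff_load;
}

int main(void)
{
	/* Two equally capable nodes, the previous one somewhat busier. */
	struct node_stats this_node = { .load = 2048, .compute_capacity = 4096 };
	struct node_stats prev_node = { .load = 3072, .compute_capacity = 4096 };

	/* imbalance_pct = 125 is used here purely as an example value. */
	printf("affine wakeup allowed: %d\n",
	       (int)node_wake_affine(this_node, prev_node, 512, 125));
	return 0;
}

With these numbers both nodes would carry 2560 units of load after the move; the 12% headroom granted to the previous node then tips the comparison in favor of moving, so the sketch prints 1.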
1 parent: 7d894e6

File tree: 1 file changed (+71, -59 lines)

kernel/sched/fair.c — 71 additions, 59 deletions
@@ -2586,6 +2586,60 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
 		}
 	}
 }
+
+/*
+ * Can a task be moved from prev_cpu to this_cpu without causing a load
+ * imbalance that would trigger the load balancer?
+ */
+static inline bool numa_wake_affine(struct sched_domain *sd,
+				    struct task_struct *p, int this_cpu,
+				    int prev_cpu, int sync)
+{
+	struct numa_stats prev_load, this_load;
+	s64 this_eff_load, prev_eff_load;
+
+	update_numa_stats(&prev_load, cpu_to_node(prev_cpu));
+	update_numa_stats(&this_load, cpu_to_node(this_cpu));
+
+	/*
+	 * If sync wakeup then subtract the (maximum possible)
+	 * effect of the currently running task from the load
+	 * of the current CPU:
+	 */
+	if (sync) {
+		unsigned long current_load = task_h_load(current);
+
+		if (this_load.load > current_load)
+			this_load.load -= current_load;
+		else
+			this_load.load = 0;
+	}
+
+	/*
+	 * In low-load situations, where this_cpu's node is idle due to the
+	 * sync cause above having dropped this_load.load to 0, move the task.
+	 * Moving to an idle socket will not create a bad imbalance.
+	 *
+	 * Otherwise check if the nodes are near enough in load to allow this
+	 * task to be woken on this_cpu's node.
+	 */
+	if (this_load.load > 0) {
+		unsigned long task_load = task_h_load(p);
+
+		this_eff_load = 100;
+		this_eff_load *= prev_load.compute_capacity;
+
+		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
+		prev_eff_load *= this_load.compute_capacity;
+
+		this_eff_load *= this_load.load + task_load;
+		prev_eff_load *= prev_load.load - task_load;
+
+		return this_eff_load <= prev_eff_load;
+	}
+
+	return true;
+}
 #else
 static void task_tick_numa(struct rq *rq, struct task_struct *curr)
 {
@@ -2598,6 +2652,13 @@ static inline void account_numa_enqueue(struct rq *rq, struct task_struct *p)
 static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
 {
 }
+
+static inline bool numa_wake_affine(struct sched_domain *sd,
+				    struct task_struct *p, int this_cpu,
+				    int prev_cpu, int sync)
+{
+	return true;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static void
@@ -5407,74 +5468,25 @@ static int wake_wide(struct task_struct *p)
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int prev_cpu, int sync)
 {
-	s64 this_load, load;
-	s64 this_eff_load, prev_eff_load;
-	int idx, this_cpu;
-	struct task_group *tg;
-	unsigned long weight;
-	int balanced;
-
-	idx = sd->wake_idx;
-	this_cpu = smp_processor_id();
-	load = source_load(prev_cpu, idx);
-	this_load = target_load(this_cpu, idx);
+	int this_cpu = smp_processor_id();
+	bool affine = false;
 
 	/*
 	 * Common case: CPUs are in the same socket, and select_idle_sibling()
 	 * will do its thing regardless of what we return:
 	 */
 	if (cpus_share_cache(prev_cpu, this_cpu))
-		return true;
-
-	/*
-	 * If sync wakeup then subtract the (maximum possible)
-	 * effect of the currently running task from the load
-	 * of the current CPU:
-	 */
-	if (sync) {
-		tg = task_group(current);
-		weight = current->se.avg.load_avg;
-
-		this_load += effective_load(tg, this_cpu, -weight, -weight);
-		load += effective_load(tg, prev_cpu, 0, -weight);
-	}
-
-	tg = task_group(p);
-	weight = p->se.avg.load_avg;
-
-	/*
-	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
-	 * due to the sync cause above having dropped this_load to 0, we'll
-	 * always have an imbalance, but there's really nothing you can do
-	 * about that, so that's good too.
-	 *
-	 * Otherwise check if either cpus are near enough in load to allow this
-	 * task to be woken on this_cpu.
-	 */
-	this_eff_load = 100;
-	this_eff_load *= capacity_of(prev_cpu);
-
-	prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
-	prev_eff_load *= capacity_of(this_cpu);
-
-	if (this_load > 0) {
-		this_eff_load *= this_load +
-			effective_load(tg, this_cpu, weight, weight);
-
-		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
-	}
-
-	balanced = this_eff_load <= prev_eff_load;
+		affine = true;
+	else
+		affine = numa_wake_affine(sd, p, this_cpu, prev_cpu, sync);
 
 	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
+	if (affine) {
+		schedstat_inc(sd->ttwu_move_affine);
+		schedstat_inc(p->se.statistics.nr_wakeups_affine);
+	}
 
-	if (!balanced)
-		return 0;
-
-	schedstat_inc(sd->ttwu_move_affine);
-	schedstat_inc(p->se.statistics.nr_wakeups_affine);
-
-	return 1;
+	return affine;
 }
 
 static inline int task_util(struct task_struct *p);
