@@ -2586,6 +2586,60 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
		}
	}
}
+
+/*
+ * Can a task be moved from prev_cpu to this_cpu without causing a load
+ * imbalance that would trigger the load balancer?
+ */
+static inline bool numa_wake_affine(struct sched_domain *sd,
+				    struct task_struct *p, int this_cpu,
+				    int prev_cpu, int sync)
+{
+	struct numa_stats prev_load, this_load;
+	s64 this_eff_load, prev_eff_load;
+
+	update_numa_stats(&prev_load, cpu_to_node(prev_cpu));
+	update_numa_stats(&this_load, cpu_to_node(this_cpu));
+
+	/*
+	 * If sync wakeup then subtract the (maximum possible)
+	 * effect of the currently running task from the load
+	 * of the current CPU:
+	 */
+	if (sync) {
+		unsigned long current_load = task_h_load(current);
+
+		if (this_load.load > current_load)
+			this_load.load -= current_load;
+		else
+			this_load.load = 0;
+	}
+
+	/*
+	 * In low-load situations, where this_cpu's node is idle due to the
+	 * sync cause above having dropped this_load.load to 0, move the task.
+	 * Moving to an idle socket will not create a bad imbalance.
+	 *
+	 * Otherwise check if the nodes are near enough in load to allow this
+	 * task to be woken on this_cpu's node.
+	 */
+	if (this_load.load > 0) {
+		unsigned long task_load = task_h_load(p);
+
+		this_eff_load = 100;
+		this_eff_load *= prev_load.compute_capacity;
+
+		prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
+		prev_eff_load *= this_load.compute_capacity;
+
+		this_eff_load *= this_load.load + task_load;
+		prev_eff_load *= prev_load.load - task_load;
+
+		return this_eff_load <= prev_eff_load;
+	}
+
+	return true;
+}
#else
static void task_tick_numa(struct rq *rq, struct task_struct *curr)
{
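
Note: the imbalance check added above reduces to comparing capacity-scaled node loads against a margin derived from sd->imbalance_pct. The standalone sketch below redoes that arithmetic outside the kernel; struct node_load, would_stay_balanced() and the sample numbers are made-up illustrations, not code from this patch.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins for the numa_stats fields the patch reads. */
struct node_load {
	long long load;     /* summed runnable load on the node */
	long long capacity; /* summed compute capacity of the node */
};

/*
 * Same comparison as in numa_wake_affine(): the wakeup stays affine when
 * the destination node's post-move load, scaled by the source capacity,
 * is within the halved imbalance_pct margin of the source node's
 * post-move load scaled by the destination capacity.
 */
static bool would_stay_balanced(struct node_load this_node,
				struct node_load prev_node,
				long long task_load, int imbalance_pct)
{
	long long this_eff = 100LL * prev_node.capacity *
			     (this_node.load + task_load);
	long long prev_eff = (100LL + (imbalance_pct - 100) / 2) *
			     this_node.capacity *
			     (prev_node.load - task_load);

	return this_eff <= prev_eff;
}

int main(void)
{
	struct node_load this_node = { .load = 900,  .capacity = 4096 };
	struct node_load prev_node = { .load = 1200, .capacity = 4096 };

	/* imbalance_pct of 125 gives a 112% margin after integer division. */
	printf("move allowed: %d\n",
	       (int)would_stay_balanced(this_node, prev_node, 200, 125));
	return 0;
}

With equal node capacities and an imbalance_pct of 125 this reduces to this_node.load + task_load <= 1.12 * (prev_node.load - task_load), i.e. the wakeup stays affine as long as the destination node does not end up noticeably busier than the source node.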
@@ -2598,6 +2652,13 @@ static inline void account_numa_enqueue(struct rq *rq, struct task_struct *p)
static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
{
}
+
+static inline bool numa_wake_affine(struct sched_domain *sd,
+				    struct task_struct *p, int this_cpu,
+				    int prev_cpu, int sync)
+{
+	return true;
+}
#endif /* CONFIG_NUMA_BALANCING */

static void
@@ -5407,74 +5468,25 @@ static int wake_wide(struct task_struct *p)
static int wake_affine(struct sched_domain *sd, struct task_struct *p,
		       int prev_cpu, int sync)
{
-	s64 this_load, load;
-	s64 this_eff_load, prev_eff_load;
-	int idx, this_cpu;
-	struct task_group *tg;
-	unsigned long weight;
-	int balanced;
-
-	idx = sd->wake_idx;
-	this_cpu = smp_processor_id();
-	load = source_load(prev_cpu, idx);
-	this_load = target_load(this_cpu, idx);
+	int this_cpu = smp_processor_id();
+	bool affine = false;

	/*
	 * Common case: CPUs are in the same socket, and select_idle_sibling()
	 * will do its thing regardless of what we return:
	 */
	if (cpus_share_cache(prev_cpu, this_cpu))
-		return true;
-
-	/*
-	 * If sync wakeup then subtract the (maximum possible)
-	 * effect of the currently running task from the load
-	 * of the current CPU:
-	 */
-	if (sync) {
-		tg = task_group(current);
-		weight = current->se.avg.load_avg;
-
-		this_load += effective_load(tg, this_cpu, -weight, -weight);
-		load += effective_load(tg, prev_cpu, 0, -weight);
-	}
-
-	tg = task_group(p);
-	weight = p->se.avg.load_avg;
-
-	/*
-	 * In low-load situations, where prev_cpu is idle and this_cpu is idle
-	 * due to the sync cause above having dropped this_load to 0, we'll
-	 * always have an imbalance, but there's really nothing you can do
-	 * about that, so that's good too.
-	 *
-	 * Otherwise check if either cpus are near enough in load to allow this
-	 * task to be woken on this_cpu.
-	 */
-	this_eff_load = 100;
-	this_eff_load *= capacity_of(prev_cpu);
-
-	prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
-	prev_eff_load *= capacity_of(this_cpu);
-
-	if (this_load > 0) {
-		this_eff_load *= this_load +
-			effective_load(tg, this_cpu, weight, weight);
-
-		prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
-	}
-
-	balanced = this_eff_load <= prev_eff_load;
+		affine = true;
+	else
+		affine = numa_wake_affine(sd, p, this_cpu, prev_cpu, sync);

	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
+	if (affine) {
+		schedstat_inc(sd->ttwu_move_affine);
+		schedstat_inc(p->se.statistics.nr_wakeups_affine);
+	}

-	if (!balanced)
-		return 0;
-
-	schedstat_inc(sd->ttwu_move_affine);
-	schedstat_inc(p->se.statistics.nr_wakeups_affine);
-
-	return 1;
+	return affine;
}

static inline int task_util(struct task_struct *p);
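
Note: after this hunk, wake_affine() boils down to three steps: treat same-LLC wakeups as affine, defer cross-node wakeups to numa_wake_affine(), and bump the affine schedstats only on a positive answer through a single exit path. The self-contained sketch below mirrors that flow; struct wake_ctx, wake_affine_decision() and the plain counters are hypothetical stand-ins for cpus_share_cache(), numa_wake_affine() and the schedstat fields, not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical inputs standing in for the scheduler state read above. */
struct wake_ctx {
	bool shares_llc;          /* cpus_share_cache(prev_cpu, this_cpu) */
	bool node_stays_balanced; /* outcome of the numa_wake_affine() check */
};

/*
 * Condensed decision mirroring the rewritten wake_affine(): same-LLC
 * wakeups are always affine, cross-node wakeups defer to the NUMA node
 * balance check, and the "affine" counters are only bumped when the
 * answer is positive.
 */
static bool wake_affine_decision(const struct wake_ctx *ctx,
				 unsigned long *nr_attempts,
				 unsigned long *nr_affine)
{
	bool affine = ctx->shares_llc || ctx->node_stays_balanced;

	(*nr_attempts)++;
	if (affine)
		(*nr_affine)++;

	return affine;
}

int main(void)
{
	unsigned long attempts = 0, nr_affine = 0;
	struct wake_ctx cross_node = { .shares_llc = false,
				       .node_stays_balanced = true };
	bool affine = wake_affine_decision(&cross_node, &attempts, &nr_affine);

	printf("affine=%d attempts=%lu nr_affine=%lu\n",
	       (int)affine, attempts, nr_affine);
	return 0;
}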