@@ -776,29 +776,16 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-        cfs_rq->task_weight += weight;
-}
-#else
-static inline void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-}
-#endif
-
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         update_load_add(&cfs_rq->load, se->load.weight);
         if (!parent_entity(se))
                 update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
-        if (entity_is_task(se)) {
-                add_cfs_task_weight(cfs_rq, se->load.weight);
-                list_add(&se->group_node, &cfs_rq->tasks);
-        }
+#ifdef CONFIG_SMP
+        if (entity_is_task(se))
+                list_add(&se->group_node, &rq_of(cfs_rq)->cfs_tasks);
+#endif
         cfs_rq->nr_running++;
 }
 
@@ -808,10 +795,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
         update_load_sub(&cfs_rq->load, se->load.weight);
         if (!parent_entity(se))
                 update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
-        if (entity_is_task(se)) {
-                add_cfs_task_weight(cfs_rq, -se->load.weight);
+        if (entity_is_task(se))
                 list_del_init(&se->group_node);
-        }
         cfs_rq->nr_running--;
 }
 
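For orientation (not part of the commit): the two hunks above drop the per-cfs_rq `tasks` lists and thread every runnable task onto a single `cfs_tasks` list on the runqueue, via the `se.group_node` node embedded in each task. A minimal user-space sketch of that intrusive-list bookkeeping, using invented stand-in types (`toy_task`, `toy_rq`) and hand-rolled helpers in place of the kernel's `<linux/list.h>`:

/*
 * Sketch only: one rq-wide list of runnable tasks, linked through a node
 * embedded in each task.  Not kernel code; names are illustrative.
 */
#include <stddef.h>
#include <stdio.h>

struct list_node { struct list_node *prev, *next; };

static void list_init(struct list_node *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_node *n, struct list_node *h)
{
        n->prev = h->prev;
        n->next = h;
        h->prev->next = n;
        h->prev = n;
}

static void list_del_init(struct list_node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        list_init(n);
}

struct toy_task { int pid; struct list_node group_node; };
struct toy_rq   { struct list_node cfs_tasks; };  /* previously one list per cfs_rq */

int main(void)
{
        struct toy_rq rq;
        struct toy_task a = { .pid = 1 }, b = { .pid = 2 };

        list_init(&rq.cfs_tasks);
        list_init(&a.group_node);
        list_init(&b.group_node);

        /* enqueue-side bookkeeping: link the task onto the rq-wide list ... */
        list_add_tail(&a.group_node, &rq.cfs_tasks);
        list_add_tail(&b.group_node, &rq.cfs_tasks);

        /* ... so a load balancer can walk every runnable task in one pass */
        for (struct list_node *n = rq.cfs_tasks.next; n != &rq.cfs_tasks; n = n->next) {
                struct toy_task *t = (struct toy_task *)
                        ((char *)n - offsetof(struct toy_task, group_node));
                printf("runnable pid %d\n", t->pid);
        }

        /* dequeue-side bookkeeping: unlink the task again */
        list_del_init(&a.group_node);
        return 0;
}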
@@ -3085,24 +3070,25 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
 static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
 #define LBF_ALL_PINNED  0x01
-#define LBF_NEED_BREAK  0x02            /* clears into HAD_BREAK */
-#define LBF_HAD_BREAK   0x04
-#define LBF_HAD_BREAKS  0x0C            /* count HAD_BREAKs overflows into ABORT */
-#define LBF_ABORT       0x10
+#define LBF_NEED_BREAK  0x02
+#define LBF_ABORT       0x04
 
 struct lb_env {
         struct sched_domain     *sd;
 
         int                     src_cpu;
         struct rq               *src_rq;
-        struct cfs_rq           *src_cfs_rq;
 
         int                     dst_cpu;
         struct rq               *dst_rq;
 
         enum cpu_idle_type      idle;
         unsigned long           max_load_move;
         unsigned int            flags;
+
+        unsigned int            loop;
+        unsigned int            loop_break;
+        unsigned int            loop_max;
 };
 
 /*
@@ -3208,53 +3194,69 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 static int move_one_task(struct lb_env *env)
 {
         struct task_struct *p, *n;
-        struct cfs_rq *cfs_rq;
 
-        for_each_leaf_cfs_rq(env->src_rq, cfs_rq) {
-                list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
-                        if (throttled_lb_pair(task_group(p),
-                                              env->src_cpu, env->dst_cpu))
-                                break;
+        list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
+                if (throttled_lb_pair(task_group(p), env->src_rq->cpu, env->dst_cpu))
+                        continue;
 
-                        if (!can_migrate_task(p, env))
-                                continue;
+                if (!can_migrate_task(p, env))
+                        continue;
 
-                        move_task(p, env);
-                        /*
-                         * Right now, this is only the second place move_task()
-                         * is called, so we can safely collect move_task()
-                         * stats here rather than inside move_task().
-                         */
-                        schedstat_inc(env->sd, lb_gained[env->idle]);
-                        return 1;
-                }
+                move_task(p, env);
+                /*
+                 * Right now, this is only the second place move_task()
+                 * is called, so we can safely collect move_task()
+                 * stats here rather than inside move_task().
+                 */
+                schedstat_inc(env->sd, lb_gained[env->idle]);
+                return 1;
         }
-
         return 0;
 }
 
+static unsigned long task_h_load(struct task_struct *p);
+
 static unsigned long balance_tasks(struct lb_env *env)
 {
-        int loops = 0, pulled = 0;
         long rem_load_move = env->max_load_move;
         struct task_struct *p, *n;
+        unsigned long load;
+        int pulled = 0;
 
         if (env->max_load_move == 0)
                 goto out;
 
-        list_for_each_entry_safe(p, n, &env->src_cfs_rq->tasks, se.group_node) {
-                if (loops++ > sysctl_sched_nr_migrate) {
+        list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
+                env->loop++;
+                /* We've more or less seen every task there is, call it quits */
+                if (env->loop > env->loop_max) {
+                        env->flags |= LBF_ABORT;
+                        break;
+                }
+                /* take a breather every nr_migrate tasks */
+                if (env->loop > env->loop_break) {
+                        env->loop_break += sysctl_sched_nr_migrate;
                         env->flags |= LBF_NEED_BREAK;
                         break;
                 }
 
-                if ((p->se.load.weight >> 1) > rem_load_move ||
-                    !can_migrate_task(p, env))
-                        continue;
+                if (throttled_lb_pair(task_group(p), env->src_rq->cpu,
+                                      env->dst_cpu))
+                        goto next;
+
+                load = task_h_load(p);
+                if (load < 16 && !env->sd->nr_balance_failed)
+                        goto next;
+
+                if ((load / 2) > rem_load_move)
+                        goto next;
+
+                if (!can_migrate_task(p, env))
+                        goto next;
 
                 move_task(p, env);
                 pulled++;
-                rem_load_move -= p->se.load.weight;
+                rem_load_move -= load;
 
 #ifdef CONFIG_PREEMPT
                 /*
@@ -3274,6 +3276,10 @@ static unsigned long balance_tasks(struct lb_env *env)
                  */
                 if (rem_load_move <= 0)
                         break;
+
+                continue;
+next:
+                list_move_tail(&p->se.group_node, &env->src_rq->cfs_tasks);
         }
 out:
         /*
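As an aside, not part of the commit: the reworked balance_tasks() above bounds its scan with three pieces of state — `loop_max` (set to the busiest rq's task count, so the scan gives up via LBF_ABORT once everything has been looked at), `loop_break` (take a breather every `sysctl_sched_nr_migrate` tasks and let the caller retry via LBF_NEED_BREAK), and the `next:` label, which rotates skipped tasks to the tail of `cfs_tasks`. A standalone sketch of that control flow only, with invented names (`toy_env`, `scan_batch`, `NR_MIGRATE`) and an array cursor standing in for the task list:

/*
 * Sketch only: how loop/loop_break/loop_max bound a batched scan.
 * Not kernel code; all names are illustrative.
 */
#include <stdio.h>

#define NR_MIGRATE 32                   /* stand-in for sysctl_sched_nr_migrate */

enum { LBF_NEED_BREAK = 0x02, LBF_ABORT = 0x04 };

struct toy_env {
        unsigned int flags;
        unsigned int loop, loop_break, loop_max;
        int cursor;                     /* next candidate to look at */
};

/* One batch of the scan; returns how many candidates were "pulled". */
static int scan_batch(struct toy_env *env, int ntasks)
{
        int pulled = 0;

        while (env->cursor < ntasks) {
                env->loop++;
                if (env->loop > env->loop_max) {        /* seen everything: give up */
                        env->flags |= LBF_ABORT;
                        break;
                }
                if (env->loop > env->loop_break) {      /* breather: let the caller retry */
                        env->loop_break += NR_MIGRATE;
                        env->flags |= LBF_NEED_BREAK;
                        break;
                }
                /* pretend every other candidate is migratable; in the kernel a
                 * skipped task is rotated to the list tail at the next: label */
                if ((env->cursor & 1) == 0)
                        pulled++;
                env->cursor++;
        }
        return pulled;
}

int main(void)
{
        struct toy_env env = { .loop_break = NR_MIGRATE, .loop_max = 100 };
        int pulled = 0;

        do {                                            /* mirrors load_balance()'s redo */
                env.flags &= ~LBF_NEED_BREAK;
                pulled += scan_batch(&env, 100);
        } while (env.flags & LBF_NEED_BREAK);

        printf("pulled %d, aborted %d\n", pulled, !!(env.flags & LBF_ABORT));
        return 0;
}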
@@ -3363,65 +3369,33 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+        rcu_read_lock();
         walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+        rcu_read_unlock();
 }
 
-static unsigned long load_balance_fair(struct lb_env *env)
+static unsigned long task_h_load(struct task_struct *p)
 {
-        unsigned long max_load_move = env->max_load_move;
-        long rem_load_move = env->max_load_move;
-
-        rcu_read_lock();
-        update_h_load(cpu_of(env->src_rq));
-
-        for_each_leaf_cfs_rq(env->src_rq, env->src_cfs_rq) {
-                unsigned long busiest_h_load = env->src_cfs_rq->h_load;
-                unsigned long busiest_weight = env->src_cfs_rq->load.weight;
-                u64 rem_load, moved_load;
-
-                if (env->flags & (LBF_NEED_BREAK|LBF_ABORT))
-                        break;
-
-                /*
-                 * empty group or part of a throttled hierarchy
-                 */
-                if (!env->src_cfs_rq->task_weight)
-                        continue;
-
-                if (throttled_lb_pair(env->src_cfs_rq->tg,
-                                      cpu_of(env->src_rq),
-                                      env->dst_cpu))
-                        continue;
-
-                rem_load = (u64)rem_load_move * busiest_weight;
-                rem_load = div_u64(rem_load, busiest_h_load + 1);
-
-                env->max_load_move = rem_load;
-
-                moved_load = balance_tasks(env);
-                if (!moved_load)
-                        continue;
-
-                moved_load *= busiest_h_load;
-                moved_load = div_u64(moved_load, busiest_weight + 1);
+        struct cfs_rq *cfs_rq = task_cfs_rq(p);
+        unsigned long load;
 
-                rem_load_move -= moved_load;
-                if (rem_load_move < 0)
-                        break;
-        }
-        rcu_read_unlock();
+        load = p->se.load.weight;
+        load = div_u64(load * cfs_rq->h_load, cfs_rq->load.weight + 1);
 
-        return max_load_move - rem_load_move;
+        return load;
 }
 #else
 static inline void update_shares(int cpu)
 {
 }
 
-static unsigned long load_balance_fair(struct lb_env *env)
+static inline void update_h_load(long cpu)
 {
-        env->src_cfs_rq = &env->src_rq->cfs;
-        return balance_tasks(env);
+}
+
+static unsigned long task_h_load(struct task_struct *p)
+{
+        return p->se.load.weight;
 }
 #endif
 
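Another aside, not part of the commit: the new task_h_load() replaces the per-group scaling that load_balance_fair() used to perform. A task's contribution at the root level is its own weight scaled by its cfs_rq's hierarchical load (`h_load`) over the weight queued on that cfs_rq, with the `+ 1` guarding the division for a near-empty group. A small arithmetic sketch under simplified assumptions (plain 64-bit integers, invented numbers, stand-in types rather than kernel ones):

/*
 * Sketch only: the task_h_load() arithmetic with made-up numbers.
 * toy_cfs_rq and toy_task_h_load() are illustrative stand-ins.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_cfs_rq {
        uint64_t h_load;        /* this group's load as seen at the root */
        uint64_t load_weight;   /* sum of weights queued on this cfs_rq */
};

/* Mirrors:  load = p->se.load.weight;
 *           load = div_u64(load * cfs_rq->h_load, cfs_rq->load.weight + 1); */
static uint64_t toy_task_h_load(uint64_t task_weight, const struct toy_cfs_rq *cfs_rq)
{
        return (task_weight * cfs_rq->h_load) / (cfs_rq->load_weight + 1);
}

int main(void)
{
        /* A group contributing 512 units of load at the root, with 2048 units
         * of weight queued on it: a nice-0 task (weight 1024) owns half of
         * that queue, so it accounts for roughly half the group's root load. */
        struct toy_cfs_rq grp = { .h_load = 512, .load_weight = 2048 };

        printf("h_load of a 1024-weight task: %llu\n",
               (unsigned long long)toy_task_h_load(1024, &grp));        /* ~255 */
        return 0;
}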
@@ -3437,9 +3411,10 @@ static int move_tasks(struct lb_env *env)
         unsigned long max_load_move = env->max_load_move;
         unsigned long total_load_moved = 0, load_moved;
 
+        update_h_load(cpu_of(env->src_rq));
         do {
                 env->max_load_move = max_load_move - total_load_moved;
-                load_moved = load_balance_fair(env);
+                load_moved = balance_tasks(env);
                 total_load_moved += load_moved;
 
                 if (env->flags & (LBF_NEED_BREAK|LBF_ABORT))
@@ -4464,6 +4439,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                 .dst_cpu        = this_cpu,
                 .dst_rq         = this_rq,
                 .idle           = idle,
+                .loop_break     = sysctl_sched_nr_migrate,
         };
 
         cpumask_copy(cpus, cpu_active_mask);
@@ -4504,6 +4480,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
         env.max_load_move = imbalance;
         env.src_cpu = busiest->cpu;
         env.src_rq = busiest;
+        env.loop_max = busiest->nr_running;
 
         local_irq_save(flags);
         double_rq_lock(this_rq, busiest);
@@ -4521,9 +4498,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                 goto out_balanced;
 
         if (env.flags & LBF_NEED_BREAK) {
-                env.flags += LBF_HAD_BREAK - LBF_NEED_BREAK;
-                if (env.flags & LBF_ABORT)
-                        goto out_balanced;
+                env.flags &= ~LBF_NEED_BREAK;
                 goto redo;
         }
 
@@ -5357,7 +5332,6 @@ static void set_curr_task_fair(struct rq *rq)
 void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
         cfs_rq->tasks_timeline = RB_ROOT;
-        INIT_LIST_HEAD(&cfs_rq->tasks);
         cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 #ifndef CONFIG_64BIT
         cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;