@@ -784,7 +784,7 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
 #ifdef CONFIG_SMP
 	if (entity_is_task(se))
-		list_add(&se->group_node, &rq_of(cfs_rq)->cfs_tasks);
+		list_add_tail(&se->group_node, &rq_of(cfs_rq)->cfs_tasks);
 #endif
 	cfs_rq->nr_running++;
 }
@@ -3071,7 +3071,6 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
 #define LBF_ALL_PINNED	0x01
 #define LBF_NEED_BREAK	0x02
-#define LBF_ABORT	0x04
 
 struct lb_env {
 	struct sched_domain	*sd;
@@ -3083,7 +3082,7 @@ struct lb_env {
 	struct rq		*dst_rq;
 
 	enum cpu_idle_type	idle;
-	unsigned long		max_load_move;
+	long			load_move;
 	unsigned int		flags;
 
 	unsigned int		loop;
@@ -3216,80 +3215,86 @@ static int move_one_task(struct lb_env *env)
 
 static unsigned long task_h_load(struct task_struct *p);
 
-static unsigned long balance_tasks(struct lb_env *env)
+/*
+ * move_tasks tries to move up to load_move weighted load from busiest to
+ * this_rq, as part of a balancing operation within domain "sd".
+ * Returns 1 if successful and 0 otherwise.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_tasks(struct lb_env *env)
 {
-	long rem_load_move = env->max_load_move;
-	struct task_struct *p, *n;
+	struct list_head *tasks = &env->src_rq->cfs_tasks;
+	struct task_struct *p;
 	unsigned long load;
 	int pulled = 0;
 
-	if (env->max_load_move == 0)
-		goto out;
+	if (env->load_move <= 0)
+		return 0;
+
+	while (!list_empty(tasks)) {
+		p = list_first_entry(tasks, struct task_struct, se.group_node);
 
-	list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
 		env->loop++;
 		/* We've more or less seen every task there is, call it quits */
-		if (env->loop > env->loop_max) {
-			env->flags |= LBF_ABORT;
+		if (env->loop > env->loop_max)
 			break;
-		}
-		/* take a beather every nr_migrate tasks */
+
+		/* take a breather every nr_migrate tasks */
 		if (env->loop > env->loop_break) {
 			env->loop_break += sysctl_sched_nr_migrate;
 			env->flags |= LBF_NEED_BREAK;
 			break;
 		}
 
-		if (throttled_lb_pair(task_group(p), env->src_rq->cpu,
-					env->dst_cpu))
+		if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
 			goto next;
 
 		load = task_h_load(p);
+
 		if (load < 16 && !env->sd->nr_balance_failed)
 			goto next;
 
-		if ((load * 2) > rem_load_move)
+		if ((load / 2) > env->load_move)
 			goto next;
 
 		if (!can_migrate_task(p, env))
 			goto next;
 
 		move_task(p, env);
		pulled++;
-		rem_load_move -= load;
+		env->load_move -= load;
 
 #ifdef CONFIG_PREEMPT
 		/*
 		 * NEWIDLE balancing is a source of latency, so preemptible
 		 * kernels will stop after the first task is pulled to minimize
 		 * the critical section.
 		 */
-		if (env->idle == CPU_NEWLY_IDLE) {
-			env->flags |= LBF_ABORT;
+		if (env->idle == CPU_NEWLY_IDLE)
 			break;
-		}
 #endif
 
 		/*
 		 * We only want to steal up to the prescribed amount of
 		 * weighted load.
 		 */
-		if (rem_load_move <= 0)
+		if (env->load_move <= 0)
 			break;
 
 		continue;
 next:
-		list_move_tail(&p->se.group_node, &env->src_rq->cfs_tasks);
+		list_move_tail(&p->se.group_node, tasks);
 	}
-out:
+
 	/*
 	 * Right now, this is one of only two places move_task() is called,
 	 * so we can safely collect move_task() stats here rather than
 	 * inside move_task().
 	 */
 	schedstat_add(env->sd, lb_gained[env->idle], pulled);
 
-	return env->max_load_move - rem_load_move;
+	return pulled;
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -3399,43 +3404,6 @@ static unsigned long task_h_load(struct task_struct *p)
 }
 #endif
 
-/*
- * move_tasks tries to move up to max_load_move weighted load from busiest to
- * this_rq, as part of a balancing operation within domain "sd".
- * Returns 1 if successful and 0 otherwise.
- *
- * Called with both runqueues locked.
- */
-static int move_tasks(struct lb_env *env)
-{
-	unsigned long max_load_move = env->max_load_move;
-	unsigned long total_load_moved = 0, load_moved;
-
-	update_h_load(cpu_of(env->src_rq));
-	do {
-		env->max_load_move = max_load_move - total_load_moved;
-		load_moved = balance_tasks(env);
-		total_load_moved += load_moved;
-
-		if (env->flags & (LBF_NEED_BREAK|LBF_ABORT))
-			break;
-
-#ifdef CONFIG_PREEMPT
-		/*
-		 * NEWIDLE balancing is a source of latency, so preemptible
-		 * kernels will stop after the first task is pulled to minimize
-		 * the critical section.
-		 */
-		if (env->idle == CPU_NEWLY_IDLE && env->dst_rq->nr_running) {
-			env->flags |= LBF_ABORT;
-			break;
-		}
-#endif
-	} while (load_moved && max_load_move > total_load_moved);
-
-	return total_load_moved > 0;
-}
-
 /********** Helpers for find_busiest_group ************************/
 /*
  * sd_lb_stats - Structure to store the statistics of a sched_domain
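
The reworked move_tasks() above relies on two things the rest of the patch sets up: tasks are now enqueued at the tail of rq->cfs_tasks (see the account_entity_enqueue() hunk), and tasks that cannot be migrated are rotated to the tail with list_move_tail(), so list_first_entry() keeps yielding tasks that have not been considered yet until env->loop exceeds loop_max. Below is a minimal userspace sketch of that rotate-to-tail scan; the list helpers, struct task fields, and thresholds here are simplified stand-ins for illustration, not the kernel's list_head API or task_struct.

/* rotate_scan.c -- illustrative userspace sketch, not kernel code.
 * Build: cc -Wall -o rotate_scan rotate_scan.c */
#include <stdio.h>

/* simplified stand-in for struct list_head plus per-task bookkeeping */
struct task {
	int load;			/* stand-in for task_h_load(p) */
	int pinned;			/* stand-in for can_migrate_task() failing */
	struct task *prev, *next;
};

static void list_init(struct task *head)
{
	head->prev = head->next = head;
}

static int list_empty(const struct task *head)
{
	return head->next == head;
}

static void list_del(struct task *t)
{
	t->prev->next = t->next;
	t->next->prev = t->prev;
}

static void list_add_tail(struct task *t, struct task *head)
{
	t->prev = head->prev;
	t->next = head;
	head->prev->next = t;
	head->prev = t;
}

/*
 * Shape of the new move_tasks() loop: always look at the head of the list,
 * migrate if possible, otherwise rotate the task to the tail so the next
 * iteration sees one we have not considered yet.  loop vs loop_max bounds
 * the scan even when nothing is migratable.
 */
static int move_tasks(struct task *src, struct task *dst,
		      long load_move, unsigned int loop_max)
{
	unsigned int loop = 0;
	int pulled = 0;

	while (!list_empty(src)) {
		struct task *p = src->next;	/* list_first_entry() */

		if (++loop > loop_max)		/* seen every task, call it quits */
			break;

		if (p->pinned || p->load / 2 > load_move) {
			list_del(p);		/* "goto next": rotate to tail */
			list_add_tail(p, src);
			continue;
		}

		list_del(p);			/* move_task(): src -> dst */
		list_add_tail(p, dst);
		pulled++;
		load_move -= p->load;

		if (load_move <= 0)		/* stole the prescribed amount */
			break;
	}
	return pulled;
}

int main(void)
{
	struct task head, dst;
	struct task t[4] = {
		{ .load = 512, .pinned = 1 },
		{ .load = 256 },
		{ .load = 1024 },
		{ .load = 128 },
	};
	int i;

	list_init(&head);
	list_init(&dst);
	for (i = 0; i < 4; i++)
		list_add_tail(&t[i], &head);

	/* pulls the 256 and 1024 entries, skips the pinned one */
	printf("pulled %d task(s)\n", move_tasks(&head, &dst, 1024, 8));
	return 0;
}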
@@ -4477,31 +4445,31 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		 * correctly treated as an imbalance.
 		 */
 		env.flags |= LBF_ALL_PINNED;
-		env.max_load_move = imbalance;
+		env.load_move = imbalance;
 		env.src_cpu = busiest->cpu;
 		env.src_rq = busiest;
 		env.loop_max = busiest->nr_running;
 
+more_balance:
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
-		ld_moved = move_tasks(&env);
+		if (!env.loop)
+			update_h_load(env.src_cpu);
+		ld_moved += move_tasks(&env);
 		double_rq_unlock(this_rq, busiest);
 		local_irq_restore(flags);
 
+		if (env.flags & LBF_NEED_BREAK) {
+			env.flags &= ~LBF_NEED_BREAK;
+			goto more_balance;
+		}
+
 		/*
 		 * some other cpu did the load balance for us.
 		 */
 		if (ld_moved && this_cpu != smp_processor_id())
 			resched_cpu(this_cpu);
 
-		if (env.flags & LBF_ABORT)
-			goto out_balanced;
-
-		if (env.flags & LBF_NEED_BREAK) {
-			env.flags &= ~LBF_NEED_BREAK;
-			goto redo;
-		}
-
 		/* All tasks on this runqueue were pinned by CPU affinity */
 		if (unlikely(env.flags & LBF_ALL_PINNED)) {
 			cpumask_clear_cpu(cpu_of(busiest), cpus);
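
With LBF_ABORT gone, load_balance() now resumes the scan itself: when move_tasks() sets LBF_NEED_BREAK after sysctl_sched_nr_migrate iterations, the caller drops the runqueue locks, clears the flag, and jumps back to more_balance, accumulating ld_moved across passes (env.loop carries over, so update_h_load() only runs on the first pass). Below is a rough userspace sketch of that bounded-chunk retry pattern; the single pthread mutex stands in for the double runqueue lock and all names are illustrative, not kernel APIs.

/* chunked_retry.c -- illustrative userspace sketch, not kernel code.
 * Build: cc -Wall -pthread -o chunked_retry chunked_retry.c */
#include <pthread.h>
#include <stdio.h>

#define LBF_NEED_BREAK	0x02	/* same flag value as in the patch */
#define CHUNK		4	/* stand-in for sysctl_sched_nr_migrate */

/* stand-in for the two runqueue locks taken by double_rq_lock() */
static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

/* do at most CHUNK units of work; flag NEED_BREAK if work remains */
static int move_chunk(int *remaining, unsigned int *flags)
{
	int moved = 0;

	while (*remaining > 0) {
		(*remaining)--;
		moved++;
		if (moved >= CHUNK && *remaining > 0) {
			*flags |= LBF_NEED_BREAK;	/* take a breather */
			break;
		}
	}
	return moved;
}

int main(void)
{
	unsigned int flags = 0;
	int remaining = 10;
	int ld_moved = 0;

more_balance:
	pthread_mutex_lock(&rq_lock);		/* double_rq_lock()   */
	ld_moved += move_chunk(&remaining, &flags);
	pthread_mutex_unlock(&rq_lock);		/* double_rq_unlock() */

	/* breather taken: clear the flag and pick up where we stopped */
	if (flags & LBF_NEED_BREAK) {
		flags &= ~LBF_NEED_BREAK;
		goto more_balance;
	}

	printf("moved %d item(s) in bounded chunks\n", ld_moved);
	return 0;
}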