@@ -601,9 +601,134 @@ static inline bool entity_before(const struct sched_entity *a,
	return (s64)(a->vruntime - b->vruntime) < 0;
}

+ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+ 	return (s64)(se->vruntime - cfs_rq->min_vruntime);
+ }
+
#define __node_2_se(node) \
	rb_entry((node), struct sched_entity, run_node)

+ /*
+  * Compute virtual time from the per-task service numbers:
+  *
+  * Fair schedulers conserve lag:
+  *
+  *   \Sum lag_i = 0
+  *
+  * Where lag_i is given by:
+  *
+  *   lag_i = S - s_i = w_i * (V - v_i)
+  *
+  * Where S is the ideal service time and V is its virtual time counterpart.
+  * Therefore:
+  *
+  *   \Sum lag_i = 0
+  *   \Sum w_i * (V - v_i) = 0
+  *   \Sum w_i * V - w_i * v_i = 0
+  *
+  * From which we can solve an expression for V in v_i (which we have in
+  * se->vruntime):
+  *
+  *       \Sum v_i * w_i   \Sum v_i * w_i
+  *   V = -------------- = --------------
+  *          \Sum w_i            W
+  *
+  * Specifically, this is the weighted average of all entity virtual runtimes.
+  *
+  * [[ NOTE: this is only equal to the ideal scheduler under the condition
+  *          that join/leave operations happen at lag_i = 0, otherwise the
+  *          virtual time has non-contiguous motion equivalent to:
+  *
+  *            V +-= lag_i / W
+  *
+  *          Also see the comment in place_entity() that deals with this. ]]
+  *
+  * However, since v_i is u64, and the multiplication could easily overflow,
+  * transform it into a relative form that uses smaller quantities:
+  *
+  * Substitute: v_i == (v_i - v0) + v0
+  *
+  *     \Sum ((v_i - v0) + v0) * w_i   \Sum (v_i - v0) * w_i
+  * V = ---------------------------- = --------------------- + v0
+  *                  W                            W
+  *
+  * Which we track using:
+  *
+  *                    v0 := cfs_rq->min_vruntime
+  * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
+  *              \Sum w_i := cfs_rq->avg_load
+  *
+  * Since min_vruntime is a monotonically increasing variable that closely
+  * tracks the per-task service, these deltas, (v_i - v0), will be on the
+  * order of the maximal (virtual) lag induced in the system due to
+  * quantisation.
+  *
+  * Also, we use scale_load_down() to reduce the size.
+  *
+  * As measured, the max (key * weight) value was ~44 bits for a kernel build.
+  */
+ static void
+ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+ 	unsigned long weight = scale_load_down(se->load.weight);
+ 	s64 key = entity_key(cfs_rq, se);
+
+ 	cfs_rq->avg_vruntime += key * weight;
+ 	cfs_rq->avg_load += weight;
+ }
+
+ static void
+ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
+ {
+ 	unsigned long weight = scale_load_down(se->load.weight);
+ 	s64 key = entity_key(cfs_rq, se);
+
+ 	cfs_rq->avg_vruntime -= key * weight;
+ 	cfs_rq->avg_load -= weight;
+ }
+
+ static inline
+ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+ {
+ 	/*
+ 	 * v' = v + d ==> avg_vruntime' = avg_vruntime - d*avg_load
+ 	 */
+ 	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+ }
+
+ u64 avg_vruntime(struct cfs_rq *cfs_rq)
+ {
+ 	struct sched_entity *curr = cfs_rq->curr;
+ 	s64 avg = cfs_rq->avg_vruntime;
+ 	long load = cfs_rq->avg_load;
+
+ 	if (curr && curr->on_rq) {
+ 		unsigned long weight = scale_load_down(curr->load.weight);
+
+ 		avg += entity_key(cfs_rq, curr) * weight;
+ 		load += weight;
+ 	}
+
+ 	if (load)
+ 		avg = div_s64(avg, load);
+
+ 	return cfs_rq->min_vruntime + avg;
+ }
+
+ static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
+ {
+ 	u64 min_vruntime = cfs_rq->min_vruntime;
+ 	/*
+ 	 * open coded max_vruntime() to allow updating avg_vruntime
+ 	 */
+ 	s64 delta = (s64)(vruntime - min_vruntime);
+ 	if (delta > 0) {
+ 		avg_vruntime_update(cfs_rq, delta);
+ 		min_vruntime = vruntime;
+ 	}
+ 	return min_vruntime;
+ }
+
static void update_min_vruntime(struct cfs_rq *cfs_rq)
{
	struct sched_entity *curr = cfs_rq->curr;
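
To make the relative-form bookkeeping above concrete: the hunk accumulates \Sum (v_i - v0) * w_i in avg_vruntime and \Sum w_i in avg_load, and avg_vruntime() reconstructs V = min_vruntime + avg_vruntime / avg_load on demand. Below is a minimal userspace sketch (not kernel code; the toy_rq/toy_se names and the sample numbers are invented for illustration) showing that this relative form agrees with the direct weighted average of the vruntimes:

```c
/* Toy userspace model of the relative-form average; not kernel code. */
#include <stdio.h>
#include <stdint.h>

struct toy_se { uint64_t vruntime; unsigned long weight; };
struct toy_rq {
	uint64_t min_vruntime;
	int64_t  avg_vruntime;	/* \Sum (v_i - v0) * w_i */
	long     avg_load;	/* \Sum w_i              */
};

/* mirrors entity_key(): vruntime relative to v0 == min_vruntime */
static int64_t key(struct toy_rq *rq, struct toy_se *se)
{
	return (int64_t)(se->vruntime - rq->min_vruntime);
}

/* mirrors avg_vruntime_add(): accumulate key*weight and weight */
static void add(struct toy_rq *rq, struct toy_se *se)
{
	rq->avg_vruntime += key(rq, se) * (int64_t)se->weight;
	rq->avg_load     += se->weight;
}

/* mirrors avg_vruntime(): V = v0 + (\Sum (v_i - v0)*w_i) / (\Sum w_i) */
static uint64_t V(struct toy_rq *rq)
{
	int64_t avg = rq->avg_vruntime;

	if (rq->avg_load)
		avg /= rq->avg_load;
	return rq->min_vruntime + avg;
}

int main(void)
{
	struct toy_rq rq = { .min_vruntime = 1000 };
	struct toy_se se[] = { {1000, 1024}, {1300, 2048}, {1090, 512} };
	double direct = 0, W = 0;

	for (int i = 0; i < 3; i++) {
		add(&rq, &se[i]);
		direct += (double)se[i].vruntime * se[i].weight;
		W      += se[i].weight;
	}
	/* both forms agree, up to integer truncation */
	printf("relative form: V = %llu\n", (unsigned long long)V(&rq));
	printf("direct form:   V = %.1f\n", direct / W);
	return 0;
}
```

Keying everything to min_vruntime is what keeps the products small: since the keys stay on the order of the maximal lag, key * weight fit in ~44 bits in the kernel-build measurement quoted in the comment above.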
@@ -629,7 +754,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)

	/* ensure we never gain time by being placed backwards. */
	u64_u32_store(cfs_rq->min_vruntime,
- 		      max_vruntime(cfs_rq->min_vruntime, vruntime));
+ 		      __update_min_vruntime(cfs_rq, vruntime));
}

static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
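
The correction in avg_vruntime_update() is why __update_min_vruntime() open-codes max_vruntime(): whenever v0 = min_vruntime advances by d, every stored key (v_i - v0) shrinks by d, so the accumulated sum must shed d * avg_load or the reconstructed V would drift. A small illustrative check of that identity (userspace only; the values are invented):

```c
/* Toy check of the v0 shift: moving v0 forward by d shrinks every stored
 * key by d, so the sum must lose d * avg_load for V to stay put. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* three entities; weights and absolute vruntimes picked arbitrarily */
	uint64_t v[] = { 2000, 2300, 2090 };
	long     w[] = { 1024, 2048, 512  };

	uint64_t v0 = 2000;	/* stands in for cfs_rq->min_vruntime */
	int64_t  sum = 0;	/* stands in for cfs_rq->avg_vruntime */
	long     load = 0;	/* stands in for cfs_rq->avg_load     */

	for (int i = 0; i < 3; i++) {
		sum  += (int64_t)(v[i] - v0) * w[i];
		load += w[i];
	}
	printf("V before shift: %lld\n", (long long)(v0 + sum / load));

	/* mirrors __update_min_vruntime()/avg_vruntime_update(): advance v0 */
	int64_t delta = 50;
	sum -= load * delta;	/* avg_vruntime' = avg_vruntime - d*avg_load */
	v0  += delta;		/* min_vruntime' = min_vruntime + d          */

	printf("V after shift:  %lld\n", (long long)(v0 + sum / load));
	return 0;
}
```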
@@ -642,12 +767,14 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
 */
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
+ 	avg_vruntime_add(cfs_rq, se);
	rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less);
}

static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline);
+ 	avg_vruntime_sub(cfs_rq, se);
}

struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
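
With avg_vruntime_add()/avg_vruntime_sub() hooked into __enqueue_entity()/__dequeue_entity(), the running sums always describe exactly the entities currently in the rb-tree. The toy check below (userspace only; names and numbers invented) confirms that the incremental updates match a from-scratch recomputation after an enqueue/dequeue sequence:

```c
/* Toy check that incremental add/sub stays consistent with recomputing
 * the sums from scratch over the still-queued entities. */
#include <stdio.h>
#include <stdint.h>

#define N 3

int main(void)
{
	uint64_t v0 = 500;			/* stand-in for min_vruntime */
	uint64_t v[N] = { 500, 740, 620 };
	long     w[N] = { 1024, 512, 2048 };
	int      queued[N] = { 0, 0, 0 };

	int64_t sum = 0;			/* avg_vruntime */
	long    load = 0;			/* avg_load     */

	/* enqueue all three, then dequeue entity 1 */
	for (int i = 0; i < N; i++) {
		sum  += (int64_t)(v[i] - v0) * w[i];	/* like avg_vruntime_add() */
		load += w[i];
		queued[i] = 1;
	}
	sum  -= (int64_t)(v[1] - v0) * w[1];		/* like avg_vruntime_sub() */
	load -= w[1];
	queued[1] = 0;

	/* recompute from scratch over what is still queued */
	int64_t sum2 = 0;
	long    load2 = 0;
	for (int i = 0; i < N; i++) {
		if (!queued[i])
			continue;
		sum2  += (int64_t)(v[i] - v0) * w[i];
		load2 += w[i];
	}

	printf("incremental:  sum=%lld load=%ld\n", (long long)sum, load);
	printf("from scratch: sum=%lld load=%ld\n", (long long)sum2, load2);
	return 0;
}
```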
@@ -3379,6 +3506,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
		/* commit outstanding execution time */
		if (cfs_rq->curr == se)
			update_curr(cfs_rq);
+ 		else
+ 			avg_vruntime_sub(cfs_rq, se);
		update_load_sub(&cfs_rq->load, se->load.weight);
	}
	dequeue_load_avg(cfs_rq, se);
@@ -3394,9 +3523,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
#endif

	enqueue_load_avg(cfs_rq, se);
- 	if (se->on_rq)
+ 	if (se->on_rq) {
		update_load_add(&cfs_rq->load, se->load.weight);
-
+ 		if (cfs_rq->curr != se)
+ 			avg_vruntime_add(cfs_rq, se);
+ 	}
}

void reweight_task(struct task_struct *p, int prio)
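
In the two reweight_entity() hunks, the stored contribution is key * weight, so it has to be removed while se->load.weight still holds the old weight and re-added only once the new weight is in place; cfs_rq->curr is skipped on both sides because the running entity is not part of the sums (avg_vruntime() folds it in on the fly). A toy illustration of that remove/re-add pattern (userspace only; the helper names and numbers are invented):

```c
/* Toy illustration of the reweight pattern: drop the old key*weight
 * contribution, change the weight, then put the new contribution back. */
#include <stdio.h>
#include <stdint.h>

struct toy { uint64_t v0; int64_t sum; long load; };

static void sub(struct toy *t, uint64_t v, long w)
{
	t->sum  -= (int64_t)(v - t->v0) * w;
	t->load -= w;
}

static void add(struct toy *t, uint64_t v, long w)
{
	t->sum  += (int64_t)(v - t->v0) * w;
	t->load += w;
}

int main(void)
{
	struct toy t = { .v0 = 100 };
	uint64_t v = 160;		/* the entity being reweighted */
	long old_w = 1024, new_w = 2048;

	add(&t, 130, 512);		/* some other queued entity */
	add(&t, v, old_w);

	/* remove old contribution, switch weight, re-add new contribution */
	sub(&t, v, old_w);
	add(&t, v, new_w);

	/* from-scratch value with the new weight */
	int64_t want = (int64_t)(130 - t.v0) * 512 + (int64_t)(v - t.v0) * new_w;
	printf("tracked sum %lld, expected %lld\n",
	       (long long)t.sum, (long long)want);
	return 0;
}
```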