Skip to content

Commit 245c8e8

Browse files
committed
Merge tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:

 - Fix EEVDF corner cases

 - Fix two nohz_full= related bugs that can cause boot crashes and warnings

* tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/isolation: Fix boot crash when maxcpus < first housekeeping CPU
  sched/isolation: Prevent boot crash when the boot CPU is nohz_full
  sched/eevdf: Prevent vlag from going out of bounds in reweight_eevdf()
  sched/eevdf: Fix miscalculation in reweight_entity() when se is not curr
  sched/eevdf: Always update V if se->on_rq when reweighting
2 parents aec147c + 257bf89 commit 245c8e8

File tree

3 files changed

+38
-21
lines changed

3 files changed

+38
-21
lines changed

Documentation/timers/no_hz.rst

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,8 @@ adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain
129129
online to handle timekeeping tasks in order to ensure that system
130130
calls like gettimeofday() return accurate values on adaptive-tick CPUs.
131131
(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
132-
user processes to observe slight drifts in clock rate.) Therefore, the
133-
boot CPU is prohibited from entering adaptive-ticks mode. Specifying a
134-
"nohz_full=" mask that includes the boot CPU will result in a boot-time
135-
error message, and the boot CPU will be removed from the mask. Note that
136-
this means that your system must have at least two CPUs in order for
132+
user processes to observe slight drifts in clock rate.) Note that this
133+
means that your system must have at least two CPUs in order for
137134
CONFIG_NO_HZ_FULL=y to do anything for you.
138135

139136
Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.

kernel/sched/fair.c

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -696,15 +696,21 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
696696
*
697697
* XXX could add max_slice to the augmented data to track this.
698698
*/
699-
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
699+
static s64 entity_lag(u64 avruntime, struct sched_entity *se)
700700
{
701-
s64 lag, limit;
701+
s64 vlag, limit;
702+
703+
vlag = avruntime - se->vruntime;
704+
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
702705

706+
return clamp(vlag, -limit, limit);
707+
}
708+
709+
static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
710+
{
703711
SCHED_WARN_ON(!se->on_rq);
704-
lag = avg_vruntime(cfs_rq) - se->vruntime;
705712

706-
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
707-
se->vlag = clamp(lag, -limit, limit);
713+
se->vlag = entity_lag(avg_vruntime(cfs_rq), se);
708714
}
709715

710716
/*
@@ -3676,11 +3682,10 @@ static inline void
36763682
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
36773683
#endif
36783684

3679-
static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
3685+
static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
36803686
unsigned long weight)
36813687
{
36823688
unsigned long old_weight = se->load.weight;
3683-
u64 avruntime = avg_vruntime(cfs_rq);
36843689
s64 vlag, vslice;
36853690

36863691
/*
@@ -3761,7 +3766,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
37613766
* = V - vl'
37623767
*/
37633768
if (avruntime != se->vruntime) {
3764-
vlag = (s64)(avruntime - se->vruntime);
3769+
vlag = entity_lag(avruntime, se);
37653770
vlag = div_s64(vlag * old_weight, weight);
37663771
se->vruntime = avruntime - vlag;
37673772
}
@@ -3787,25 +3792,26 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
37873792
unsigned long weight)
37883793
{
37893794
bool curr = cfs_rq->curr == se;
3795+
u64 avruntime;
37903796

37913797
if (se->on_rq) {
37923798
/* commit outstanding execution time */
3793-
if (curr)
3794-
update_curr(cfs_rq);
3795-
else
3799+
update_curr(cfs_rq);
3800+
avruntime = avg_vruntime(cfs_rq);
3801+
if (!curr)
37963802
__dequeue_entity(cfs_rq, se);
37973803
update_load_sub(&cfs_rq->load, se->load.weight);
37983804
}
37993805
dequeue_load_avg(cfs_rq, se);
38003806

3801-
if (!se->on_rq) {
3807+
if (se->on_rq) {
3808+
reweight_eevdf(se, avruntime, weight);
3809+
} else {
38023810
/*
38033811
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
38043812
* we need to scale se->vlag when w_i changes.
38053813
*/
38063814
se->vlag = div_s64(se->vlag * se->load.weight, weight);
3807-
} else {
3808-
reweight_eevdf(cfs_rq, se, weight);
38093815
}
38103816

38113817
update_load_set(&se->load, weight);

kernel/sched/isolation.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,16 @@ int housekeeping_any_cpu(enum hk_type type)
4646
if (cpu < nr_cpu_ids)
4747
return cpu;
4848

49-
return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
49+
cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
50+
if (likely(cpu < nr_cpu_ids))
51+
return cpu;
52+
/*
53+
* Unless we have another problem this can only happen
54+
* at boot time before start_secondary() brings the 1st
55+
* housekeeping CPU up.
56+
*/
57+
WARN_ON_ONCE(system_state == SYSTEM_RUNNING ||
58+
type != HK_TYPE_TIMER);
5059
}
5160
}
5261
return smp_processor_id();
@@ -109,6 +118,7 @@ static void __init housekeeping_setup_type(enum hk_type type,
109118
static int __init housekeeping_setup(char *str, unsigned long flags)
110119
{
111120
cpumask_var_t non_housekeeping_mask, housekeeping_staging;
121+
unsigned int first_cpu;
112122
int err = 0;
113123

114124
if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
@@ -129,7 +139,8 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
129139
cpumask_andnot(housekeeping_staging,
130140
cpu_possible_mask, non_housekeeping_mask);
131141

132-
if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) {
142+
first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging);
143+
if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) {
133144
__cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
134145
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
135146
if (!housekeeping.flags) {
@@ -138,6 +149,9 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
138149
}
139150
}
140151

152+
if (cpumask_empty(non_housekeeping_mask))
153+
goto free_housekeeping_staging;
154+
141155
if (!housekeeping.flags) {
142156
/* First setup call ("nohz_full=" or "isolcpus=") */
143157
enum hk_type type;

0 commit comments

Comments
 (0)