Commit f558c2b

Peter Zijlstra committed
sched/rt: Fix double enqueue caused by rt_effective_prio
Double enqueues in rt runqueues (list) have been reported while running a
simple test that spawns a number of threads doing a short sleep/run pattern
while being concurrently setscheduled between rt and fair class.

  WARNING: CPU: 3 PID: 2825 at kernel/sched/rt.c:1294 enqueue_task_rt+0x355/0x360
  CPU: 3 PID: 2825 Comm: setsched__13
  RIP: 0010:enqueue_task_rt+0x355/0x360
  Call Trace:
   __sched_setscheduler+0x581/0x9d0
   _sched_setscheduler+0x63/0xa0
   do_sched_setscheduler+0xa0/0x150
   __x64_sys_sched_setscheduler+0x1a/0x30
   do_syscall_64+0x33/0x40
   entry_SYSCALL_64_after_hwframe+0x44/0xae

  list_add double add: new=ffff9867cb629b40, prev=ffff9867cb629b40,
  next=ffff98679fc67ca0.
  kernel BUG at lib/list_debug.c:31!
  invalid opcode: 0000 [#1] PREEMPT_RT SMP PTI
  CPU: 3 PID: 2825 Comm: setsched__13
  RIP: 0010:__list_add_valid+0x41/0x50
  Call Trace:
   enqueue_task_rt+0x291/0x360
   __sched_setscheduler+0x581/0x9d0
   _sched_setscheduler+0x63/0xa0
   do_sched_setscheduler+0xa0/0x150
   __x64_sys_sched_setscheduler+0x1a/0x30
   do_syscall_64+0x33/0x40
   entry_SYSCALL_64_after_hwframe+0x44/0xae

__sched_setscheduler() uses rt_effective_prio() to handle proper queuing of
priority boosted tasks that are setscheduled while being boosted.

rt_effective_prio() is however called twice per each __sched_setscheduler()
call: first directly by __sched_setscheduler() before dequeuing the task and
then by __setscheduler() to actually do the priority change. If the priority
of the pi_top_task is concurrently being changed, it can happen that the two
calls return different results. If, for example, the first call returned the
same rt priority the task was running at and the second one a fair priority,
the task won't be removed from the rt list (on_list still set) and will then
be enqueued in the fair runqueue. When it is eventually setscheduled back to
rt, it will be seen as already enqueued and the WARNING/BUG will be issued.

Fix this by calling rt_effective_prio() only once and then reusing the return
value. While at it, refactor the code for clarity. Concurrent priority
inheritance handling is still safe and will eventually converge to a new
state by following the inheritance chain(s).

Fixes: 0782e63 ("sched: Handle priority boosted tasks proper in setscheduler()")
[squashed Peterz changes; added changelog]
Reported-by: Mark Simmons <[email protected]>
Signed-off-by: Juri Lelli <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
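To make the race concrete, here is a minimal userspace sketch, not kernel code: the stubbed rt_effective_prio(), the pi_boost_prio variable, old_flow()/new_flow() and the numeric priorities are all illustrative assumptions. It only shows why evaluating rt_effective_prio() twice can leave the "keep it queued" decision and the final priority out of sync, while a single evaluation keeps them consistent.

/* race_sketch.c - illustrative only, not the kernel implementation */
#include <stdbool.h>
#include <stdio.h>

/* Lower numeric value == higher priority, as on the kernel's prio scale. */
static int pi_boost_prio = 89;  /* prio inherited from pi_top_task; may change concurrently */

/* Stub standing in for rt_effective_prio(): the boost wins if it is higher. */
static int rt_effective_prio(int newprio)
{
        return pi_boost_prio < newprio ? pi_boost_prio : newprio;
}

/* Old flow: two calls, with a concurrent deboost in between. */
static void old_flow(int oldprio, int newprio)
{
        bool skip_requeue = (rt_effective_prio(newprio) == oldprio);   /* 1st call */

        pi_boost_prio = 120;            /* pi_top_task deboosted concurrently */

        int finalprio = rt_effective_prio(newprio);                    /* 2nd call */
        printf("old flow: skip_requeue=%d finalprio=%d  (rt decision, fair result)\n",
               skip_requeue, finalprio);
}

/* New flow: one call, result reused for both decisions. */
static void new_flow(int oldprio, int newprio)
{
        newprio = rt_effective_prio(newprio);                          /* single call */
        bool skip_requeue = (newprio == oldprio);

        pi_boost_prio = 120;            /* later changes no longer split the decision */

        printf("new flow: skip_requeue=%d finalprio=%d  (consistent)\n",
               skip_requeue, newprio);
}

int main(void)
{
        /* Task boosted to rt prio 89 is being setscheduled to a fair prio (120). */
        pi_boost_prio = 89;
        old_flow(89, 120);

        pi_boost_prio = 89;
        new_flow(89, 120);

        return 0;
}

In the old flow the first answer (rt) decides not to move the task off the rt list, while the second answer (fair) picks the fair class, which is exactly the mismatch the fix below removes by computing newprio once.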
1 parent c500bee commit f558c2b

1 file changed (+35, -55 lines)

kernel/sched/core.c

Lines changed: 35 additions & 55 deletions
@@ -1981,12 +1981,18 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
         dequeue_task(rq, p, flags);
 }
 
-/*
- * __normal_prio - return the priority that is based on the static prio
- */
-static inline int __normal_prio(struct task_struct *p)
+static inline int __normal_prio(int policy, int rt_prio, int nice)
 {
-        return p->static_prio;
+        int prio;
+
+        if (dl_policy(policy))
+                prio = MAX_DL_PRIO - 1;
+        else if (rt_policy(policy))
+                prio = MAX_RT_PRIO - 1 - rt_prio;
+        else
+                prio = NICE_TO_PRIO(nice);
+
+        return prio;
 }
 
 /*
@@ -1998,15 +2004,7 @@ static inline int __normal_prio(struct task_struct *p)
  */
 static inline int normal_prio(struct task_struct *p)
 {
-        int prio;
-
-        if (task_has_dl_policy(p))
-                prio = MAX_DL_PRIO-1;
-        else if (task_has_rt_policy(p))
-                prio = MAX_RT_PRIO-1 - p->rt_priority;
-        else
-                prio = __normal_prio(p);
-        return prio;
+        return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio));
 }
 
 /*
@@ -4099,7 +4097,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
         } else if (PRIO_TO_NICE(p->static_prio) < 0)
                 p->static_prio = NICE_TO_PRIO(0);
 
-        p->prio = p->normal_prio = __normal_prio(p);
+        p->prio = p->normal_prio = p->static_prio;
         set_load_weight(p, false);
 
         /*
@@ -6341,6 +6339,18 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
 }
 EXPORT_SYMBOL(default_wake_function);
 
+static void __setscheduler_prio(struct task_struct *p, int prio)
+{
+        if (dl_prio(prio))
+                p->sched_class = &dl_sched_class;
+        else if (rt_prio(prio))
+                p->sched_class = &rt_sched_class;
+        else
+                p->sched_class = &fair_sched_class;
+
+        p->prio = prio;
+}
+
 #ifdef CONFIG_RT_MUTEXES
 
 static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
@@ -6456,22 +6466,19 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
                 } else {
                         p->dl.pi_se = &p->dl;
                 }
-                p->sched_class = &dl_sched_class;
         } else if (rt_prio(prio)) {
                 if (dl_prio(oldprio))
                         p->dl.pi_se = &p->dl;
                 if (oldprio < prio)
                         queue_flag |= ENQUEUE_HEAD;
-                p->sched_class = &rt_sched_class;
         } else {
                 if (dl_prio(oldprio))
                         p->dl.pi_se = &p->dl;
                 if (rt_prio(oldprio))
                         p->rt.timeout = 0;
-                p->sched_class = &fair_sched_class;
         }
 
-        p->prio = prio;
+        __setscheduler_prio(p, prio);
 
         if (queued)
                 enqueue_task(rq, p, queue_flag);
@@ -6824,35 +6831,6 @@ static void __setscheduler_params(struct task_struct *p,
         set_load_weight(p, true);
 }
 
-/* Actually do priority change: must hold pi & rq lock. */
-static void __setscheduler(struct rq *rq, struct task_struct *p,
-                           const struct sched_attr *attr, bool keep_boost)
-{
-        /*
-         * If params can't change scheduling class changes aren't allowed
-         * either.
-         */
-        if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
-                return;
-
-        __setscheduler_params(p, attr);
-
-        /*
-         * Keep a potential priority boosting if called from
-         * sched_setscheduler().
-         */
-        p->prio = normal_prio(p);
-        if (keep_boost)
-                p->prio = rt_effective_prio(p, p->prio);
-
-        if (dl_prio(p->prio))
-                p->sched_class = &dl_sched_class;
-        else if (rt_prio(p->prio))
-                p->sched_class = &rt_sched_class;
-        else
-                p->sched_class = &fair_sched_class;
-}
-
 /*
  * Check the target process has a UID that matches the current process's:
  */
@@ -6873,10 +6851,8 @@ static int __sched_setscheduler(struct task_struct *p,
                                 const struct sched_attr *attr,
                                 bool user, bool pi)
 {
-        int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
-                      MAX_RT_PRIO - 1 - attr->sched_priority;
-        int retval, oldprio, oldpolicy = -1, queued, running;
-        int new_effective_prio, policy = attr->sched_policy;
+        int oldpolicy = -1, policy = attr->sched_policy;
+        int retval, oldprio, newprio, queued, running;
         const struct sched_class *prev_class;
         struct callback_head *head;
         struct rq_flags rf;
@@ -7074,6 +7050,7 @@ static int __sched_setscheduler(struct task_struct *p,
         p->sched_reset_on_fork = reset_on_fork;
         oldprio = p->prio;
 
+        newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice);
         if (pi) {
                 /*
                  * Take priority boosted tasks into account. If the new
@@ -7082,8 +7059,8 @@ static int __sched_setscheduler(struct task_struct *p,
                  * the runqueue. This will be done when the task deboost
                  * itself.
                  */
-                new_effective_prio = rt_effective_prio(p, newprio);
-                if (new_effective_prio == oldprio)
+                newprio = rt_effective_prio(p, newprio);
+                if (newprio == oldprio)
                         queue_flags &= ~DEQUEUE_MOVE;
         }
 
@@ -7096,7 +7073,10 @@ static int __sched_setscheduler(struct task_struct *p,
 
         prev_class = p->sched_class;
 
-        __setscheduler(rq, p, attr, pi);
+        if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
+                __setscheduler_params(p, attr);
+                __setscheduler_prio(p, newprio);
+        }
         __setscheduler_uclamp(p, attr);
 
         if (queued) {
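As a side note on the new __normal_prio(policy, rt_prio, nice) helper introduced above, a rough userspace check of the mapping it implements may help when reading the diff. The constants below are mirrored from the kernel headers purely for illustration and are assumptions of this sketch, not part of the patch.

/* prio_map.c - illustrative check of the __normal_prio() mapping */
#include <stdio.h>

/* Values mirrored from include/linux/sched/prio.h and sched/deadline.h. */
#define MAX_DL_PRIO     0
#define MAX_RT_PRIO     100
#define DEFAULT_PRIO    120
#define NICE_TO_PRIO(n) ((n) + DEFAULT_PRIO)

int main(void)
{
        printf("SCHED_DEADLINE              -> prio %d\n", MAX_DL_PRIO - 1);        /* -1  */
        printf("SCHED_FIFO, rt_priority 50  -> prio %d\n", MAX_RT_PRIO - 1 - 50);   /* 49  */
        printf("SCHED_NORMAL, nice 0        -> prio %d\n", NICE_TO_PRIO(0));        /* 120 */
        return 0;
}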
