
Commit d5e1586

Authored and committed by Peter Zijlstra

sched: Unconditionally use full-fat wait_task_inactive()

While modifying wait_task_inactive() for PREEMPT_RT, the build robot noted that UP got broken. This prompted an audit of the UP implementation of wait_task_inactive().

It looks like the UP implementation is also broken for PREEMPT: consider task_current_syscall() getting preempted between the two calls to wait_task_inactive().

Therefore move the wait_task_inactive() implementation out of CONFIG_SMP and unconditionally use it.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/20230602103731.GA630648%40hirez.programming.kicks-ass.net
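
For context, the caller pattern that breaks on UP+PREEMPT follows the usual two-call use of wait_task_inactive(): call it once, inspect the remote task, call it again, and trust the inspection only if both calls return the same non-zero switch count. The sketch below is loosely modelled on task_current_syscall() and is not code from this patch; sample_inspect_blocked_task() and inspect_task() are hypothetical placeholders. With the old UP stub unconditionally returning 1, a preemption between the two calls could never be detected.

/*
 * Sketch only, not part of the patch: the usual two-call
 * wait_task_inactive() pattern, loosely modelled on
 * task_current_syscall(). inspect_task() stands in for whatever
 * the caller wants to read from @p while it is still blocked.
 */
#include <linux/sched.h>
#include <linux/errno.h>

static int sample_inspect_blocked_task(struct task_struct *p, unsigned int state)
{
        unsigned long ncsw;

        ncsw = wait_task_inactive(p, state);
        if (!ncsw)
                return -EAGAIN;         /* @p changed state before getting off the CPU */

        inspect_task(p);                /* hypothetical: peek at @p's saved state */

        /*
         * The second call must report the same switch count, which proves
         * @p never ran in between. The old UP stub returned 1 unconditionally,
         * so a preemption right here went unnoticed on UP+PREEMPT.
         */
        if (wait_task_inactive(p, state) != ncsw)
                return -EAGAIN;

        return 0;
}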
1 parent 0dd37d6 commit d5e1586

2 files changed: +110 -113 lines

include/linux/sched.h

Lines changed: 2 additions & 5 deletions
@@ -2006,15 +2006,12 @@ static __always_inline void scheduler_ipi(void)
         */
        preempt_fold_need_resched();
 }
-extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
 #else
 static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
-{
-       return 1;
-}
 #endif

+extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
+
 /*
  * Set thread flags in other task's structures.
  * See asm/thread_info.h for TIF_xxxx flags available:
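
Note that match_state in the declaration above is a mask of task states rather than a single value: the implementation that now lives unconditionally in kernel/sched/core.c (see the hunks below) tests READ_ONCE(p->__state) & match_state, so the wait succeeds for any of the requested blocked states. A minimal illustrative caller (not from this patch; wait_until_blocked() is a hypothetical name) might look like:

/* Illustrative only: wait for @p to block in either sleep state. */
#include <linux/sched.h>

static bool wait_until_blocked(struct task_struct *p)
{
        unsigned long ncsw;

        ncsw = wait_task_inactive(p, TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE);
        return ncsw != 0;       /* zero means @p left the requested states instead */
}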

kernel/sched/core.c

Lines changed: 108 additions & 108 deletions
@@ -2213,6 +2213,114 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
                rq_clock_skip_update(rq);
 }

+/*
+ * wait_task_inactive - wait for a thread to unschedule.
+ *
+ * Wait for the thread to block in any of the states set in @match_state.
+ * If it changes, i.e. @p might have woken up, then return zero. When we
+ * succeed in waiting for @p to be off its CPU, we return a positive number
+ * (its total switch count). If a second call a short while later returns the
+ * same number, the caller can be sure that @p has remained unscheduled the
+ * whole time.
+ *
+ * The caller must ensure that the task *will* unschedule sometime soon,
+ * else this function might spin for a *long* time. This function can't
+ * be called with interrupts off, or it may introduce deadlock with
+ * smp_call_function() if an IPI is sent by the same process we are
+ * waiting to become inactive.
+ */
+unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
+{
+       int running, queued;
+       struct rq_flags rf;
+       unsigned long ncsw;
+       struct rq *rq;
+
+       for (;;) {
+               /*
+                * We do the initial early heuristics without holding
+                * any task-queue locks at all. We'll only try to get
+                * the runqueue lock when things look like they will
+                * work out!
+                */
+               rq = task_rq(p);
+
+               /*
+                * If the task is actively running on another CPU
+                * still, just relax and busy-wait without holding
+                * any locks.
+                *
+                * NOTE! Since we don't hold any locks, it's not
+                * even sure that "rq" stays as the right runqueue!
+                * But we don't care, since "task_on_cpu()" will
+                * return false if the runqueue has changed and p
+                * is actually now running somewhere else!
+                */
+               while (task_on_cpu(rq, p)) {
+                       if (!(READ_ONCE(p->__state) & match_state))
+                               return 0;
+                       cpu_relax();
+               }
+
+               /*
+                * Ok, time to look more closely! We need the rq
+                * lock now, to be *sure*. If we're wrong, we'll
+                * just go back and repeat.
+                */
+               rq = task_rq_lock(p, &rf);
+               trace_sched_wait_task(p);
+               running = task_on_cpu(rq, p);
+               queued = task_on_rq_queued(p);
+               ncsw = 0;
+               if (READ_ONCE(p->__state) & match_state)
+                       ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+               task_rq_unlock(rq, p, &rf);
+
+               /*
+                * If it changed from the expected state, bail out now.
+                */
+               if (unlikely(!ncsw))
+                       break;
+
+               /*
+                * Was it really running after all now that we
+                * checked with the proper locks actually held?
+                *
+                * Oops. Go back and try again..
+                */
+               if (unlikely(running)) {
+                       cpu_relax();
+                       continue;
+               }
+
+               /*
+                * It's not enough that it's not actively running,
+                * it must be off the runqueue _entirely_, and not
+                * preempted!
+                *
+                * So if it was still runnable (but just not actively
+                * running right now), it's preempted, and we should
+                * yield - it could be a while.
+                */
+               if (unlikely(queued)) {
+                       ktime_t to = NSEC_PER_SEC / HZ;
+
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
+                       continue;
+               }
+
+               /*
+                * Ahh, all good. It wasn't running, and it wasn't
+                * runnable, which means that it will never become
+                * running in the future either. We're all done!
+                */
+               break;
+       }
+
+       return ncsw;
+}
+
 #ifdef CONFIG_SMP

 static void
@@ -3341,114 +3449,6 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
 }
 #endif /* CONFIG_NUMA_BALANCING */

-/*
- * wait_task_inactive - wait for a thread to unschedule.
- *
- * Wait for the thread to block in any of the states set in @match_state.
- * If it changes, i.e. @p might have woken up, then return zero. When we
- * succeed in waiting for @p to be off its CPU, we return a positive number
- * (its total switch count). If a second call a short while later returns the
- * same number, the caller can be sure that @p has remained unscheduled the
- * whole time.
- *
- * The caller must ensure that the task *will* unschedule sometime soon,
- * else this function might spin for a *long* time. This function can't
- * be called with interrupts off, or it may introduce deadlock with
- * smp_call_function() if an IPI is sent by the same process we are
- * waiting to become inactive.
- */
-unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
-{
-       int running, queued;
-       struct rq_flags rf;
-       unsigned long ncsw;
-       struct rq *rq;
-
-       for (;;) {
-               /*
-                * We do the initial early heuristics without holding
-                * any task-queue locks at all. We'll only try to get
-                * the runqueue lock when things look like they will
-                * work out!
-                */
-               rq = task_rq(p);
-
-               /*
-                * If the task is actively running on another CPU
-                * still, just relax and busy-wait without holding
-                * any locks.
-                *
-                * NOTE! Since we don't hold any locks, it's not
-                * even sure that "rq" stays as the right runqueue!
-                * But we don't care, since "task_on_cpu()" will
-                * return false if the runqueue has changed and p
-                * is actually now running somewhere else!
-                */
-               while (task_on_cpu(rq, p)) {
-                       if (!(READ_ONCE(p->__state) & match_state))
-                               return 0;
-                       cpu_relax();
-               }
-
-               /*
-                * Ok, time to look more closely! We need the rq
-                * lock now, to be *sure*. If we're wrong, we'll
-                * just go back and repeat.
-                */
-               rq = task_rq_lock(p, &rf);
-               trace_sched_wait_task(p);
-               running = task_on_cpu(rq, p);
-               queued = task_on_rq_queued(p);
-               ncsw = 0;
-               if (READ_ONCE(p->__state) & match_state)
-                       ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-               task_rq_unlock(rq, p, &rf);
-
-               /*
-                * If it changed from the expected state, bail out now.
-                */
-               if (unlikely(!ncsw))
-                       break;
-
-               /*
-                * Was it really running after all now that we
-                * checked with the proper locks actually held?
-                *
-                * Oops. Go back and try again..
-                */
-               if (unlikely(running)) {
-                       cpu_relax();
-                       continue;
-               }
-
-               /*
-                * It's not enough that it's not actively running,
-                * it must be off the runqueue _entirely_, and not
-                * preempted!
-                *
-                * So if it was still runnable (but just not actively
-                * running right now), it's preempted, and we should
-                * yield - it could be a while.
-                */
-               if (unlikely(queued)) {
-                       ktime_t to = NSEC_PER_SEC / HZ;
-
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
-                       continue;
-               }
-
-               /*
-                * Ahh, all good. It wasn't running, and it wasn't
-                * runnable, which means that it will never become
-                * running in the future either. We're all done!
-                */
-               break;
-       }
-
-       return ncsw;
-}
-
 /***
  * kick_process - kick a running thread to enter/exit the kernel
  * @p: the to-be-kicked thread
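
One detail of the moved code worth spelling out: on success the function returns p->nvcsw | LONG_MIN, i.e. the voluntary context-switch count with the top bit forced on. That keeps a successful result non-zero even for a task that has never switched voluntarily, while two samples of the same count still compare equal across calls. A small userspace sketch of that encoding (not kernel code, purely an illustration):

#include <limits.h>
#include <stdio.h>

int main(void)
{
        /* A task that has never voluntarily switched would have nvcsw == 0. */
        unsigned long nvcsw = 0;

        /* OR-ing in LONG_MIN sets the sign bit, so the result is never 0. */
        unsigned long ncsw = nvcsw | LONG_MIN;

        printf("ncsw = %#lx, non-zero: %s\n", ncsw, ncsw ? "yes" : "no");
        return 0;
}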
