
Commit b4b27b9

Revert "signal: Allow tasks to cache one sigqueue struct"
This reverts commits 4bad58e (and 399f8dd, which tried to fix it).

I do not believe these are correct, and I'm about to release 5.13, so am reverting them out of an abundance of caution.

The locking is odd, and appears broken.

On the allocation side (in __sigqueue_alloc()), the locking is somewhat straightforward: it depends on sighand->siglock. Since one caller doesn't hold that lock, it then further tests 'sigqueue_flags' to avoid the case with no locks held.

On the freeing side (in sigqueue_cache_or_free()), there is no locking at all, and the logic instead depends on 'current' being a single thread, and not able to race with itself.

To make things more exciting, there's also the data race between freeing a signal and allocating one, which is handled by using WRITE_ONCE() and READ_ONCE(), and by being mutually exclusive wrt the initial state (i.e. freeing will only free if the old state was NULL, while allocating will obviously only use the value if it was non-NULL, so only one or the other will actually act on the value).

However, while the free->alloc paths do seem mutually exclusive thanks to just the data value dependency, it's not clear what the memory ordering constraints are on it. Could writes from the previous allocation possibly be delayed and seen by the new allocation later, causing logical inconsistencies?

So it's all very exciting and unusual.

And in particular, it seems that the freeing side is incorrect in depending on "current" being single-threaded. Yes, 'current' is a single thread, but in the presence of asynchronous events even a single thread can have data races.

And such asynchronous events can and do happen, with interrupts causing signals to be flushed and thus freed (for example, sending a SIGCONT/SIGSTOP can happen from interrupt context, and can flush previously queued process control signals).

So regardless of all the other questions about the memory ordering and locking for this new cached allocation, the sigqueue_cache_or_free() assumptions seem to be fundamentally incorrect.

It may be that people will show me the errors of my ways, and tell me why this is all safe after all. We can reinstate it if so. But my current belief is that the WRITE_ONCE() that sets the cached entry needs to be a smp_store_release(), and the READ_ONCE() that finds a cached entry needs to be a smp_load_acquire() to handle memory ordering correctly.

And the sequence in sigqueue_cache_or_free() would need to either use a lock or at least be interrupt-safe in some way (perhaps by using something like the percpu 'cmpxchg': it doesn't need to be SMP-safe, but like the percpu operations it needs to be interrupt-safe).

Fixes: 399f8dd ("signal: Prevent sigqueue caching after task got released")
Fixes: 4bad58e ("signal: Allow tasks to cache one sigqueue struct")
Cc: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Christian Brauner <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
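The sketch below spells out that suggested direction. It is hypothetical, not a tested patch: the acquire/release pairing follows the paragraphs above, and local_irq_save() stands in for the "lock or interrupt-safe cmpxchg" option as one way to make the free-side check-then-store safe against interrupts on the same CPU.

/*
 * Hypothetical sketch of the suggested fix -- not code from the tree.
 */
static void sigqueue_cache_or_free(struct sigqueue *q)
{
	unsigned long flags;

	/*
	 * With interrupts disabled, an interrupt on this CPU can no
	 * longer run __sigqueue_free() between the NULL check and the
	 * store below.
	 */
	local_irq_save(flags);
	if (!current->sigqueue_cache)
		/*
		 * Release store: all prior writes to *q are ordered
		 * before the pointer becomes visible to the alloc side.
		 */
		smp_store_release(&current->sigqueue_cache, q);
	else
		kmem_cache_free(sigqueue_cachep, q);
	local_irq_restore(flags);
}

/* The consumer in __sigqueue_alloc() would then pair with the release: */
	q = smp_load_acquire(&t->sigqueue_cache);
	if (!q || sigqueue_flags)
		q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
	else
		WRITE_ONCE(t->sigqueue_cache, NULL);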
1 parent: 625acff

5 files changed: +2 −61 lines

include/linux/sched.h

Lines changed: 0 additions & 1 deletion
@@ -997,7 +997,6 @@ struct task_struct {
 	/* Signal handlers: */
 	struct signal_struct		*signal;
 	struct sighand_struct __rcu	*sighand;
-	struct sigqueue			*sigqueue_cache;
 	sigset_t			blocked;
 	sigset_t			real_blocked;
 	/* Restored if set_restore_sigmask() was used: */

include/linux/signal.h

Lines changed: 0 additions & 1 deletion
@@ -267,7 +267,6 @@ static inline void init_sigpending(struct sigpending *sig)
 }
 
 extern void flush_sigqueue(struct sigpending *queue);
-extern void exit_task_sigqueue_cache(struct task_struct *tsk);
 
 /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
 static inline int valid_signal(unsigned long sig)

kernel/exit.c

Lines changed: 0 additions & 1 deletion
@@ -162,7 +162,6 @@ static void __exit_signal(struct task_struct *tsk)
 		flush_sigqueue(&sig->shared_pending);
 		tty_kref_put(tty);
 	}
-	exit_task_sigqueue_cache(tsk);
 }
 
 static void delayed_put_task_struct(struct rcu_head *rhp)

kernel/fork.c

Lines changed: 0 additions & 1 deletion
@@ -2008,7 +2008,6 @@ static __latent_entropy struct task_struct *copy_process(
 	spin_lock_init(&p->alloc_lock);
 
 	init_sigpending(&p->pending);
-	p->sigqueue_cache = NULL;
 
 	p->utime = p->stime = p->gtime = 0;
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME

kernel/signal.c

Lines changed: 2 additions & 57 deletions
@@ -431,22 +431,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 	rcu_read_unlock();
 
 	if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
-		/*
-		 * Preallocation does not hold sighand::siglock so it can't
-		 * use the cache. The lockless caching requires that only
-		 * one consumer and only one producer run at a time.
-		 *
-		 * For the regular allocation case it is sufficient to
-		 * check @q for NULL because this code can only be called
-		 * if the target task @t has not been reaped yet; which
-		 * means this code can never observe the error pointer which is
-		 * written to @t->sigqueue_cache in exit_task_sigqueue_cache().
-		 */
-		q = READ_ONCE(t->sigqueue_cache);
-		if (!q || sigqueue_flags)
-			q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
-		else
-			WRITE_ONCE(t->sigqueue_cache, NULL);
+		q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
 	} else {
 		print_dropped_signal(sig);
 	}
@@ -463,53 +448,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 	return q;
 }
 
-void exit_task_sigqueue_cache(struct task_struct *tsk)
-{
-	/* Race free because @tsk is mopped up */
-	struct sigqueue *q = tsk->sigqueue_cache;
-
-	if (q) {
-		/*
-		 * Hand it back to the cache as the task might
-		 * be self reaping which would leak the object.
-		 */
-		kmem_cache_free(sigqueue_cachep, q);
-	}
-
-	/*
-	 * Set an error pointer to ensure that @tsk will not cache a
-	 * sigqueue when it is reaping it's child tasks
-	 */
-	tsk->sigqueue_cache = ERR_PTR(-1);
-}
-
-static void sigqueue_cache_or_free(struct sigqueue *q)
-{
-	/*
-	 * Cache one sigqueue per task. This pairs with the consumer side
-	 * in __sigqueue_alloc() and needs READ/WRITE_ONCE() to prevent the
-	 * compiler from store tearing and to tell KCSAN that the data race
-	 * is intentional when run without holding current->sighand->siglock,
-	 * which is fine as current obviously cannot run __sigqueue_free()
-	 * concurrently.
-	 *
-	 * The NULL check is safe even if current has been reaped already,
-	 * in which case exit_task_sigqueue_cache() wrote an error pointer
-	 * into current->sigqueue_cache.
-	 */
-	if (!READ_ONCE(current->sigqueue_cache))
-		WRITE_ONCE(current->sigqueue_cache, q);
-	else
-		kmem_cache_free(sigqueue_cachep, q);
-}
-
 static void __sigqueue_free(struct sigqueue *q)
 {
 	if (q->flags & SIGQUEUE_PREALLOC)
 		return;
 	if (atomic_dec_and_test(&q->user->sigpending))
 		free_uid(q->user);
-	sigqueue_cache_or_free(q);
+	kmem_cache_free(sigqueue_cachep, q);
 }
 
 void flush_sigqueue(struct sigpending *queue)
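To make the problem in the removed sigqueue_cache_or_free() concrete, the interleaving described in the commit message can be annotated against its check-then-store (an illustrative trace, not code from the tree):

	if (!READ_ONCE(current->sigqueue_cache))   /* (1) task sees NULL */
		/*
		 * An interrupt here can flush queued signals (e.g. a
		 * SIGCONT/SIGSTOP sent from interrupt context), reach
		 * __sigqueue_free() -> sigqueue_cache_or_free(), and
		 * cache its own sigqueue in current->sigqueue_cache.
		 */
		WRITE_ONCE(current->sigqueue_cache, q);	/* (2) overwrites the
							 * interrupt's entry,
							 * leaking that sigqueue */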
