Skip to content

Commit 8420e08

Browse files
benzea and jmberg-intel
authored and committed
um: Track userspace children dying in SECCOMP mode
When in seccomp mode, we would hang forever on the futex if a child died unexpectedly. In contrast, ptrace mode will notice it and kill the corresponding thread when it fails to run it.

Fix this issue by using a new IRQ that is fired after a SIGCHLD and by keeping an (internal) list of all MMs. In the IRQ handler, find the affected MM and set its PID to -1 as well as the futex variable to FUTEX_IN_KERN.

This, together with futex returning -EINTR after the signal, is sufficient to implement a race-free detection of a child dying.

Note that this also enables IRQ handling while starting a userspace process. This should be safe, and SECCOMP requires the IRQ in case the process does not come up properly.

Signed-off-by: Benjamin Berg <[email protected]>
Signed-off-by: Benjamin Berg <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Johannes Berg <[email protected]>
1 parent b1e1bd2 commit 8420e08

File tree

11 files changed

+145
-8
lines changed

11 files changed

+145
-8
lines changed

arch/um/include/asm/irq.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,18 @@
1313
#define TELNETD_IRQ 8
1414
#define XTERM_IRQ 9
1515
#define RANDOM_IRQ 10
16+
#define SIGCHLD_IRQ 11
1617

1718
#ifdef CONFIG_UML_NET_VECTOR
1819

19-
#define VECTOR_BASE_IRQ (RANDOM_IRQ + 1)
20+
#define VECTOR_BASE_IRQ (SIGCHLD_IRQ + 1)
2021
#define VECTOR_IRQ_SPACE 8
2122

2223
#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
2324

2425
#else
2526

26-
#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1)
27+
#define UM_FIRST_DYN_IRQ (SIGCHLD_IRQ + 1)
2728

2829
#endif
2930

arch/um/include/asm/mmu.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@
66
#ifndef __ARCH_UM_MMU_H
77
#define __ARCH_UM_MMU_H
88

9+
#include "linux/types.h"
910
#include <mm_id.h>
1011

1112
typedef struct mm_context {
1213
struct mm_id id;
1314

15+
struct list_head list;
16+
1417
/* Address range in need of a TLB sync */
1518
unsigned long sync_tlb_range_from;
1619
unsigned long sync_tlb_range_to;

arch/um/include/shared/irq_user.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ enum um_irq_type {
1717
struct siginfo;
1818
extern void sigio_handler(int sig, struct siginfo *unused_si,
1919
struct uml_pt_regs *regs, void *mc);
20+
extern void sigchld_handler(int sig, struct siginfo *unused_si,
21+
struct uml_pt_regs *regs, void *mc);
2022
void sigio_run_timetravel_handlers(void);
2123
extern void free_irq_by_fd(int fd);
2224
extern void deactivate_fd(int fd, int irqnum);

arch/um/include/shared/os.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ extern int create_mem_file(unsigned long long len);
197197
extern void report_enomem(void);
198198

199199
/* process.c */
200+
pid_t os_reap_child(void);
200201
extern void os_alarm_process(int pid);
201202
extern void os_kill_process(int pid, int reap_child);
202203
extern void os_kill_ptraced_process(int pid, int reap_child);

arch/um/include/shared/skas/mm_id.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,6 @@ struct mm_id {
1414

1515
void __switch_mm(struct mm_id *mm_idp);
1616

17+
void notify_mm_kill(int pid);
18+
1719
#endif

arch/um/include/shared/skas/skas.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <sysdep/ptrace.h>
1010

11+
extern int using_seccomp;
1112
extern int userspace_pid[];
1213

1314
extern void new_thread_handler(void);

arch/um/kernel/irq.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,3 +690,9 @@ void __init init_IRQ(void)
690690
/* Initialize EPOLL Loop */
691691
os_setup_epoll();
692692
}
693+
694+
extern void sigchld_handler(int sig, struct siginfo *unused_si,
695+
struct uml_pt_regs *regs, void *mc)
696+
{
697+
do_IRQ(SIGCHLD_IRQ, regs);
698+
}

arch/um/kernel/skas/mmu.c

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/sched/signal.h>
99
#include <linux/slab.h>
1010

11+
#include <shared/irq_kern.h>
1112
#include <asm/pgalloc.h>
1213
#include <asm/sections.h>
1314
#include <asm/mmu_context.h>
@@ -19,6 +20,9 @@
1920
/* Ensure the stub_data struct covers the allocated area */
2021
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
2122

23+
spinlock_t mm_list_lock;
24+
struct list_head mm_list;
25+
2226
int init_new_context(struct task_struct *task, struct mm_struct *mm)
2327
{
2428
struct mm_id *new_id = &mm->context.id;
@@ -31,10 +35,12 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
3135

3236
new_id->stack = stack;
3337

34-
block_signals_trace();
35-
new_id->pid = start_userspace(stack);
36-
unblock_signals_trace();
38+
scoped_guard(spinlock_irqsave, &mm_list_lock) {
39+
/* Insert into list, used for lookups when the child dies */
40+
list_add(&mm->context.list, &mm_list);
41+
}
3742

43+
new_id->pid = start_userspace(stack);
3844
if (new_id->pid < 0) {
3945
ret = new_id->pid;
4046
goto out_free;
@@ -60,13 +66,79 @@ void destroy_context(struct mm_struct *mm)
6066
* zero, resulting in a kill(0), which will result in the
6167
* whole UML suddenly dying. Also, cover negative and
6268
* 1 cases, since they shouldn't happen either.
69+
*
70+
* Negative cases happen if the child died unexpectedly.
6371
*/
64-
if (mmu->id.pid < 2) {
72+
if (mmu->id.pid >= 0 && mmu->id.pid < 2) {
6573
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
6674
mmu->id.pid);
6775
return;
6876
}
69-
os_kill_ptraced_process(mmu->id.pid, 1);
77+
78+
if (mmu->id.pid > 0) {
79+
os_kill_ptraced_process(mmu->id.pid, 1);
80+
mmu->id.pid = -1;
81+
}
7082

7183
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
84+
85+
guard(spinlock_irqsave)(&mm_list_lock);
86+
87+
list_del(&mm->context.list);
88+
}
89+
90+
static irqreturn_t mm_sigchld_irq(int irq, void* dev)
91+
{
92+
struct mm_context *mm_context;
93+
pid_t pid;
94+
95+
guard(spinlock)(&mm_list_lock);
96+
97+
while ((pid = os_reap_child()) > 0) {
98+
/*
99+
* A child died, check if we have an MM with the PID. This is
100+
* only relevant in SECCOMP mode (as ptrace will fail anyway).
101+
*
102+
* See wait_stub_done_seccomp for more details.
103+
*/
104+
list_for_each_entry(mm_context, &mm_list, list) {
105+
if (mm_context->id.pid == pid) {
106+
struct stub_data *stub_data;
107+
printk("Unexpectedly lost MM child! Affected tasks will segfault.");
108+
109+
/* Marks the MM as dead */
110+
mm_context->id.pid = -1;
111+
112+
/*
113+
* NOTE: If SMP is implemented, a futex_wake
114+
* needs to be added here.
115+
*/
116+
stub_data = (void *)mm_context->id.stack;
117+
stub_data->futex = FUTEX_IN_KERN;
118+
119+
/*
120+
* NOTE: Currently executing syscalls by
121+
* affected tasks may finish normally.
122+
*/
123+
break;
124+
}
125+
}
126+
}
127+
128+
return IRQ_HANDLED;
129+
}
130+
131+
/*
 * Set up tracking of userspace children.
 *
 * Initialises the MM list and its lock, then requests SIGCHLD_IRQ with
 * mm_sigchld_irq() as its handler. Failing to obtain the IRQ is fatal.
 *
 * Return: always 0; a request_irq() failure panics instead of returning.
 */
static int __init init_child_tracking(void)
{
	int err;

	/* The list and lock must be usable before the IRQ can fire */
	spin_lock_init(&mm_list_lock);
	INIT_LIST_HEAD(&mm_list);

	err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL);
	if (err < 0)
		panic("Failed to register SIGCHLD IRQ: %d", err);

	return 0;
}
early_initcall(init_child_tracking);

arch/um/os-Linux/process.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,29 @@
1818
#include <init.h>
1919
#include <longjmp.h>
2020
#include <os.h>
21+
#include <skas/skas.h>
2122

2223
/*
 * Deliver SIGALRM to host process @pid.
 *
 * Zero and negative PIDs are ignored: kill() would interpret them as a
 * process group or "every process" rather than a single process.
 */
void os_alarm_process(int pid)
{
	if (pid > 0)
		kill(pid, SIGALRM);
}
2630

2731
void os_kill_process(int pid, int reap_child)
2832
{
33+
if (pid <= 0)
34+
return;
35+
36+
/* Block signals until child is reaped */
37+
block_signals();
38+
2939
kill(pid, SIGKILL);
3040
if (reap_child)
3141
CATCH_EINTR(waitpid(pid, NULL, __WALL));
42+
43+
unblock_signals();
3244
}
3345

3446
/* Kill off a ptraced child by all means available. kill it normally first,
@@ -38,11 +50,27 @@ void os_kill_process(int pid, int reap_child)
3850

3951
void os_kill_ptraced_process(int pid, int reap_child)
4052
{
53+
if (pid <= 0)
54+
return;
55+
56+
/* Block signals until child is reaped */
57+
block_signals();
58+
4159
kill(pid, SIGKILL);
4260
ptrace(PTRACE_KILL, pid);
4361
ptrace(PTRACE_CONT, pid);
4462
if (reap_child)
4563
CATCH_EINTR(waitpid(pid, NULL, __WALL));
64+
65+
unblock_signals();
66+
}
67+
68+
pid_t os_reap_child(void)
69+
{
70+
int status;
71+
72+
/* Try to reap a child */
73+
return waitpid(-1, &status, WNOHANG);
4674
}
4775

4876
/* Don't use the glibc version, which caches the result in TLS. It misses some
@@ -151,6 +179,9 @@ void init_new_thread_signals(void)
151179
set_handler(SIGBUS);
152180
signal(SIGHUP, SIG_IGN);
153181
set_handler(SIGIO);
182+
/* We (currently) only use the child reaper IRQ in seccomp mode */
183+
if (using_seccomp)
184+
set_handler(SIGCHLD);
154185
signal(SIGWINCH, SIG_IGN);
155186
}
156187

arch/um/os-Linux/signal.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) =
2929
[SIGBUS] = relay_signal,
3030
[SIGSEGV] = segv_handler,
3131
[SIGIO] = sigio_handler,
32+
[SIGCHLD] = sigchld_handler,
3233
};
3334

3435
static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
@@ -44,7 +45,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
4445
}
4546

4647
/* enable signals if sig isn't IRQ signal */
47-
if ((sig != SIGIO) && (sig != SIGWINCH))
48+
if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD))
4849
unblock_signals_trace();
4950

5051
(*sig_info[sig])(sig, si, &r, mc);
@@ -64,6 +65,9 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
6465
#define SIGALRM_BIT 1
6566
#define SIGALRM_MASK (1 << SIGALRM_BIT)
6667

68+
#define SIGCHLD_BIT 2
69+
#define SIGCHLD_MASK (1 << SIGCHLD_BIT)
70+
6771
int signals_enabled;
6872
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
6973
static int signals_blocked, signals_blocked_pending;
@@ -102,6 +106,11 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
102106
return;
103107
}
104108

109+
if (!enabled && (sig == SIGCHLD)) {
110+
signals_pending |= SIGCHLD_MASK;
111+
return;
112+
}
113+
105114
block_signals_trace();
106115

107116
sig_handler_common(sig, si, mc);
@@ -181,6 +190,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
181190

182191
[SIGIO] = sig_handler,
183192
[SIGWINCH] = sig_handler,
193+
/* SIGCHLD is only actually registered in seccomp mode. */
194+
[SIGCHLD] = sig_handler,
184195
[SIGALRM] = timer_alarm_handler,
185196

186197
[SIGUSR1] = sigusr1_handler,
@@ -309,6 +320,12 @@ void unblock_signals(void)
309320
if (save_pending & SIGIO_MASK)
310321
sig_handler_common(SIGIO, NULL, NULL);
311322

323+
if (save_pending & SIGCHLD_MASK) {
324+
struct uml_pt_regs regs = {};
325+
326+
sigchld_handler(SIGCHLD, NULL, &regs, NULL);
327+
}
328+
312329
/* Do not reenter the handler */
313330

314331
if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK)))

arch/um/os-Linux/skas/process.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ static int __init init_stub_exe_fd(void)
309309
}
310310
__initcall(init_stub_exe_fd);
311311

312+
int using_seccomp;
312313
int userspace_pid[NR_CPUS];
313314

314315
/**

0 commit comments

Comments
 (0)