Skip to content

Commit c3f3ce0

Browse files
aagit authored and torvalds committed
userfaultfd: use RCU to free the task struct when fork fails
The task structure is freed while get_mem_cgroup_from_mm() holds
rcu_read_lock() and dereferences mm->owner.

    get_mem_cgroup_from_mm()                failing fork()
    ----                                    ---
    task = mm->owner
                                            mm->owner = NULL;
                                            free(task)
    if (task) *task; /* use after free */

The fix consists in freeing the task with RCU also in the fork failure case, exactly like it always happens for the regular exit(2) path. That is enough to make the rcu_read_lock hold in get_mem_cgroup_from_mm() (left side above) effective to avoid a use after free when dereferencing the task structure.

An alternate possible fix would be to defer the delivery of the userfaultfd contexts to the monitor until after fork() is guaranteed to succeed. Such a change would require more changes because it would create a strict ordering dependency where the uffd methods would need to be called beyond the last potentially failing branch in order to be safe.

This solution as opposed only adds the dependency to common code to set mm->owner to NULL and to free the task struct that was pointed by mm->owner with RCU, if fork ends up failing. The userfaultfd methods can still be called anywhere during the fork runtime and the monitor will keep discarding orphaned "mm" coming from failed forks in userland.

This race condition couldn't trigger if CONFIG_MEMCG was set =n at build time.

[[email protected]: improve changelog, reduce #ifdefs per Michal]

Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Fixes: 893e26e ("userfaultfd: non-cooperative: Add fork() event")
Signed-off-by: Andrea Arcangeli <[email protected]>
Tested-by: zhong jiang <[email protected]>
Reported-by: [email protected]
Cc: Oleg Nesterov <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: "Kirill A . Shutemov" <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: zhong jiang <[email protected]>
Cc: [email protected]
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent acb2ec3 commit c3f3ce0

File tree

1 file changed

+29
-2
lines changed

1 file changed

+29
-2
lines changed

kernel/fork.c

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,15 @@ static void mm_init_aio(struct mm_struct *mm)
955955
#endif
956956
}
957957

958+
/*
 * mm_clear_owner - drop @mm's owner pointer if it still refers to @p.
 *
 * With CONFIG_MEMCG defined, mm->owner is set to NULL (via WRITE_ONCE) only
 * when it currently equals @p; any other owner is left untouched. Without
 * CONFIG_MEMCG the body is empty and the call does nothing.
 *
 * copy_process() calls this on its bad_fork_cleanup_mm path just before
 * mmput(), so a failed fork does not leave mm->owner pointing at the task
 * that is about to be freed.
 */
static __always_inline void mm_clear_owner(struct mm_struct *mm,
959+
struct task_struct *p)
960+
{
961+
#ifdef CONFIG_MEMCG
962+
if (mm->owner == p)
963+
WRITE_ONCE(mm->owner, NULL);
964+
#endif
965+
}
966+
958967
static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
959968
{
960969
#ifdef CONFIG_MEMCG
@@ -1343,6 +1352,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,
13431352
free_pt:
13441353
/* don't put binfmt in mmput, we haven't got module yet */
13451354
mm->binfmt = NULL;
1355+
mm_init_owner(mm, NULL);
13461356
mmput(mm);
13471357

13481358
fail_nomem:
@@ -1726,6 +1736,21 @@ static int pidfd_create(struct pid *pid)
17261736
return fd;
17271737
}
17281738

1739+
/*
 * RCU callback used by delayed_free_task(): map @rhp back to the task_struct
 * that embeds it as its "rcu" member, then release that task with
 * free_task().
 */
static void __delayed_free_task(struct rcu_head *rhp)
1740+
{
1741+
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
1742+
1743+
free_task(tsk);
1744+
}
1745+
1746+
/*
 * delayed_free_task - free @tsk, going through RCU when memcg is built in.
 *
 * When CONFIG_MEMCG is enabled, the task is queued with call_rcu() and is
 * freed later from __delayed_free_task(); otherwise free_task() is called
 * directly.
 *
 * Per the commit description, the deferral is needed because
 * get_mem_cgroup_from_mm() dereferences mm->owner under rcu_read_lock(), and
 * a fork that fails could otherwise free the task underneath it. The race
 * cannot trigger with CONFIG_MEMCG=n, hence the immediate free in that case.
 */
static __always_inline void delayed_free_task(struct task_struct *tsk)
1747+
{
1748+
if (IS_ENABLED(CONFIG_MEMCG))
1749+
call_rcu(&tsk->rcu, __delayed_free_task);
1750+
else
1751+
free_task(tsk);
1752+
}
1753+
17291754
/*
17301755
* This creates a new process as a copy of the old one,
17311756
* but does not actually start it yet.
@@ -2233,8 +2258,10 @@ static __latent_entropy struct task_struct *copy_process(
22332258
bad_fork_cleanup_namespaces:
22342259
exit_task_namespaces(p);
22352260
bad_fork_cleanup_mm:
2236-
if (p->mm)
2261+
if (p->mm) {
2262+
mm_clear_owner(p->mm, p);
22372263
mmput(p->mm);
2264+
}
22382265
bad_fork_cleanup_signal:
22392266
if (!(clone_flags & CLONE_THREAD))
22402267
free_signal_struct(p->signal);
@@ -2265,7 +2292,7 @@ static __latent_entropy struct task_struct *copy_process(
22652292
bad_fork_free:
22662293
p->state = TASK_DEAD;
22672294
put_task_stack(p);
2268-
free_task(p);
2295+
delayed_free_task(p);
22692296
fork_out:
22702297
spin_lock_irq(&current->sighand->siglock);
22712298
hlist_del_init(&delayed.node);

0 commit comments

Comments
 (0)