Skip to content

Commit dd8e8f4

Browse files
oleg-nesterovtorvalds
authored andcommitted
oom: introduce find_lock_task_mm() to fix !mm false positives
Almost all ->mm == NULL checks in oom_kill.c are wrong. The current code assumes that the task without ->mm has already released its memory and ignores the process. However this is not necessarily true when this process is multithreaded, other live sub-threads can use this ->mm. - Remove the "if (!p->mm)" check in select_bad_process(), it is just wrong. - Add the new helper, find_lock_task_mm(), which finds the live thread which uses the memory and takes task_lock() to pin ->mm - change oom_badness() to use this helper instead of just checking ->mm != NULL. - As David pointed out, select_bad_process() must never choose the task without ->mm, but no matter what oom_badness() returns the task can be chosen if nothing else has been found yet. Change oom_badness() to return int, change it to return -1 if find_lock_task_mm() fails, and change select_bad_process() to check points >= 0. Note! This patch is not enough, we need more changes. - oom_badness() was fixed, but oom_kill_task() still ignores the task without ->mm - oom_forkbomb_penalty() should use find_lock_task_mm() too, and it also needs other changes to actually find the first first-descendant children This will be addressed later. [[email protected]: use in badness(), __oom_kill_task()] Signed-off-by: Oleg Nesterov <[email protected]> Signed-off-by: David Rientjes <[email protected]> Signed-off-by: KOSAKI Motohiro <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent b522794 commit dd8e8f4

File tree

1 file changed

+43
-31
lines changed

1 file changed

+43
-31
lines changed

mm/oom_kill.c

Lines changed: 43 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,20 @@ static int has_intersects_mems_allowed(struct task_struct *tsk)
5252
return 0;
5353
}
5454

55+
static struct task_struct *find_lock_task_mm(struct task_struct *p)
56+
{
57+
struct task_struct *t = p;
58+
59+
do {
60+
task_lock(t);
61+
if (likely(t->mm))
62+
return t;
63+
task_unlock(t);
64+
} while_each_thread(p, t);
65+
66+
return NULL;
67+
}
68+
5569
/**
5670
* badness - calculate a numeric value for how bad this task has been
5771
* @p: task struct of which task we should calculate
@@ -74,8 +88,8 @@ static int has_intersects_mems_allowed(struct task_struct *tsk)
7488
unsigned long badness(struct task_struct *p, unsigned long uptime)
7589
{
7690
unsigned long points, cpu_time, run_time;
77-
struct mm_struct *mm;
7891
struct task_struct *child;
92+
struct task_struct *c, *t;
7993
int oom_adj = p->signal->oom_adj;
8094
struct task_cputime task_time;
8195
unsigned long utime;
@@ -84,17 +98,14 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
8498
if (oom_adj == OOM_DISABLE)
8599
return 0;
86100

87-
task_lock(p);
88-
mm = p->mm;
89-
if (!mm) {
90-
task_unlock(p);
101+
p = find_lock_task_mm(p);
102+
if (!p)
91103
return 0;
92-
}
93104

94105
/*
95106
* The memory size of the process is the basis for the badness.
96107
*/
97-
points = mm->total_vm;
108+
points = p->mm->total_vm;
98109

99110
/*
100111
* After this unlock we can no longer dereference local variable `mm'
@@ -115,12 +126,17 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
115126
* child is eating the vast majority of memory, adding only half
116127
* to the parents will make the child our kill candidate of choice.
117128
*/
118-
list_for_each_entry(child, &p->children, sibling) {
119-
task_lock(child);
120-
if (child->mm != mm && child->mm)
121-
points += child->mm->total_vm/2 + 1;
122-
task_unlock(child);
123-
}
129+
t = p;
130+
do {
131+
list_for_each_entry(c, &t->children, sibling) {
132+
child = find_lock_task_mm(c);
133+
if (child) {
134+
if (child->mm != p->mm)
135+
points += child->mm->total_vm/2 + 1;
136+
task_unlock(child);
137+
}
138+
}
139+
} while_each_thread(p, t);
124140

125141
/*
126142
* CPU time is in tens of seconds and run time is in thousands
@@ -256,9 +272,6 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
256272
for_each_process(p) {
257273
unsigned long points;
258274

259-
/* skip tasks that have already released their mm */
260-
if (!p->mm)
261-
continue;
262275
/* skip the init task and kthreads */
263276
if (is_global_init(p) || (p->flags & PF_KTHREAD))
264277
continue;
@@ -385,14 +398,9 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
385398
return;
386399
}
387400

388-
task_lock(p);
389-
if (!p->mm) {
390-
WARN_ON(1);
391-
printk(KERN_WARNING "tried to kill an mm-less task %d (%s)!\n",
392-
task_pid_nr(p), p->comm);
393-
task_unlock(p);
401+
p = find_lock_task_mm(p);
402+
if (!p)
394403
return;
395-
}
396404

397405
if (verbose)
398406
printk(KERN_ERR "Killed process %d (%s) "
@@ -437,6 +445,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
437445
const char *message)
438446
{
439447
struct task_struct *c;
448+
struct task_struct *t = p;
440449

441450
if (printk_ratelimit())
442451
dump_header(p, gfp_mask, order, mem);
@@ -454,14 +463,17 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
454463
message, task_pid_nr(p), p->comm, points);
455464

456465
/* Try to kill a child first */
457-
list_for_each_entry(c, &p->children, sibling) {
458-
if (c->mm == p->mm)
459-
continue;
460-
if (mem && !task_in_mem_cgroup(c, mem))
461-
continue;
462-
if (!oom_kill_task(c))
463-
return 0;
464-
}
466+
do {
467+
list_for_each_entry(c, &t->children, sibling) {
468+
if (c->mm == p->mm)
469+
continue;
470+
if (mem && !task_in_mem_cgroup(c, mem))
471+
continue;
472+
if (!oom_kill_task(c))
473+
return 0;
474+
}
475+
} while_each_thread(p, t);
476+
465477
return oom_kill_task(p);
466478
}
467479

0 commit comments

Comments
 (0)