Skip to content

Commit 8184116

Browse files
Cyrill Gorcunov authored and torvalds committed
fs, proc: introduce /proc/<pid>/task/<tid>/children entry
When we checkpoint a task we need to know the list of children the task has, but there is no easy and fast way to generate the reverse parent->children chain from an arbitrary <pid> (while the parent pid is provided in the "PPid" field of /proc/<pid>/status). So instead of walking over all pids in the system (creating one big process tree in memory, just to figure out which children a task has) -- we add an explicit /proc/<pid>/task/<tid>/children entry, because the kernel already has this kind of information but it is not yet exported. Only first-level children are listed, not the whole process tree. Signed-off-by: Cyrill Gorcunov <[email protected]> Reviewed-by: Oleg Nesterov <[email protected]> Reviewed-by: Kees Cook <[email protected]> Cc: Pavel Emelyanov <[email protected]> Cc: Serge Hallyn <[email protected]> Cc: KAMEZAWA Hiroyuki <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 98ed57e commit 8184116

File tree

4 files changed

+145
-0
lines changed

4 files changed

+145
-0
lines changed

Documentation/filesystems/proc.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ Table of Contents
4040
3.4 /proc/<pid>/coredump_filter - Core dump filtering settings
4141
3.5 /proc/<pid>/mountinfo - Information about mounts
4242
3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm
43+
3.7 /proc/<pid>/task/<tid>/children - Information about task children
4344

4445
4 Configuring procfs
4546
4.1 Mount options
@@ -1578,6 +1579,23 @@ then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated
15781579
comm value.
15791580

15801581

1582+
3.7 /proc/<pid>/task/<tid>/children - Information about task children
1583+
-------------------------------------------------------------------------
1584+
This file provides a fast way to retrieve first level children pids
1585+
of a task pointed to by the <pid>/<tid> pair. The format is a space separated
1586+
stream of pids.
1587+
1588+
Note the "first level" here -- if a child has its own children they will
1589+
not be listed here; one needs to read /proc/<children-pid>/task/<tid>/children
1590+
to obtain the descendants.
1591+
1592+
Since this interface is intended to be fast and cheap it doesn't
1593+
guarantee to provide precise results and some children might be
1594+
skipped, especially if they've exited right after we printed their
1595+
pids, so one needs to either stop or freeze processes being inspected
1596+
if precise results are needed.
1597+
1598+
15811599
------------------------------------------------------------------------------
15821600
Configuring procfs
15831601
------------------------------------------------------------------------------

fs/proc/array.c

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,3 +565,126 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
565565

566566
return 0;
567567
}
568+
569+
#ifdef CONFIG_CHECKPOINT_RESTORE
570+
static struct pid *
571+
get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos)
572+
{
573+
struct task_struct *start, *task;
574+
struct pid *pid = NULL;
575+
576+
read_lock(&tasklist_lock);
577+
578+
start = pid_task(proc_pid(inode), PIDTYPE_PID);
579+
if (!start)
580+
goto out;
581+
582+
/*
583+
* Lets try to continue searching first, this gives
584+
* us significant speedup on children-rich processes.
585+
*/
586+
if (pid_prev) {
587+
task = pid_task(pid_prev, PIDTYPE_PID);
588+
if (task && task->real_parent == start &&
589+
!(list_empty(&task->sibling))) {
590+
if (list_is_last(&task->sibling, &start->children))
591+
goto out;
592+
task = list_first_entry(&task->sibling,
593+
struct task_struct, sibling);
594+
pid = get_pid(task_pid(task));
595+
goto out;
596+
}
597+
}
598+
599+
/*
600+
* Slow search case.
601+
*
602+
* We might miss some children here if children
603+
* are exited while we were not holding the lock,
604+
* but it was never promised to be accurate that
605+
* much.
606+
*
607+
* "Just suppose that the parent sleeps, but N children
608+
* exit after we printed their tids. Now the slow paths
609+
* skips N extra children, we miss N tasks." (c)
610+
*
611+
* So one need to stop or freeze the leader and all
612+
* its children to get a precise result.
613+
*/
614+
list_for_each_entry(task, &start->children, sibling) {
615+
if (pos-- == 0) {
616+
pid = get_pid(task_pid(task));
617+
break;
618+
}
619+
}
620+
621+
out:
622+
read_unlock(&tasklist_lock);
623+
return pid;
624+
}
625+
626+
static int children_seq_show(struct seq_file *seq, void *v)
627+
{
628+
struct inode *inode = seq->private;
629+
pid_t pid;
630+
631+
pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
632+
return seq_printf(seq, "%d ", pid);
633+
}
634+
635+
/*
 * seq_file ->start() callback: look up the child at index *@pos from
 * scratch (no previous pid to resume from). Returns whatever
 * get_children_pid() returns: a struct pid, or NULL when there is none.
 */
static void *children_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct inode *inode = seq->private;
	loff_t index = *pos;

	return get_children_pid(inode, NULL, index);
}
639+
640+
/*
 * seq_file ->next() callback: advance from the child in @v to the
 * following one and bump *@pos.
 *
 * The reference on @v is dropped only after the lookup, because
 * get_children_pid() uses @v to resume the search.
 */
static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct pid *prev = v;
	struct pid *next;

	next = get_children_pid(seq->private, prev, *pos + 1);
	put_pid(prev);
	(*pos)++;

	return next;
}
650+
651+
/*
 * seq_file ->stop() callback: release the pid the iteration stopped on
 * (@v is passed straight to put_pid()).
 */
static void children_seq_stop(struct seq_file *seq, void *v)
{
	struct pid *last = v;

	put_pid(last);
}
655+
656+
static const struct seq_operations children_seq_ops = {
657+
.start = children_seq_start,
658+
.next = children_seq_next,
659+
.stop = children_seq_stop,
660+
.show = children_seq_show,
661+
};
662+
663+
static int children_seq_open(struct inode *inode, struct file *file)
664+
{
665+
struct seq_file *m;
666+
int ret;
667+
668+
ret = seq_open(file, &children_seq_ops);
669+
if (ret)
670+
return ret;
671+
672+
m = file->private_data;
673+
m->private = inode;
674+
675+
return ret;
676+
}
677+
678+
/*
 * ->release() handler: tear down the seq_file set up at open time.
 *
 * Hands back the result of seq_release() instead of discarding it and
 * reporting unconditional success.
 */
int children_seq_release(struct inode *inode, struct file *file)
{
	return seq_release(inode, file);
}
683+
684+
const struct file_operations proc_tid_children_operations = {
685+
.open = children_seq_open,
686+
.read = seq_read,
687+
.llseek = seq_lseek,
688+
.release = children_seq_release,
689+
};
690+
#endif /* CONFIG_CHECKPOINT_RESTORE */

fs/proc/base.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3400,6 +3400,9 @@ static const struct pid_entry tid_base_stuff[] = {
34003400
ONE("stat", S_IRUGO, proc_tid_stat),
34013401
ONE("statm", S_IRUGO, proc_pid_statm),
34023402
REG("maps", S_IRUGO, proc_tid_maps_operations),
3403+
#ifdef CONFIG_CHECKPOINT_RESTORE
3404+
REG("children", S_IRUGO, proc_tid_children_operations),
3405+
#endif
34033406
#ifdef CONFIG_NUMA
34043407
REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
34053408
#endif

fs/proc/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
5454
struct pid *pid, struct task_struct *task);
5555
extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
5656

57+
extern const struct file_operations proc_tid_children_operations;
5758
extern const struct file_operations proc_pid_maps_operations;
5859
extern const struct file_operations proc_tid_maps_operations;
5960
extern const struct file_operations proc_pid_numa_maps_operations;

0 commit comments

Comments
 (0)