Skip to content

Commit 95846ec

Browse files
gs0510 authored and
torvalds committed
pid: replace pid bitmap implementation with IDR API
Patch series "Replacing PID bitmap implementation with IDR API", v4.

This series replaces kernel bitmap implementation of PID allocation with IDR API. These patches are written to simplify the kernel by replacing custom code with calls to generic code.

The following are the stats for pid and pid_namespace object files before and after the replacement. There is a noteworthy change between the IDR and bitmap implementation.

Before
   text    data     bss     dec     hex filename
   8447    3894      64   12405    3075 kernel/pid.o
After
   text    data     bss     dec     hex filename
   3397     304       0    3701     e75 kernel/pid.o

Before
   text    data     bss     dec     hex filename
   5692    1842     192    7726    1e2e kernel/pid_namespace.o
After
   text    data     bss     dec     hex filename
   2854     216      16    3086     c0e kernel/pid_namespace.o

The following are the stats for ps, pstree and calling readdir on /proc for 10,000 processes.

ps:
        With IDR API    With bitmap
real    0m1.479s        0m2.319s
user    0m0.070s        0m0.060s
sys     0m0.289s        0m0.516s

pstree:
        With IDR API    With bitmap
real    0m1.024s        0m1.794s
user    0m0.348s        0m0.612s
sys     0m0.184s        0m0.264s

proc:
        With IDR API    With bitmap
real    0m0.059s        0m0.074s
user    0m0.000s        0m0.004s
sys     0m0.016s        0m0.016s

This patch (of 2):

Replace the current bitmap implementation for Process ID allocation. Functions that are no longer required, for example, free_pidmap(), alloc_pidmap(), etc. are removed. The rest of the functions are modified to use the IDR API. The change was made to make the PID allocation less complex by replacing custom code with calls to generic API.
[[email protected]: v6]
  Link: http://lkml.kernel.org/r/[email protected]
[[email protected]: restore the old behaviour of the ns_last_pid sysctl]
  Link: http://lkml.kernel.org/r/[email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Gargi Sharma <[email protected]>
Reviewed-by: Rik van Riel <[email protected]>
Acked-by: Oleg Nesterov <[email protected]>
Cc: Julia Lawall <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Pavel Tatashin <[email protected]>
Cc: Kirill Tkhai <[email protected]>
Cc: Eric W. Biederman <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent f9eb2fd commit 95846ec

File tree

6 files changed

+65
-209
lines changed

6 files changed

+65
-209
lines changed

arch/powerpc/platforms/cell/spufs/sched.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
10931093
LOAD_INT(c), LOAD_FRAC(c),
10941094
count_active_contexts(),
10951095
atomic_read(&nr_spu_contexts),
1096-
task_active_pid_ns(current)->last_pid);
1096+
idr_get_cursor(&task_active_pid_ns(current)->idr));
10971097
return 0;
10981098
}
10991099

fs/proc/loadavg.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
2424
LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
2525
LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
2626
nr_running(), nr_threads,
27-
task_active_pid_ns(current)->last_pid);
27+
idr_get_cursor(&task_active_pid_ns(current)->idr));
2828
return 0;
2929
}
3030

include/linux/pid_namespace.h

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,8 @@
1010
#include <linux/nsproxy.h>
1111
#include <linux/kref.h>
1212
#include <linux/ns_common.h>
13+
#include <linux/idr.h>
1314

14-
struct pidmap {
15-
atomic_t nr_free;
16-
void *page;
17-
};
18-
19-
#define BITS_PER_PAGE (PAGE_SIZE * 8)
20-
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
21-
#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE)
2215

2316
struct fs_pin;
2417

@@ -30,9 +23,8 @@ enum { /* definitions for pid_namespace's hide_pid field */
3023

3124
struct pid_namespace {
3225
struct kref kref;
33-
struct pidmap pidmap[PIDMAP_ENTRIES];
26+
struct idr idr;
3427
struct rcu_head rcu;
35-
int last_pid;
3628
unsigned int nr_hashed;
3729
struct task_struct *child_reaper;
3830
struct kmem_cache *pid_cachep;
@@ -106,6 +98,6 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
10698

10799
extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
108100
void pidhash_init(void);
109-
void pidmap_init(void);
101+
void pid_idr_init(void);
110102

111103
#endif /* _LINUX_PID_NS_H */

init/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,7 @@ asmlinkage __visible void __init start_kernel(void)
669669
if (late_time_init)
670670
late_time_init();
671671
calibrate_delay();
672-
pidmap_init();
672+
pid_idr_init();
673673
anon_vma_init();
674674
#ifdef CONFIG_X86
675675
if (efi_enabled(EFI_RUNTIME_SERVICES))

kernel/pid.c

Lines changed: 36 additions & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <linux/proc_ns.h>
4040
#include <linux/proc_fs.h>
4141
#include <linux/sched/task.h>
42+
#include <linux/idr.h>
4243

4344
#define pid_hashfn(nr, ns) \
4445
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -53,14 +54,6 @@ int pid_max = PID_MAX_DEFAULT;
5354
int pid_max_min = RESERVED_PIDS + 1;
5455
int pid_max_max = PID_MAX_LIMIT;
5556

56-
static inline int mk_pid(struct pid_namespace *pid_ns,
57-
struct pidmap *map, int off)
58-
{
59-
return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
60-
}
61-
62-
#define find_next_offset(map, off) \
63-
find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
6457

6558
/*
6659
* PID-map pages start out as NULL, they get allocated upon
@@ -70,10 +63,7 @@ static inline int mk_pid(struct pid_namespace *pid_ns,
7063
*/
7164
struct pid_namespace init_pid_ns = {
7265
.kref = KREF_INIT(2),
73-
.pidmap = {
74-
[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
75-
},
76-
.last_pid = 0,
66+
.idr = IDR_INIT,
7767
.nr_hashed = PIDNS_HASH_ADDING,
7868
.level = 0,
7969
.child_reaper = &init_task,
@@ -101,138 +91,6 @@ EXPORT_SYMBOL_GPL(init_pid_ns);
10191

10292
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
10393

104-
static void free_pidmap(struct upid *upid)
105-
{
106-
int nr = upid->nr;
107-
struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
108-
int offset = nr & BITS_PER_PAGE_MASK;
109-
110-
clear_bit(offset, map->page);
111-
atomic_inc(&map->nr_free);
112-
}
113-
114-
/*
115-
* If we started walking pids at 'base', is 'a' seen before 'b'?
116-
*/
117-
static int pid_before(int base, int a, int b)
118-
{
119-
/*
120-
* This is the same as saying
121-
*
122-
* (a - base + MAXUINT) % MAXUINT < (b - base + MAXUINT) % MAXUINT
123-
* and that mapping orders 'a' and 'b' with respect to 'base'.
124-
*/
125-
return (unsigned)(a - base) < (unsigned)(b - base);
126-
}
127-
128-
/*
129-
* We might be racing with someone else trying to set pid_ns->last_pid
130-
* at the pid allocation time (there's also a sysctl for this, but racing
131-
* with this one is OK, see comment in kernel/pid_namespace.c about it).
132-
* We want the winner to have the "later" value, because if the
133-
* "earlier" value prevails, then a pid may get reused immediately.
134-
*
135-
* Since pids rollover, it is not sufficient to just pick the bigger
136-
* value. We have to consider where we started counting from.
137-
*
138-
* 'base' is the value of pid_ns->last_pid that we observed when
139-
* we started looking for a pid.
140-
*
141-
* 'pid' is the pid that we eventually found.
142-
*/
143-
static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid)
144-
{
145-
int prev;
146-
int last_write = base;
147-
do {
148-
prev = last_write;
149-
last_write = cmpxchg(&pid_ns->last_pid, prev, pid);
150-
} while ((prev != last_write) && (pid_before(base, last_write, pid)));
151-
}
152-
153-
static int alloc_pidmap(struct pid_namespace *pid_ns)
154-
{
155-
int i, offset, max_scan, pid, last = pid_ns->last_pid;
156-
struct pidmap *map;
157-
158-
pid = last + 1;
159-
if (pid >= pid_max)
160-
pid = RESERVED_PIDS;
161-
offset = pid & BITS_PER_PAGE_MASK;
162-
map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
163-
/*
164-
* If last_pid points into the middle of the map->page we
165-
* want to scan this bitmap block twice, the second time
166-
* we start with offset == 0 (or RESERVED_PIDS).
167-
*/
168-
max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
169-
for (i = 0; i <= max_scan; ++i) {
170-
if (unlikely(!map->page)) {
171-
void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
172-
/*
173-
* Free the page if someone raced with us
174-
* installing it:
175-
*/
176-
spin_lock_irq(&pidmap_lock);
177-
if (!map->page) {
178-
map->page = page;
179-
page = NULL;
180-
}
181-
spin_unlock_irq(&pidmap_lock);
182-
kfree(page);
183-
if (unlikely(!map->page))
184-
return -ENOMEM;
185-
}
186-
if (likely(atomic_read(&map->nr_free))) {
187-
for ( ; ; ) {
188-
if (!test_and_set_bit(offset, map->page)) {
189-
atomic_dec(&map->nr_free);
190-
set_last_pid(pid_ns, last, pid);
191-
return pid;
192-
}
193-
offset = find_next_offset(map, offset);
194-
if (offset >= BITS_PER_PAGE)
195-
break;
196-
pid = mk_pid(pid_ns, map, offset);
197-
if (pid >= pid_max)
198-
break;
199-
}
200-
}
201-
if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
202-
++map;
203-
offset = 0;
204-
} else {
205-
map = &pid_ns->pidmap[0];
206-
offset = RESERVED_PIDS;
207-
if (unlikely(last == offset))
208-
break;
209-
}
210-
pid = mk_pid(pid_ns, map, offset);
211-
}
212-
return -EAGAIN;
213-
}
214-
215-
int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
216-
{
217-
int offset;
218-
struct pidmap *map, *end;
219-
220-
if (last >= PID_MAX_LIMIT)
221-
return -1;
222-
223-
offset = (last + 1) & BITS_PER_PAGE_MASK;
224-
map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
225-
end = &pid_ns->pidmap[PIDMAP_ENTRIES];
226-
for (; map < end; map++, offset = 0) {
227-
if (unlikely(!map->page))
228-
continue;
229-
offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
230-
if (offset < BITS_PER_PAGE)
231-
return mk_pid(pid_ns, map, offset);
232-
}
233-
return -1;
234-
}
235-
23694
void put_pid(struct pid *pid)
23795
{
23896
struct pid_namespace *ns;
@@ -266,7 +124,7 @@ void free_pid(struct pid *pid)
266124
struct upid *upid = pid->numbers + i;
267125
struct pid_namespace *ns = upid->ns;
268126
hlist_del_rcu(&upid->pid_chain);
269-
switch(--ns->nr_hashed) {
127+
switch (--ns->nr_hashed) {
270128
case 2:
271129
case 1:
272130
/* When all that is left in the pid namespace
@@ -284,12 +142,11 @@ void free_pid(struct pid *pid)
284142
schedule_work(&ns->proc_work);
285143
break;
286144
}
145+
146+
idr_remove(&ns->idr, upid->nr);
287147
}
288148
spin_unlock_irqrestore(&pidmap_lock, flags);
289149

290-
for (i = 0; i <= pid->level; i++)
291-
free_pidmap(pid->numbers + i);
292-
293150
call_rcu(&pid->rcu, delayed_put_pid);
294151
}
295152

@@ -308,8 +165,29 @@ struct pid *alloc_pid(struct pid_namespace *ns)
308165

309166
tmp = ns;
310167
pid->level = ns->level;
168+
311169
for (i = ns->level; i >= 0; i--) {
312-
nr = alloc_pidmap(tmp);
170+
int pid_min = 1;
171+
172+
idr_preload(GFP_KERNEL);
173+
spin_lock_irq(&pidmap_lock);
174+
175+
/*
176+
* init really needs pid 1, but after reaching the maximum
177+
* wrap back to RESERVED_PIDS
178+
*/
179+
if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
180+
pid_min = RESERVED_PIDS;
181+
182+
/*
183+
* Store a null pointer so find_pid_ns does not find
184+
* a partially initialized PID (see below).
185+
*/
186+
nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
187+
pid_max, GFP_ATOMIC);
188+
spin_unlock_irq(&pidmap_lock);
189+
idr_preload_end();
190+
313191
if (nr < 0) {
314192
retval = nr;
315193
goto out_free;
@@ -339,6 +217,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
339217
for ( ; upid >= pid->numbers; --upid) {
340218
hlist_add_head_rcu(&upid->pid_chain,
341219
&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
220+
/* Make the PID visible to find_pid_ns. */
221+
idr_replace(&upid->ns->idr, pid, upid->nr);
342222
upid->ns->nr_hashed++;
343223
}
344224
spin_unlock_irq(&pidmap_lock);
@@ -350,8 +230,11 @@ struct pid *alloc_pid(struct pid_namespace *ns)
350230
put_pid_ns(ns);
351231

352232
out_free:
233+
spin_lock_irq(&pidmap_lock);
353234
while (++i <= ns->level)
354-
free_pidmap(pid->numbers + i);
235+
idr_remove(&ns->idr, (pid->numbers + i)->nr);
236+
237+
spin_unlock_irq(&pidmap_lock);
355238

356239
kmem_cache_free(ns->pid_cachep, pid);
357240
return ERR_PTR(retval);
@@ -553,16 +436,7 @@ EXPORT_SYMBOL_GPL(task_active_pid_ns);
553436
*/
554437
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
555438
{
556-
struct pid *pid;
557-
558-
do {
559-
pid = find_pid_ns(nr, ns);
560-
if (pid)
561-
break;
562-
nr = next_pidmap(ns, nr);
563-
} while (nr > 0);
564-
565-
return pid;
439+
return idr_get_next(&ns->idr, &nr);
566440
}
567441

568442
/*
@@ -578,7 +452,7 @@ void __init pidhash_init(void)
578452
0, 4096);
579453
}
580454

581-
void __init pidmap_init(void)
455+
void __init pid_idr_init(void)
582456
{
583457
/* Verify no one has done anything silly: */
584458
BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
@@ -590,10 +464,7 @@ void __init pidmap_init(void)
590464
PIDS_PER_CPU_MIN * num_possible_cpus());
591465
pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
592466

593-
init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
594-
/* Reserve PID 0. We never call free_pidmap(0) */
595-
set_bit(0, init_pid_ns.pidmap[0].page);
596-
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
467+
idr_init(&init_pid_ns.idr);
597468

598469
init_pid_ns.pid_cachep = KMEM_CACHE(pid,
599470
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);

0 commit comments

Comments
 (0)