Skip to content

Commit 22d917d

Browse files
committed
userns: Rework the user_namespace adding uid/gid mapping support
- Convert the old uid mapping functions into compatibility wrappers - Add a uid/gid mapping layer from user space uid and gids to kernel internal uids and gids that is extent based for simplicity and speed. * Working with number space after mapping uids/gids into their kernel internal version adds only mapping complexity over what we have today, leaving the kernel code easy to understand and test. - Add proc files /proc/self/uid_map /proc/self/gid_map These files display the mapping and allow a mapping to be added if a mapping does not exist. - Allow entering the user namespace without a uid or gid mapping. Since we are starting with an existing user our uids and gids still have global mappings so are still valid and useful they just don't have local mappings. The requirement for things to work are global uid and gid so it is odd but perfectly fine not to have a local uid and gid mapping. Not requiring global uid and gid mappings greatly simplifies the logic of setting up the uid and gid mappings by allowing the mappings to be set after the namespace is created which makes the slight weirdness worth it. - Make the mappings in the initial user namespace to the global uid/gid space explicit. Today it is an identity mapping but in the future we may want to twist this for debugging, similar to what we do with jiffies. - Document the memory ordering requirements of setting the uid and gid mappings. We only allow the mappings to be set once and there are no pointers involved so the requirements are trivial but a little atypical. Performance: In this scheme for the permission checks the performance is expected to stay the same as the actual machine instructions should remain the same. The worst case I could think of is ls -l on a large directory where all of the stat results need to be translated from kuids and kgids to uids and gids. So I benchmarked that case on my laptop with a dual core hyperthread Intel i5-2520M cpu with 3M of cpu cache.
My benchmark consisted of going to single user mode where nothing else was running. On an ext4 filesystem opening 1,000,000 files and looping through all of the files 1000 times and calling fstat on the individual files. This was to ensure I was benchmarking stat times where the inodes were in the kernel's cache, but the inode values were not in the processor's cache. My results: v3.4-rc1: ~= 156ns (unmodified v3.4-rc1 with user namespace support disabled) v3.4-rc1-userns-: ~= 155ns (v3.4-rc1 with my user namespace patches and user namespace support disabled) v3.4-rc1-userns+: ~= 164ns (v3.4-rc1 with my user namespace patches and user namespace support enabled) All of the configurations ran in roughly 120ns when I performed tests that ran in the cpu cache. So in summary the performance impact is: 1ns improvement in the worst case with user namespace support compiled out. 8ns aka 5% slowdown in the worst case with user namespace support compiled in. Acked-by: Serge Hallyn <[email protected]> Signed-off-by: Eric W. Biederman <[email protected]>
1 parent 783291e commit 22d917d

File tree

5 files changed

+644
-48
lines changed

5 files changed

+644
-48
lines changed

fs/proc/base.c

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
#include <linux/oom.h>
8282
#include <linux/elf.h>
8383
#include <linux/pid_namespace.h>
84+
#include <linux/user_namespace.h>
8485
#include <linux/fs_struct.h>
8586
#include <linux/slab.h>
8687
#include <linux/flex_array.h>
@@ -2943,6 +2944,74 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
29432944
}
29442945
#endif /* CONFIG_TASK_IO_ACCOUNTING */
29452946

2947+
#ifdef CONFIG_USER_NS
2948+
static int proc_id_map_open(struct inode *inode, struct file *file,
2949+
struct seq_operations *seq_ops)
2950+
{
2951+
struct user_namespace *ns = NULL;
2952+
struct task_struct *task;
2953+
struct seq_file *seq;
2954+
int ret = -EINVAL;
2955+
2956+
task = get_proc_task(inode);
2957+
if (task) {
2958+
rcu_read_lock();
2959+
ns = get_user_ns(task_cred_xxx(task, user_ns));
2960+
rcu_read_unlock();
2961+
put_task_struct(task);
2962+
}
2963+
if (!ns)
2964+
goto err;
2965+
2966+
ret = seq_open(file, seq_ops);
2967+
if (ret)
2968+
goto err_put_ns;
2969+
2970+
seq = file->private_data;
2971+
seq->private = ns;
2972+
2973+
return 0;
2974+
err_put_ns:
2975+
put_user_ns(ns);
2976+
err:
2977+
return ret;
2978+
}
2979+
2980+
static int proc_id_map_release(struct inode *inode, struct file *file)
2981+
{
2982+
struct seq_file *seq = file->private_data;
2983+
struct user_namespace *ns = seq->private;
2984+
put_user_ns(ns);
2985+
return seq_release(inode, file);
2986+
}
2987+
2988+
static int proc_uid_map_open(struct inode *inode, struct file *file)
2989+
{
2990+
return proc_id_map_open(inode, file, &proc_uid_seq_operations);
2991+
}
2992+
2993+
static int proc_gid_map_open(struct inode *inode, struct file *file)
2994+
{
2995+
return proc_id_map_open(inode, file, &proc_gid_seq_operations);
2996+
}
2997+
2998+
static const struct file_operations proc_uid_map_operations = {
2999+
.open = proc_uid_map_open,
3000+
.write = proc_uid_map_write,
3001+
.read = seq_read,
3002+
.llseek = seq_lseek,
3003+
.release = proc_id_map_release,
3004+
};
3005+
3006+
static const struct file_operations proc_gid_map_operations = {
3007+
.open = proc_gid_map_open,
3008+
.write = proc_gid_map_write,
3009+
.read = seq_read,
3010+
.llseek = seq_lseek,
3011+
.release = proc_id_map_release,
3012+
};
3013+
#endif /* CONFIG_USER_NS */
3014+
29463015
static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
29473016
struct pid *pid, struct task_struct *task)
29483017
{
@@ -3045,6 +3114,10 @@ static const struct pid_entry tgid_base_stuff[] = {
30453114
#ifdef CONFIG_HARDWALL
30463115
INF("hardwall", S_IRUGO, proc_pid_hardwall),
30473116
#endif
3117+
#ifdef CONFIG_USER_NS
3118+
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
3119+
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
3120+
#endif
30483121
};
30493122

30503123
static int proc_tgid_base_readdir(struct file * filp,
@@ -3400,6 +3473,10 @@ static const struct pid_entry tid_base_stuff[] = {
34003473
#ifdef CONFIG_HARDWALL
34013474
INF("hardwall", S_IRUGO, proc_pid_hardwall),
34023475
#endif
3476+
#ifdef CONFIG_USER_NS
3477+
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
3478+
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
3479+
#endif
34033480
};
34043481

34053482
static int proc_tid_base_readdir(struct file * filp,

include/linux/uidgid.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,28 @@ static inline bool gid_valid(kgid_t gid)
127127
return !gid_eq(gid, INVALID_GID);
128128
}
129129

130+
#ifdef CONFIG_USER_NS
131+
132+
extern kuid_t make_kuid(struct user_namespace *from, uid_t uid);
133+
extern kgid_t make_kgid(struct user_namespace *from, gid_t gid);
134+
135+
extern uid_t from_kuid(struct user_namespace *to, kuid_t uid);
136+
extern gid_t from_kgid(struct user_namespace *to, kgid_t gid);
137+
extern uid_t from_kuid_munged(struct user_namespace *to, kuid_t uid);
138+
extern gid_t from_kgid_munged(struct user_namespace *to, kgid_t gid);
139+
140+
/*
 * Report whether @uid can be expressed in @ns.  A from_kuid() result of
 * (uid_t) -1 is treated as "no mapping".
 */
static inline bool kuid_has_mapping(struct user_namespace *ns, kuid_t uid)
{
	const uid_t unmapped = (uid_t) -1;

	return from_kuid(ns, uid) != unmapped;
}
144+
145+
/*
 * Report whether @gid can be expressed in @ns.  A from_kgid() result of
 * (gid_t) -1 is treated as "no mapping".
 */
static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
{
	const gid_t unmapped = (gid_t) -1;

	return from_kgid(ns, gid) != unmapped;
}
149+
150+
#else
151+
130152
static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid)
131153
{
132154
return KUIDT_INIT(uid);
@@ -173,4 +195,6 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
173195
return true;
174196
}
175197

198+
#endif /* CONFIG_USER_NS */
199+
176200
#endif /* _LINUX_UIDGID_H */

include/linux/user_namespace.h

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,20 @@
66
#include <linux/sched.h>
77
#include <linux/err.h>
88

9+
#define UID_GID_MAP_MAX_EXTENTS 5
10+
11+
struct uid_gid_map { /* 64 bytes -- 1 cache line */
12+
u32 nr_extents;
13+
struct uid_gid_extent {
14+
u32 first;
15+
u32 lower_first;
16+
u32 count;
17+
} extent[UID_GID_MAP_MAX_EXTENTS];
18+
};
19+
920
struct user_namespace {
21+
struct uid_gid_map uid_map;
22+
struct uid_gid_map gid_map;
1023
struct kref kref;
1124
struct user_namespace *parent;
1225
kuid_t owner;
@@ -33,9 +46,11 @@ static inline void put_user_ns(struct user_namespace *ns)
3346
kref_put(&ns->kref, free_user_ns);
3447
}
3548

36-
uid_t user_ns_map_uid(struct user_namespace *to, const struct cred *cred, uid_t uid);
37-
gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t gid);
38-
49+
struct seq_operations;
50+
extern struct seq_operations proc_uid_seq_operations;
51+
extern struct seq_operations proc_gid_seq_operations;
52+
extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
53+
extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
3954
#else
4055

4156
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -52,17 +67,18 @@ static inline void put_user_ns(struct user_namespace *ns)
5267
{
5368
}
5469

70+
#endif
71+
5572
/*
 * Compatibility wrapper around the kuid mapping helpers: @uid is first
 * turned into a kuid_t relative to @cred's user namespace with
 * make_kuid(), and that kuid is then converted for namespace @to with
 * from_kuid_munged().
 */
static inline uid_t user_ns_map_uid(struct user_namespace *to,
	const struct cred *cred, uid_t uid)
{
	kuid_t kuid = make_kuid(cred->user_ns, uid);

	return from_kuid_munged(to, kuid);
}
77+
6078
/*
 * Compatibility wrapper around the kgid mapping helpers: @gid is first
 * turned into a kgid_t relative to @cred's user namespace with
 * make_kgid(), and that kgid is then converted for namespace @to with
 * from_kgid_munged().
 */
static inline gid_t user_ns_map_gid(struct user_namespace *to,
	const struct cred *cred, gid_t gid)
{
	kgid_t kgid = make_kgid(cred->user_ns, gid);

	return from_kgid_munged(to, kgid);
}
6583

66-
#endif
67-
6884
#endif /* _LINUX_USER_H */

kernel/user.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,22 @@
2222
* and 1 for... ?
2323
*/
2424
struct user_namespace init_user_ns = {
25+
.uid_map = {
26+
.nr_extents = 1,
27+
.extent[0] = {
28+
.first = 0,
29+
.lower_first = 0,
30+
.count = 4294967295,
31+
},
32+
},
33+
.gid_map = {
34+
.nr_extents = 1,
35+
.extent[0] = {
36+
.first = 0,
37+
.lower_first = 0,
38+
.count = 4294967295,
39+
},
40+
},
2541
.kref = {
2642
.refcount = ATOMIC_INIT(3),
2743
},

0 commit comments

Comments
 (0)