@@ -78,6 +78,8 @@ static struct kmem_cache *mnt_cache __ro_after_init;
78
78
static DECLARE_RWSEM (namespace_sem );
79
79
static HLIST_HEAD (unmounted ); /* protected by namespace_sem */
80
80
static LIST_HEAD (ex_mountpoints ); /* protected by namespace_sem */
81
+ static DEFINE_RWLOCK (mnt_ns_tree_lock );
82
+ static struct rb_root mnt_ns_tree = RB_ROOT ; /* protected by mnt_ns_tree_lock */
81
83
82
84
struct mount_kattr {
83
85
unsigned int attr_set ;
@@ -103,6 +105,109 @@ EXPORT_SYMBOL_GPL(fs_kobj);
103
105
*/
104
106
__cacheline_aligned_in_smp DEFINE_SEQLOCK (mount_lock );
105
107
108
+ static int mnt_ns_cmp (u64 seq , const struct mnt_namespace * ns )
109
+ {
110
+ u64 seq_b = ns -> seq ;
111
+
112
+ if (seq < seq_b )
113
+ return -1 ;
114
+ if (seq > seq_b )
115
+ return 1 ;
116
+ return 0 ;
117
+ }
118
+
119
+ static inline struct mnt_namespace * node_to_mnt_ns (const struct rb_node * node )
120
+ {
121
+ if (!node )
122
+ return NULL ;
123
+ return rb_entry (node , struct mnt_namespace , mnt_ns_tree_node );
124
+ }
125
+
126
+ static bool mnt_ns_less (struct rb_node * a , const struct rb_node * b )
127
+ {
128
+ struct mnt_namespace * ns_a = node_to_mnt_ns (a );
129
+ struct mnt_namespace * ns_b = node_to_mnt_ns (b );
130
+ u64 seq_a = ns_a -> seq ;
131
+
132
+ return mnt_ns_cmp (seq_a , ns_b ) < 0 ;
133
+ }
134
+
135
+ static void mnt_ns_tree_add (struct mnt_namespace * ns )
136
+ {
137
+ guard (write_lock )(& mnt_ns_tree_lock );
138
+ rb_add (& ns -> mnt_ns_tree_node , & mnt_ns_tree , mnt_ns_less );
139
+ }
140
+
141
+ static void mnt_ns_release (struct mnt_namespace * ns )
142
+ {
143
+ lockdep_assert_not_held (& mnt_ns_tree_lock );
144
+
145
+ /* keep alive for {list,stat}mount() */
146
+ if (refcount_dec_and_test (& ns -> passive )) {
147
+ put_user_ns (ns -> user_ns );
148
+ kfree (ns );
149
+ }
150
+ }
151
+ DEFINE_FREE (mnt_ns_release , struct mnt_namespace * , if (_T ) mnt_ns_release (_T ))
152
+
153
+ static void mnt_ns_tree_remove (struct mnt_namespace * ns )
154
+ {
155
+ /* remove from global mount namespace list */
156
+ if (!is_anon_ns (ns )) {
157
+ guard (write_lock )(& mnt_ns_tree_lock );
158
+ rb_erase (& ns -> mnt_ns_tree_node , & mnt_ns_tree );
159
+ }
160
+
161
+ mnt_ns_release (ns );
162
+ }
163
+
164
+ /*
165
+ * Returns the mount namespace which either has the specified id, or has the
166
+ * smallest id greater than the specified one.
167
+ */
168
+ static struct mnt_namespace * mnt_ns_find_id_at (u64 mnt_ns_id )
169
+ {
170
+ struct rb_node * node = mnt_ns_tree .rb_node ;
171
+ struct mnt_namespace * ret = NULL ;
172
+
173
+ lockdep_assert_held (& mnt_ns_tree_lock );
174
+
175
+ while (node ) {
176
+ struct mnt_namespace * n = node_to_mnt_ns (node );
177
+
178
+ if (mnt_ns_id <= n -> seq ) {
179
+ ret = node_to_mnt_ns (node );
180
+ if (mnt_ns_id == n -> seq )
181
+ break ;
182
+ node = node -> rb_left ;
183
+ } else {
184
+ node = node -> rb_right ;
185
+ }
186
+ }
187
+ return ret ;
188
+ }
189
+
190
+ /*
191
+ * Lookup a mount namespace by id and take a passive reference count. Taking a
192
+ * passive reference means the mount namespace can be emptied if e.g., the last
193
+ * task holding an active reference exits. To access the mounts of the
194
+ * namespace the @namespace_sem must first be acquired. If the namespace has
195
+ * already shut down before acquiring @namespace_sem, {list,stat}mount() will
196
+ * see that the mount rbtree of the namespace is empty.
197
+ */
198
+ static struct mnt_namespace * lookup_mnt_ns (u64 mnt_ns_id )
199
+ {
200
+ struct mnt_namespace * ns ;
201
+
202
+ guard (read_lock )(& mnt_ns_tree_lock );
203
+ ns = mnt_ns_find_id_at (mnt_ns_id );
204
+ if (!ns || ns -> seq != mnt_ns_id )
205
+ return NULL ;
206
+
207
+ refcount_inc (& ns -> passive );
208
+ return ns ;
209
+ }
210
+
106
211
static inline void lock_mount_hash (void )
107
212
{
108
213
write_seqlock (& mount_lock );
@@ -3733,8 +3838,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
3733
3838
if (!is_anon_ns (ns ))
3734
3839
ns_free_inum (& ns -> ns );
3735
3840
dec_mnt_namespaces (ns -> ucounts );
3736
- put_user_ns (ns -> user_ns );
3737
- kfree (ns );
3841
+ mnt_ns_tree_remove (ns );
3738
3842
}
3739
3843
3740
3844
/*
@@ -3773,7 +3877,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
3773
3877
if (!anon )
3774
3878
new_ns -> seq = atomic64_add_return (1 , & mnt_ns_seq );
3775
3879
refcount_set (& new_ns -> ns .count , 1 );
3880
+ refcount_set (& new_ns -> passive , 1 );
3776
3881
new_ns -> mounts = RB_ROOT ;
3882
+ RB_CLEAR_NODE (& new_ns -> mnt_ns_tree_node );
3777
3883
init_waitqueue_head (& new_ns -> poll );
3778
3884
new_ns -> user_ns = get_user_ns (user_ns );
3779
3885
new_ns -> ucounts = ucounts ;
@@ -3850,6 +3956,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
3850
3956
while (p -> mnt .mnt_root != q -> mnt .mnt_root )
3851
3957
p = next_mnt (skip_mnt_tree (p ), old );
3852
3958
}
3959
+ mnt_ns_tree_add (new_ns );
3853
3960
namespace_unlock ();
3854
3961
3855
3962
if (rootmnt )
@@ -4867,6 +4974,12 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq)
4867
4974
return 0 ;
4868
4975
}
4869
4976
4977
+ static void statmount_mnt_ns_id (struct kstatmount * s , struct mnt_namespace * ns )
4978
+ {
4979
+ s -> sm .mask |= STATMOUNT_MNT_NS_ID ;
4980
+ s -> sm .mnt_ns_id = ns -> seq ;
4981
+ }
4982
+
4870
4983
static int statmount_string (struct kstatmount * s , u64 flag )
4871
4984
{
4872
4985
int ret ;
@@ -4930,14 +5043,15 @@ static int copy_statmount_to_user(struct kstatmount *s)
4930
5043
static int do_statmount (struct kstatmount * s )
4931
5044
{
4932
5045
struct mount * m = real_mount (s -> mnt );
5046
+ struct mnt_namespace * ns = m -> mnt_ns ;
4933
5047
int err ;
4934
5048
4935
5049
/*
4936
5050
* Don't trigger audit denials. We just want to determine what
4937
5051
* mounts to show users.
4938
5052
*/
4939
5053
if (!is_path_reachable (m , m -> mnt .mnt_root , & s -> root ) &&
4940
- !ns_capable_noaudit (& init_user_ns , CAP_SYS_ADMIN ))
5054
+ !ns_capable_noaudit (ns -> user_ns , CAP_SYS_ADMIN ))
4941
5055
return - EPERM ;
4942
5056
4943
5057
err = security_sb_statfs (s -> mnt -> mnt_root );
@@ -4962,6 +5076,9 @@ static int do_statmount(struct kstatmount *s)
4962
5076
if (!err && s -> mask & STATMOUNT_MNT_POINT )
4963
5077
err = statmount_string (s , STATMOUNT_MNT_POINT );
4964
5078
5079
+ if (!err && s -> mask & STATMOUNT_MNT_NS_ID )
5080
+ statmount_mnt_ns_id (s , ns );
5081
+
4965
5082
if (err )
4966
5083
return err ;
4967
5084
@@ -5003,7 +5120,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
5003
5120
int ret ;
5004
5121
size_t usize ;
5005
5122
5006
- BUILD_BUG_ON (sizeof (struct mnt_id_req ) != MNT_ID_REQ_SIZE_VER0 );
5123
+ BUILD_BUG_ON (sizeof (struct mnt_id_req ) != MNT_ID_REQ_SIZE_VER1 );
5007
5124
5008
5125
ret = get_user (usize , & req -> size );
5009
5126
if (ret )
@@ -5021,10 +5138,63 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
5021
5138
return 0 ;
5022
5139
}
5023
5140
5141
+ static struct mount * listmnt_next (struct mount * curr , bool reverse )
5142
+ {
5143
+ struct rb_node * node ;
5144
+
5145
+ if (reverse )
5146
+ node = rb_prev (& curr -> mnt_node );
5147
+ else
5148
+ node = rb_next (& curr -> mnt_node );
5149
+
5150
+ return node_to_mount (node );
5151
+ }
5152
+
5153
+ static int grab_requested_root (struct mnt_namespace * ns , struct path * root )
5154
+ {
5155
+ struct mount * first ;
5156
+
5157
+ rwsem_assert_held (& namespace_sem );
5158
+
5159
+ /* We're looking at our own ns, just use get_fs_root. */
5160
+ if (ns == current -> nsproxy -> mnt_ns ) {
5161
+ get_fs_root (current -> fs , root );
5162
+ return 0 ;
5163
+ }
5164
+
5165
+ /*
5166
+ * We have to find the first mount in our ns and use that, however it
5167
+ * may not exist, so handle that properly.
5168
+ */
5169
+ if (RB_EMPTY_ROOT (& ns -> mounts ))
5170
+ return - ENOENT ;
5171
+
5172
+ first = listmnt_next (ns -> root , false);
5173
+ if (!first )
5174
+ return - ENOENT ;
5175
+ root -> mnt = mntget (& first -> mnt );
5176
+ root -> dentry = dget (root -> mnt -> mnt_root );
5177
+ return 0 ;
5178
+ }
5179
+
5180
+ /*
5181
+ * If the user requested a specific mount namespace id, look that up and return
5182
+ * that, or if not simply grab a passive reference on our mount namespace and
5183
+ * return that.
5184
+ */
5185
+ static struct mnt_namespace * grab_requested_mnt_ns (u64 mnt_ns_id )
5186
+ {
5187
+ if (mnt_ns_id )
5188
+ return lookup_mnt_ns (mnt_ns_id );
5189
+ refcount_inc (& current -> nsproxy -> mnt_ns -> passive );
5190
+ return current -> nsproxy -> mnt_ns ;
5191
+ }
5192
+
5024
5193
SYSCALL_DEFINE4 (statmount , const struct mnt_id_req __user * , req ,
5025
5194
struct statmount __user * , buf , size_t , bufsize ,
5026
5195
unsigned int , flags )
5027
5196
{
5197
+ struct mnt_namespace * ns __free (mnt_ns_release ) = NULL ;
5028
5198
struct vfsmount * mnt ;
5029
5199
struct mnt_id_req kreq ;
5030
5200
struct kstatmount ks ;
@@ -5039,21 +5209,41 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
5039
5209
if (ret )
5040
5210
return ret ;
5041
5211
5212
+ ns = grab_requested_mnt_ns (kreq .mnt_ns_id );
5213
+ if (!ns )
5214
+ return - ENOENT ;
5215
+
5216
+ if (kreq .mnt_ns_id && (ns != current -> nsproxy -> mnt_ns ) &&
5217
+ !ns_capable_noaudit (ns -> user_ns , CAP_SYS_ADMIN ))
5218
+ return - ENOENT ;
5219
+
5042
5220
retry :
5043
5221
ret = prepare_kstatmount (& ks , & kreq , buf , bufsize , seq_size );
5044
5222
if (ret )
5045
5223
return ret ;
5046
5224
5047
5225
down_read (& namespace_sem );
5048
- mnt = lookup_mnt_in_ns (kreq .mnt_id , current -> nsproxy -> mnt_ns );
5226
+ /* Has the namespace already been emptied? */
5227
+ if (kreq .mnt_ns_id && RB_EMPTY_ROOT (& ns -> mounts )) {
5228
+ up_read (& namespace_sem );
5229
+ kvfree (ks .seq .buf );
5230
+ return - ENOENT ;
5231
+ }
5232
+
5233
+ mnt = lookup_mnt_in_ns (kreq .mnt_id , ns );
5049
5234
if (!mnt ) {
5050
5235
up_read (& namespace_sem );
5051
5236
kvfree (ks .seq .buf );
5052
5237
return - ENOENT ;
5053
5238
}
5054
5239
5055
5240
ks .mnt = mnt ;
5056
- get_fs_root (current -> fs , & ks .root );
5241
+ ret = grab_requested_root (ns , & ks .root );
5242
+ if (ret ) {
5243
+ up_read (& namespace_sem );
5244
+ kvfree (ks .seq .buf );
5245
+ return ret ;
5246
+ }
5057
5247
ret = do_statmount (& ks );
5058
5248
path_put (& ks .root );
5059
5249
up_read (& namespace_sem );
@@ -5066,30 +5256,21 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
5066
5256
return ret ;
5067
5257
}
5068
5258
5069
- static struct mount * listmnt_next (struct mount * curr , bool reverse )
5070
- {
5071
- struct rb_node * node ;
5072
-
5073
- if (reverse )
5074
- node = rb_prev (& curr -> mnt_node );
5075
- else
5076
- node = rb_next (& curr -> mnt_node );
5077
-
5078
- return node_to_mount (node );
5079
- }
5080
-
5081
- static ssize_t do_listmount (u64 mnt_parent_id , u64 last_mnt_id , u64 * mnt_ids ,
5082
- size_t nr_mnt_ids , bool reverse )
5259
+ static ssize_t do_listmount (struct mnt_namespace * ns , u64 mnt_parent_id ,
5260
+ u64 last_mnt_id , u64 * mnt_ids , size_t nr_mnt_ids ,
5261
+ bool reverse )
5083
5262
{
5084
5263
struct path root __free (path_put ) = {};
5085
- struct mnt_namespace * ns = current -> nsproxy -> mnt_ns ;
5086
5264
struct path orig ;
5087
5265
struct mount * r , * first ;
5088
5266
ssize_t ret ;
5089
5267
5090
5268
rwsem_assert_held (& namespace_sem );
5091
5269
5092
- get_fs_root (current -> fs , & root );
5270
+ ret = grab_requested_root (ns , & root );
5271
+ if (ret )
5272
+ return ret ;
5273
+
5093
5274
if (mnt_parent_id == LSMT_ROOT ) {
5094
5275
orig = root ;
5095
5276
} else {
@@ -5104,7 +5285,7 @@ static ssize_t do_listmount(u64 mnt_parent_id, u64 last_mnt_id, u64 *mnt_ids,
5104
5285
* mounts to show users.
5105
5286
*/
5106
5287
if (!is_path_reachable (real_mount (orig .mnt ), orig .dentry , & root ) &&
5107
- !ns_capable_noaudit (& init_user_ns , CAP_SYS_ADMIN ))
5288
+ !ns_capable_noaudit (ns -> user_ns , CAP_SYS_ADMIN ))
5108
5289
return - EPERM ;
5109
5290
5110
5291
ret = security_sb_statfs (orig .dentry );
@@ -5141,6 +5322,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
5141
5322
{
5142
5323
u64 * kmnt_ids __free (kvfree ) = NULL ;
5143
5324
const size_t maxcount = 1000000 ;
5325
+ struct mnt_namespace * ns __free (mnt_ns_release ) = NULL ;
5144
5326
struct mnt_id_req kreq ;
5145
5327
ssize_t ret ;
5146
5328
@@ -5167,8 +5349,16 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
5167
5349
if (!kmnt_ids )
5168
5350
return - ENOMEM ;
5169
5351
5352
+ ns = grab_requested_mnt_ns (kreq .mnt_ns_id );
5353
+ if (!ns )
5354
+ return - ENOENT ;
5355
+
5356
+ if (kreq .mnt_ns_id && (ns != current -> nsproxy -> mnt_ns ) &&
5357
+ !ns_capable_noaudit (ns -> user_ns , CAP_SYS_ADMIN ))
5358
+ return - ENOENT ;
5359
+
5170
5360
scoped_guard (rwsem_read , & namespace_sem )
5171
- ret = do_listmount (kreq .mnt_id , kreq .param , kmnt_ids ,
5361
+ ret = do_listmount (ns , kreq .mnt_id , kreq .param , kmnt_ids ,
5172
5362
nr_mnt_ids , (flags & LISTMOUNT_REVERSE ));
5173
5363
5174
5364
if (copy_to_user (mnt_ids , kmnt_ids , ret * sizeof (* mnt_ids )))
@@ -5204,6 +5394,8 @@ static void __init init_mount_tree(void)
5204
5394
5205
5395
set_fs_pwd (current -> fs , & root );
5206
5396
set_fs_root (current -> fs , & root );
5397
+
5398
+ mnt_ns_tree_add (ns );
5207
5399
}
5208
5400
5209
5401
void __init mnt_init (void )
0 commit comments