Skip to content

Commit 1064f87

Browse files
committed
mnt: Tuck mounts under others instead of creating shadow/side mounts.
Ever since mount propagation was introduced in cases where a mount in propagated to parent mount mountpoint pair that is already in use the code has placed the new mount behind the old mount in the mount hash table. This implementation detail is problematic as it allows creating arbitrary length mount hash chains. Furthermore it invalidates the constraint maintained elsewhere in the mount code that a parent mount and a mountpoint pair will have exactly one mount upon them. Making it hard to deal with and to talk about this special case in the mount code. Modify mount propagation to notice when there is already a mount at the parent mount and mountpoint where a new mount is propagating to and place that preexisting mount on top of the new mount. Modify unmount propagation to notice when a mount that is being unmounted has another mount on top of it (and no other children), and to replace the unmounted mount with the mount on top of it. Move the MNT_UMUONT test from __lookup_mnt_last into __propagate_umount as that is the only call of __lookup_mnt_last where MNT_UMOUNT may be set on any mount visible in the mount hash table. These modifications allow: - __lookup_mnt_last to be removed. - attach_shadows to be renamed __attach_mnt and its shadow handling to be removed. - commit_tree to be simplified - copy_tree to be simplified The result is an easier to understand tree of mounts that does not allow creation of arbitrary length hash chains in the mount hash table. The result is also a very slight userspace visible difference in semantics. The following two cases now behave identically, where before order mattered: case 1: (explicit user action) B is a slave of A mount something on A/a , it will propagate to B/a and than mount something on B/a case 2: (tucked mount) B is a slave of A mount something on B/a and than mount something on A/a Histroically umount A/a would fail in case 1 and succeed in case 2. Now umount A/a succeeds in both configurations. 
This very small change in semantics appears if anything to be a bug fix to me, and my survey of userspace leads me to believe that no programs will notice or care about this subtle semantic change. v2: Updated mnt_change_mountpoint to not call dput or mntput and instead to decrement the counts directly. It is guaranteed that there will be other references when mnt_change_mountpoint is called so this is safe. v3: Moved put_mountpoint under mount_lock in attach_recursive_mnt as the locking in fs/namespace.c changed between v2 and v3. v4: Reworked the logic in propagate_mount_busy and __propagate_umount that detects when a mount completely covers another mount. v5: Removed unnecessary tests whose result is always true in find_topper and attach_recursive_mnt. v6: Document the user space visible semantic difference. Cc: [email protected] Fixes: b90fa9a ("[PATCH] shared mount handling: bind and rbind") Tested-by: Andrei Vagin <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]>
1 parent 749860c commit 1064f87

File tree

4 files changed

+111
-63
lines changed

4 files changed

+111
-63
lines changed

fs/mount.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ static inline int is_mounted(struct vfsmount *mnt)
8989
}
9090

9191
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
92-
extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
9392

9493
extern int __legitimize_mnt(struct vfsmount *, unsigned);
9594
extern bool legitimize_mnt(struct vfsmount *, unsigned);

fs/namespace.c

Lines changed: 60 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -636,28 +636,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
636636
return NULL;
637637
}
638638

639-
/*
640-
* find the last mount at @dentry on vfsmount @mnt.
641-
* mount_lock must be held.
642-
*/
643-
struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
644-
{
645-
struct mount *p, *res = NULL;
646-
p = __lookup_mnt(mnt, dentry);
647-
if (!p)
648-
goto out;
649-
if (!(p->mnt.mnt_flags & MNT_UMOUNT))
650-
res = p;
651-
hlist_for_each_entry_continue(p, mnt_hash) {
652-
if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
653-
break;
654-
if (!(p->mnt.mnt_flags & MNT_UMOUNT))
655-
res = p;
656-
}
657-
out:
658-
return res;
659-
}
660-
661639
/*
662640
* lookup_mnt - Return the first child mount mounted at path
663641
*
@@ -878,6 +856,13 @@ void mnt_set_mountpoint(struct mount *mnt,
878856
hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
879857
}
880858

859+
static void __attach_mnt(struct mount *mnt, struct mount *parent)
860+
{
861+
hlist_add_head_rcu(&mnt->mnt_hash,
862+
m_hash(&parent->mnt, mnt->mnt_mountpoint));
863+
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
864+
}
865+
881866
/*
882867
* vfsmount lock must be held for write
883868
*/
@@ -886,28 +871,45 @@ static void attach_mnt(struct mount *mnt,
886871
struct mountpoint *mp)
887872
{
888873
mnt_set_mountpoint(parent, mp, mnt);
889-
hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
890-
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
874+
__attach_mnt(mnt, parent);
891875
}
892876

893-
static void attach_shadowed(struct mount *mnt,
894-
struct mount *parent,
895-
struct mount *shadows)
877+
void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
896878
{
897-
if (shadows) {
898-
hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
899-
list_add(&mnt->mnt_child, &shadows->mnt_child);
900-
} else {
901-
hlist_add_head_rcu(&mnt->mnt_hash,
902-
m_hash(&parent->mnt, mnt->mnt_mountpoint));
903-
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
904-
}
879+
struct mountpoint *old_mp = mnt->mnt_mp;
880+
struct dentry *old_mountpoint = mnt->mnt_mountpoint;
881+
struct mount *old_parent = mnt->mnt_parent;
882+
883+
list_del_init(&mnt->mnt_child);
884+
hlist_del_init(&mnt->mnt_mp_list);
885+
hlist_del_init_rcu(&mnt->mnt_hash);
886+
887+
attach_mnt(mnt, parent, mp);
888+
889+
put_mountpoint(old_mp);
890+
891+
/*
892+
* Safely avoid even the suggestion this code might sleep or
893+
* lock the mount hash by taking advantage of the knowledge that
894+
* mnt_change_mountpoint will not release the final reference
895+
* to a mountpoint.
896+
*
897+
* During mounting, the mount passed in as the parent mount will
898+
* continue to use the old mountpoint and during unmounting, the
899+
* old mountpoint will continue to exist until namespace_unlock,
900+
* which happens well after mnt_change_mountpoint.
901+
*/
902+
spin_lock(&old_mountpoint->d_lock);
903+
old_mountpoint->d_lockref.count--;
904+
spin_unlock(&old_mountpoint->d_lock);
905+
906+
mnt_add_count(old_parent, -1);
905907
}
906908

907909
/*
908910
* vfsmount lock must be held for write
909911
*/
910-
static void commit_tree(struct mount *mnt, struct mount *shadows)
912+
static void commit_tree(struct mount *mnt)
911913
{
912914
struct mount *parent = mnt->mnt_parent;
913915
struct mount *m;
@@ -925,7 +927,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
925927
n->mounts += n->pending_mounts;
926928
n->pending_mounts = 0;
927929

928-
attach_shadowed(mnt, parent, shadows);
930+
__attach_mnt(mnt, parent);
929931
touch_mnt_namespace(n);
930932
}
931933

@@ -1779,7 +1781,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
17791781
continue;
17801782

17811783
for (s = r; s; s = next_mnt(s, r)) {
1782-
struct mount *t = NULL;
17831784
if (!(flag & CL_COPY_UNBINDABLE) &&
17841785
IS_MNT_UNBINDABLE(s)) {
17851786
s = skip_mnt_tree(s);
@@ -1801,14 +1802,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
18011802
goto out;
18021803
lock_mount_hash();
18031804
list_add_tail(&q->mnt_list, &res->mnt_list);
1804-
mnt_set_mountpoint(parent, p->mnt_mp, q);
1805-
if (!list_empty(&parent->mnt_mounts)) {
1806-
t = list_last_entry(&parent->mnt_mounts,
1807-
struct mount, mnt_child);
1808-
if (t->mnt_mp != p->mnt_mp)
1809-
t = NULL;
1810-
}
1811-
attach_shadowed(q, parent, t);
1805+
attach_mnt(q, parent, p->mnt_mp);
18121806
unlock_mount_hash();
18131807
}
18141808
}
@@ -2007,10 +2001,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
20072001
{
20082002
HLIST_HEAD(tree_list);
20092003
struct mnt_namespace *ns = dest_mnt->mnt_ns;
2004+
struct mountpoint *smp;
20102005
struct mount *child, *p;
20112006
struct hlist_node *n;
20122007
int err;
20132008

2009+
/* Preallocate a mountpoint in case the new mounts need
2010+
* to be tucked under other mounts.
2011+
*/
2012+
smp = get_mountpoint(source_mnt->mnt.mnt_root);
2013+
if (IS_ERR(smp))
2014+
return PTR_ERR(smp);
2015+
20142016
/* Is there space to add these mounts to the mount namespace? */
20152017
if (!parent_path) {
20162018
err = count_mounts(ns, source_mnt);
@@ -2037,16 +2039,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
20372039
touch_mnt_namespace(source_mnt->mnt_ns);
20382040
} else {
20392041
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
2040-
commit_tree(source_mnt, NULL);
2042+
commit_tree(source_mnt);
20412043
}
20422044

20432045
hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
20442046
struct mount *q;
20452047
hlist_del_init(&child->mnt_hash);
2046-
q = __lookup_mnt_last(&child->mnt_parent->mnt,
2047-
child->mnt_mountpoint);
2048-
commit_tree(child, q);
2048+
q = __lookup_mnt(&child->mnt_parent->mnt,
2049+
child->mnt_mountpoint);
2050+
if (q)
2051+
mnt_change_mountpoint(child, smp, q);
2052+
commit_tree(child);
20492053
}
2054+
put_mountpoint(smp);
20502055
unlock_mount_hash();
20512056

20522057
return 0;
@@ -2061,6 +2066,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
20612066
cleanup_group_ids(source_mnt, NULL);
20622067
out:
20632068
ns->pending_mounts = 0;
2069+
2070+
read_seqlock_excl(&mount_lock);
2071+
put_mountpoint(smp);
2072+
read_sequnlock_excl(&mount_lock);
2073+
20642074
return err;
20652075
}
20662076

fs/pnode.c

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,21 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
322322
return ret;
323323
}
324324

325+
static struct mount *find_topper(struct mount *mnt)
326+
{
327+
/* If there is exactly one mount covering mnt completely return it. */
328+
struct mount *child;
329+
330+
if (!list_is_singular(&mnt->mnt_mounts))
331+
return NULL;
332+
333+
child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
334+
if (child->mnt_mountpoint != mnt->mnt.mnt_root)
335+
return NULL;
336+
337+
return child;
338+
}
339+
325340
/*
326341
* return true if the refcount is greater than count
327342
*/
@@ -342,9 +357,8 @@ static inline int do_refcount_check(struct mount *mnt, int count)
342357
*/
343358
int propagate_mount_busy(struct mount *mnt, int refcnt)
344359
{
345-
struct mount *m, *child;
360+
struct mount *m, *child, *topper;
346361
struct mount *parent = mnt->mnt_parent;
347-
int ret = 0;
348362

349363
if (mnt == parent)
350364
return do_refcount_check(mnt, refcnt);
@@ -359,12 +373,24 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
359373

360374
for (m = propagation_next(parent, parent); m;
361375
m = propagation_next(m, parent)) {
362-
child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
363-
if (child && list_empty(&child->mnt_mounts) &&
364-
(ret = do_refcount_check(child, 1)))
365-
break;
376+
int count = 1;
377+
child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
378+
if (!child)
379+
continue;
380+
381+
/* Is there exactly one mount on the child that covers
382+
* it completely whose reference should be ignored?
383+
*/
384+
topper = find_topper(child);
385+
if (topper)
386+
count += 1;
387+
else if (!list_empty(&child->mnt_mounts))
388+
continue;
389+
390+
if (do_refcount_check(child, count))
391+
return 1;
366392
}
367-
return ret;
393+
return 0;
368394
}
369395

370396
/*
@@ -381,7 +407,7 @@ void propagate_mount_unlock(struct mount *mnt)
381407

382408
for (m = propagation_next(parent, parent); m;
383409
m = propagation_next(m, parent)) {
384-
child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
410+
child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
385411
if (child)
386412
child->mnt.mnt_flags &= ~MNT_LOCKED;
387413
}
@@ -399,9 +425,11 @@ static void mark_umount_candidates(struct mount *mnt)
399425

400426
for (m = propagation_next(parent, parent); m;
401427
m = propagation_next(m, parent)) {
402-
struct mount *child = __lookup_mnt_last(&m->mnt,
428+
struct mount *child = __lookup_mnt(&m->mnt,
403429
mnt->mnt_mountpoint);
404-
if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
430+
if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
431+
continue;
432+
if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
405433
SET_MNT_MARK(child);
406434
}
407435
}
@@ -420,8 +448,8 @@ static void __propagate_umount(struct mount *mnt)
420448

421449
for (m = propagation_next(parent, parent); m;
422450
m = propagation_next(m, parent)) {
423-
424-
struct mount *child = __lookup_mnt_last(&m->mnt,
451+
struct mount *topper;
452+
struct mount *child = __lookup_mnt(&m->mnt,
425453
mnt->mnt_mountpoint);
426454
/*
427455
* umount the child only if the child has no children
@@ -430,6 +458,15 @@ static void __propagate_umount(struct mount *mnt)
430458
if (!child || !IS_MNT_MARKED(child))
431459
continue;
432460
CLEAR_MNT_MARK(child);
461+
462+
/* If there is exactly one mount covering all of child
463+
* replace child with that mount.
464+
*/
465+
topper = find_topper(child);
466+
if (topper)
467+
mnt_change_mountpoint(child->mnt_parent, child->mnt_mp,
468+
topper);
469+
433470
if (list_empty(&child->mnt_mounts)) {
434471
list_del_init(&child->mnt_child);
435472
child->mnt.mnt_flags |= MNT_UMOUNT;

fs/pnode.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ int get_dominating_id(struct mount *mnt, const struct path *root);
4949
unsigned int mnt_get_count(struct mount *mnt);
5050
void mnt_set_mountpoint(struct mount *, struct mountpoint *,
5151
struct mount *);
52+
void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
53+
struct mount *mnt);
5254
struct mount *copy_tree(struct mount *, struct dentry *, int);
5355
bool is_path_reachable(struct mount *, struct dentry *,
5456
const struct path *root);

0 commit comments

Comments
 (0)