Skip to content

Commit 2a18672

Browse files
author
Christian Brauner
committed
fs: add mount_setattr()
This implements the missing mount_setattr() syscall. While the new mount API allows changing the properties of a superblock, there is currently no way to change the properties of a mount or a mount tree using file descriptors, which the new mount API is based on. In addition, the old mount API has the restriction that mount options cannot be applied recursively. This hasn't changed since changing mount options on a per-mount basis was implemented in [1], and it has been a frequent request not just for convenience but also for security reasons. The legacy mount syscall is unable to accommodate this behavior without introducing a whole new set of flags because MS_REC | MS_REMOUNT | MS_BIND | MS_RDONLY | MS_NOEXEC | [...] only applies the mount option to the topmost mount. Changing MS_REC to apply to the whole mount tree would mean introducing a significant uapi change and would likely cause significant regressions. The new mount_setattr() syscall allows mount options to be cleared and set recursively in one shot. Multiple calls requesting the same mount option changes are idempotent: int mount_setattr(int dfd, const char *path, unsigned flags, struct mount_attr *uattr, size_t usize); Flags to modify path resolution behavior are specified in the @flags argument. Currently, AT_EMPTY_PATH, AT_RECURSIVE, AT_SYMLINK_NOFOLLOW, and AT_NO_AUTOMOUNT are supported. If useful, additional lookup flags to restrict path resolution, as introduced with openat2(), might be supported in the future. The mount_setattr() syscall can be expected to grow over time and is designed with extensibility in mind. It follows the extensible syscall pattern we have used with other syscalls such as openat2(), clone3(), sched_{set,get}attr(), and others. 
The set of mount options is passed in the uapi struct mount_attr, which currently has the following layout: struct mount_attr { __u64 attr_set; __u64 attr_clr; __u64 propagation; __u64 userns_fd; }; The @attr_set and @attr_clr members are used to set and clear mount options, respectively. This way a user can, e.g., request that a set of flags be raised, such as turning mounts read-only by raising MOUNT_ATTR_RDONLY in @attr_set, while at the same time requesting that another set of flags be lowered, such as removing noexec from a mount tree by specifying MOUNT_ATTR_NOEXEC in @attr_clr. Note that since the MOUNT_ATTR_<atime> values are an enum starting from 0, not a bitmap, users wanting to transition to a different atime setting cannot simply specify the atime setting in @attr_set, but must also specify MOUNT_ATTR__ATIME in the @attr_clr field. So we ensure that MOUNT_ATTR__ATIME can't be partially set in @attr_clr and that @attr_set can't have any atime bits set if MOUNT_ATTR__ATIME isn't set in @attr_clr. The @propagation field lets callers specify the propagation type of a mount tree. Propagation is a single property that has four different settings and as such is not really a flag argument but an enum. Specifically, it would be unclear what setting and clearing propagation settings in combination would amount to. The legacy mount() syscall thus forbids the combination of multiple propagation settings too. The goal is to keep the semantics of mount propagation somewhat simple, as they are overly complex as it is. The @userns_fd field lets users specify a user namespace whose idmapping becomes the idmapping of the mount. This is implemented and explained in detail in the next patch. 
[1]: commit 2e4b7fc ("[PATCH] r/o bind mounts: honor mount writer counts at remount") Link: https://lore.kernel.org/r/[email protected] Cc: David Howells <[email protected]> Cc: Aleksa Sarai <[email protected]> Cc: Al Viro <[email protected]> Cc: [email protected] Cc: [email protected] Reviewed-by: Christoph Hellwig <[email protected]> Signed-off-by: Christian Brauner <[email protected]>
1 parent 5b49050 commit 2a18672

File tree

23 files changed

+307
-3
lines changed

23 files changed

+307
-3
lines changed

arch/alpha/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,3 +481,4 @@
481481
549 common faccessat2 sys_faccessat2
482482
550 common process_madvise sys_process_madvise
483483
551 common epoll_pwait2 sys_epoll_pwait2
484+
552 common mount_setattr sys_mount_setattr

arch/arm/tools/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,3 +455,4 @@
455455
439 common faccessat2 sys_faccessat2
456456
440 common process_madvise sys_process_madvise
457457
441 common epoll_pwait2 sys_epoll_pwait2
458+
442 common mount_setattr sys_mount_setattr

arch/arm64/include/asm/unistd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
3939
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
4040

41-
#define __NR_compat_syscalls 442
41+
#define __NR_compat_syscalls 443
4242
#endif
4343

4444
#define __ARCH_WANT_SYS_CLONE

arch/arm64/include/asm/unistd32.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,8 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2)
891891
__SYSCALL(__NR_process_madvise, sys_process_madvise)
892892
#define __NR_epoll_pwait2 441
893893
__SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
894+
#define __NR_mount_setattr 442
895+
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
894896

895897
/*
896898
* Please add new compat syscalls above this comment and update

arch/ia64/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,3 +362,4 @@
362362
439 common faccessat2 sys_faccessat2
363363
440 common process_madvise sys_process_madvise
364364
441 common epoll_pwait2 sys_epoll_pwait2
365+
442 common mount_setattr sys_mount_setattr

arch/m68k/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,3 +441,4 @@
441441
439 common faccessat2 sys_faccessat2
442442
440 common process_madvise sys_process_madvise
443443
441 common epoll_pwait2 sys_epoll_pwait2
444+
442 common mount_setattr sys_mount_setattr

arch/microblaze/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,3 +447,4 @@
447447
439 common faccessat2 sys_faccessat2
448448
440 common process_madvise sys_process_madvise
449449
441 common epoll_pwait2 sys_epoll_pwait2
450+
442 common mount_setattr sys_mount_setattr

arch/mips/kernel/syscalls/syscall_n32.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,3 +380,4 @@
380380
439 n32 faccessat2 sys_faccessat2
381381
440 n32 process_madvise sys_process_madvise
382382
441 n32 epoll_pwait2 compat_sys_epoll_pwait2
383+
442 n32 mount_setattr sys_mount_setattr

arch/mips/kernel/syscalls/syscall_n64.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,3 +356,4 @@
356356
439 n64 faccessat2 sys_faccessat2
357357
440 n64 process_madvise sys_process_madvise
358358
441 n64 epoll_pwait2 sys_epoll_pwait2
359+
442 n64 mount_setattr sys_mount_setattr

arch/mips/kernel/syscalls/syscall_o32.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,3 +429,4 @@
429429
439 o32 faccessat2 sys_faccessat2
430430
440 o32 process_madvise sys_process_madvise
431431
441 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
432+
442 o32 mount_setattr sys_mount_setattr

arch/parisc/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,3 +439,4 @@
439439
439 common faccessat2 sys_faccessat2
440440
440 common process_madvise sys_process_madvise
441441
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442+
442 common mount_setattr sys_mount_setattr

arch/powerpc/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,3 +531,4 @@
531531
439 common faccessat2 sys_faccessat2
532532
440 common process_madvise sys_process_madvise
533533
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
534+
442 common mount_setattr sys_mount_setattr

arch/s390/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,3 +444,4 @@
444444
439 common faccessat2 sys_faccessat2 sys_faccessat2
445445
440 common process_madvise sys_process_madvise sys_process_madvise
446446
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
447+
442 common mount_setattr sys_mount_setattr sys_mount_setattr

arch/sh/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,3 +444,4 @@
444444
439 common faccessat2 sys_faccessat2
445445
440 common process_madvise sys_process_madvise
446446
441 common epoll_pwait2 sys_epoll_pwait2
447+
442 common mount_setattr sys_mount_setattr

arch/sparc/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,3 +487,4 @@
487487
439 common faccessat2 sys_faccessat2
488488
440 common process_madvise sys_process_madvise
489489
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
490+
442 common mount_setattr sys_mount_setattr

arch/x86/entry/syscalls/syscall_32.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,3 +446,4 @@
446446
439 i386 faccessat2 sys_faccessat2
447447
440 i386 process_madvise sys_process_madvise
448448
441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
449+
442 i386 mount_setattr sys_mount_setattr

arch/x86/entry/syscalls/syscall_64.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@
363363
439 common faccessat2 sys_faccessat2
364364
440 common process_madvise sys_process_madvise
365365
441 common epoll_pwait2 sys_epoll_pwait2
366+
442 common mount_setattr sys_mount_setattr
366367

367368
#
368369
# Due to a historical design error, certain syscalls are numbered differently

arch/xtensa/kernel/syscalls/syscall.tbl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,3 +412,4 @@
412412
439 common faccessat2 sys_faccessat2
413413
440 common process_madvise sys_process_madvise
414414
441 common epoll_pwait2 sys_epoll_pwait2
415+
442 common mount_setattr sys_mount_setattr

fs/namespace.c

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,14 @@ static DECLARE_RWSEM(namespace_sem);
7373
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
7474
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
7575

76+
/*
 * Kernel-internal form of a mount_setattr() request, filled in by
 * build_mount_kattr() from the user-supplied struct mount_attr and the
 * syscall's AT_* flags.
 *
 * attr_set:     MNT_* flags to raise on each affected mount.
 * attr_clr:     MNT_* flags to lower on each affected mount (applied before
 *               attr_set, see recalc_flags()).
 * propagation:  at most one of the MS_* propagation types, or 0 to leave
 *               propagation untouched.
 * lookup_flags: LOOKUP_* flags used to resolve the target path.
 * recurse:      true when AT_RECURSIVE was passed, i.e. apply the change to
 *               the whole mount tree instead of only the top mount.
 */
struct mount_kattr {
77+
unsigned int attr_set;
78+
unsigned int attr_clr;
79+
unsigned int propagation;
80+
unsigned int lookup_flags;
81+
bool recurse;
82+
};
83+
7684
/* /sys/fs */
7785
struct kobject *fs_kobj;
7886
EXPORT_SYMBOL_GPL(fs_kobj);
@@ -3469,6 +3477,11 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
34693477
(MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \
34703478
MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME)
34713479

3480+
/*
 * MOUNT_ATTR_* bits that mount_setattr() accepts in attr_set/attr_clr;
 * currently the same set that is valid for FSMOUNT_VALID_FLAGS.
 */
#define MOUNT_SETATTR_VALID_FLAGS FSMOUNT_VALID_FLAGS
3481+
3482+
/*
 * Propagation types that mount_setattr() accepts in the propagation field.
 * build_mount_kattr() rejects any other bit and more than one of these.
 */
#define MOUNT_SETATTR_PROPAGATION_FLAGS \
3483+
(MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED)
3484+
34723485
static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
34733486
{
34743487
unsigned int mnt_flags = 0;
@@ -3820,6 +3833,256 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
38203833
return error;
38213834
}
38223835

3836+
static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
3837+
{
3838+
unsigned int flags = mnt->mnt.mnt_flags;
3839+
3840+
/* flags to clear */
3841+
flags &= ~kattr->attr_clr;
3842+
/* flags to raise */
3843+
flags |= kattr->attr_set;
3844+
3845+
return flags;
3846+
}
3847+
3848+
/*
 * First phase of mount_setattr(): check that the requested change can be
 * applied to @mnt (and, when kattr->recurse is set, to every mount reached
 * from it via next_mnt()) and take write holds where needed.
 *
 * The top mount must be mounted and must either pass check_mnt() (when it
 * has a parent) or sit in an anonymous mount namespace; otherwise *err is
 * set to -EINVAL. For each visited mount the resulting flags are computed
 * with recalc_flags(); if can_change_locked_flags() refuses them *err is set
 * to -EPERM. When MNT_READONLY is being raised on a mount that is not yet
 * read-only, mnt_hold_writers() is called and its error, if any, is stored
 * in *err.
 *
 * *err is only written on failure, so the caller must initialise it to 0.
 *
 * Returns the last mount that passed the locked-flags check (this includes a
 * mount on which mnt_hold_writers() failed), or NULL if no mount got that
 * far. mount_setattr_commit() uses it as the stop marker for rollback.
 */
static struct mount *mount_setattr_prepare(struct mount_kattr *kattr,
3849+
struct mount *mnt, int *err)
3850+
{
3851+
struct mount *m = mnt, *last = NULL;
3852+
3853+
if (!is_mounted(&m->mnt)) {
3854+
*err = -EINVAL;
3855+
goto out;
3856+
}
3857+
3858+
if (!(mnt_has_parent(m) ? check_mnt(m) : is_anon_ns(m->mnt_ns))) {
3859+
*err = -EINVAL;
3860+
goto out;
3861+
}
3862+
3863+
do {
3864+
unsigned int flags;
3865+
3866+
flags = recalc_flags(kattr, m);
3867+
if (!can_change_locked_flags(m, flags)) {
3868+
*err = -EPERM;
3869+
goto out;
3870+
}
3871+
3872+
/* Recorded before the write hold so a failed hold on m is still rolled back. */
last = m;
3873+
3874+
if ((kattr->attr_set & MNT_READONLY) &&
3875+
!(m->mnt.mnt_flags & MNT_READONLY)) {
3876+
*err = mnt_hold_writers(m);
3877+
if (*err)
3878+
goto out;
3879+
}
3880+
} while (kattr->recurse && (m = next_mnt(m, mnt)));
3881+
3882+
out:
3883+
return last;
3884+
}
3885+
3886+
/*
 * Second phase of mount_setattr(): either apply the change or undo the
 * write holds taken by mount_setattr_prepare().
 *
 * @kattr: the validated request.
 * @mnt:   top of the mount (tree) being changed.
 * @last:  last mount handled by mount_setattr_prepare().
 * @err:   0 to commit, non-zero to roll back.
 *
 * With @err == 0 every visited mount gets its new flags written with
 * WRITE_ONCE(), has a pending MNT_WRITE_HOLD released, and has its
 * propagation type changed if kattr->propagation is set; finally
 * touch_mnt_namespace() is called on the top mount's namespace.
 *
 * With @err != 0 no flags or propagation are changed; pending write holds
 * are released on the mounts from @mnt up to and including @last, and the
 * walk stops there.
 */
static void mount_setattr_commit(struct mount_kattr *kattr,
3887+
struct mount *mnt, struct mount *last,
3888+
int err)
3889+
{
3890+
struct mount *m = mnt;
3891+
3892+
do {
3893+
if (!err) {
3894+
unsigned int flags;
3895+
3896+
flags = recalc_flags(kattr, m);
3897+
WRITE_ONCE(m->mnt.mnt_flags, flags);
3898+
}
3899+
3900+
/*
3901+
* We either set MNT_READONLY above so make it visible
3902+
* before ~MNT_WRITE_HOLD or we failed to recursively
3903+
* apply mount options.
3904+
*/
3905+
if ((kattr->attr_set & MNT_READONLY) &&
3906+
(m->mnt.mnt_flags & MNT_WRITE_HOLD))
3907+
mnt_unhold_writers(m);
3908+
3909+
if (!err && kattr->propagation)
3910+
change_mnt_propagation(m, kattr->propagation);
3911+
3912+
/*
3913+
* On failure, only cleanup until we found the first mount
3914+
* we failed to handle.
3915+
*/
3916+
if (err && m == last)
3917+
break;
3918+
} while (kattr->recurse && (m = next_mnt(m, mnt)));
3919+
3920+
if (!err)
3921+
touch_mnt_namespace(mnt->mnt_ns);
3922+
}
3923+
3924+
static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
3925+
{
3926+
struct mount *mnt = real_mount(path->mnt), *last = NULL;
3927+
int err = 0;
3928+
3929+
if (path->dentry != mnt->mnt.mnt_root)
3930+
return -EINVAL;
3931+
3932+
if (kattr->propagation) {
3933+
/*
3934+
* Only take namespace_lock() if we're actually changing
3935+
* propagation.
3936+
*/
3937+
namespace_lock();
3938+
if (kattr->propagation == MS_SHARED) {
3939+
err = invent_group_ids(mnt, kattr->recurse);
3940+
if (err) {
3941+
namespace_unlock();
3942+
return err;
3943+
}
3944+
}
3945+
}
3946+
3947+
lock_mount_hash();
3948+
3949+
/*
3950+
* Get the mount tree in a shape where we can change mount
3951+
* properties without failure.
3952+
*/
3953+
last = mount_setattr_prepare(kattr, mnt, &err);
3954+
if (last) /* Commit all changes or revert to the old state. */
3955+
mount_setattr_commit(kattr, mnt, last, err);
3956+
3957+
unlock_mount_hash();
3958+
3959+
if (kattr->propagation) {
3960+
namespace_unlock();
3961+
if (err)
3962+
cleanup_group_ids(mnt, NULL);
3963+
}
3964+
3965+
return err;
3966+
}
3967+
3968+
static int build_mount_kattr(const struct mount_attr *attr,
3969+
struct mount_kattr *kattr, unsigned int flags)
3970+
{
3971+
unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
3972+
3973+
if (flags & AT_NO_AUTOMOUNT)
3974+
lookup_flags &= ~LOOKUP_AUTOMOUNT;
3975+
if (flags & AT_SYMLINK_NOFOLLOW)
3976+
lookup_flags &= ~LOOKUP_FOLLOW;
3977+
if (flags & AT_EMPTY_PATH)
3978+
lookup_flags |= LOOKUP_EMPTY;
3979+
3980+
*kattr = (struct mount_kattr) {
3981+
.lookup_flags = lookup_flags,
3982+
.recurse = !!(flags & AT_RECURSIVE),
3983+
};
3984+
3985+
if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
3986+
return -EINVAL;
3987+
if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
3988+
return -EINVAL;
3989+
kattr->propagation = attr->propagation;
3990+
3991+
if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS)
3992+
return -EINVAL;
3993+
3994+
if (attr->userns_fd)
3995+
return -EINVAL;
3996+
3997+
kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set);
3998+
kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr);
3999+
4000+
/*
4001+
* Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap,
4002+
* users wanting to transition to a different atime setting cannot
4003+
* simply specify the atime setting in @attr_set, but must also
4004+
* specify MOUNT_ATTR__ATIME in the @attr_clr field.
4005+
* So ensure that MOUNT_ATTR__ATIME can't be partially set in
4006+
* @attr_clr and that @attr_set can't have any atime bits set if
4007+
* MOUNT_ATTR__ATIME isn't set in @attr_clr.
4008+
*/
4009+
if (attr->attr_clr & MOUNT_ATTR__ATIME) {
4010+
if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME)
4011+
return -EINVAL;
4012+
4013+
/*
4014+
* Clear all previous time settings as they are mutually
4015+
* exclusive.
4016+
*/
4017+
kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME;
4018+
switch (attr->attr_set & MOUNT_ATTR__ATIME) {
4019+
case MOUNT_ATTR_RELATIME:
4020+
kattr->attr_set |= MNT_RELATIME;
4021+
break;
4022+
case MOUNT_ATTR_NOATIME:
4023+
kattr->attr_set |= MNT_NOATIME;
4024+
break;
4025+
case MOUNT_ATTR_STRICTATIME:
4026+
break;
4027+
default:
4028+
return -EINVAL;
4029+
}
4030+
} else {
4031+
if (attr->attr_set & MOUNT_ATTR__ATIME)
4032+
return -EINVAL;
4033+
}
4034+
4035+
return 0;
4036+
}
4037+
4038+
/*
 * mount_setattr() - change the properties of a mount or a mount tree
 * @dfd:   directory file descriptor that @path is resolved against
 * @path:  user pointer to the path of the mount to change
 * @flags: AT_EMPTY_PATH, AT_RECURSIVE, AT_SYMLINK_NOFOLLOW, AT_NO_AUTOMOUNT
 * @uattr: user pointer to a struct mount_attr describing the change
 * @usize: size of the structure @uattr points to
 *
 * Return: 0 on success; -EINVAL for unknown @flags, a too small @usize or an
 * invalid attribute combination; -E2BIG if @usize exceeds PAGE_SIZE; -EPERM
 * if may_mount() denies the caller; otherwise the error from copying the
 * structure, resolving the path or applying the change.
 */
SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
		unsigned int, flags, struct mount_attr __user *, uattr,
		size_t, usize)
{
	const unsigned int supported = AT_EMPTY_PATH | AT_RECURSIVE |
				       AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT;
	struct mount_kattr kattr;
	struct mount_attr attr;
	struct path where;
	int ret;

	BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);

	if (flags & ~supported)
		return -EINVAL;

	if (unlikely(usize > PAGE_SIZE))
		return -E2BIG;
	if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
		return -EINVAL;

	if (!may_mount())
		return -EPERM;

	ret = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
	if (ret)
		return ret;

	/* Nothing to set, clear or propagate: skip the path walk entirely. */
	if (!attr.attr_set && !attr.attr_clr && !attr.propagation)
		return 0;

	ret = build_mount_kattr(&attr, &kattr, flags);
	if (ret)
		return ret;

	ret = user_path_at(dfd, path, kattr.lookup_flags, &where);
	if (ret)
		return ret;

	ret = do_mount_setattr(&where, &kattr);
	path_put(&where);
	return ret;
}
4085+
38234086
static void __init init_mount_tree(void)
38244087
{
38254088
struct vfsmount *mnt;

0 commit comments

Comments
 (0)