Skip to content

Commit d7c9e99

Browse files
legionus authored and ebiederm committed
Reimplement RLIMIT_MEMLOCK on top of ucounts
The rlimit counter is tied to the uid in the user namespace. This allows rlimit values to be specified in a user namespace even if they are already globally exceeded by the user. However, the values set in the previous (enclosing) user namespaces still cannot be exceeded.

Changelog

v11:
* Fix issue found by lkp robot.

v8:
* Fix issues found by lkp-tests project.

v7:
* Keep only ucounts for RLIMIT_MEMLOCK checks instead of struct cred.

v6:
* Fix bug in hugetlb_file_setup() detected by trinity.

Reported-by: kernel test robot <[email protected]>
Reported-by: kernel test robot <[email protected]>
Signed-off-by: Alexey Gladkov <[email protected]>
Link: https://lkml.kernel.org/r/970d50c70c71bfd4496e0e8d2a0a32feebebb350.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <[email protected]>
1 parent d646969 commit d7c9e99

File tree

15 files changed

+53
-45
lines changed

15 files changed

+53
-45
lines changed

fs/hugetlbfs/inode.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,7 +1443,7 @@ static int get_hstate_idx(int page_size_log)
14431443
* otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
14441444
*/
14451445
struct file *hugetlb_file_setup(const char *name, size_t size,
1446-
vm_flags_t acctflag, struct user_struct **user,
1446+
vm_flags_t acctflag, struct ucounts **ucounts,
14471447
int creat_flags, int page_size_log)
14481448
{
14491449
struct inode *inode;
@@ -1455,20 +1455,20 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
14551455
if (hstate_idx < 0)
14561456
return ERR_PTR(-ENODEV);
14571457

1458-
*user = NULL;
1458+
*ucounts = NULL;
14591459
mnt = hugetlbfs_vfsmount[hstate_idx];
14601460
if (!mnt)
14611461
return ERR_PTR(-ENOENT);
14621462

14631463
if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
1464-
*user = current_user();
1465-
if (user_shm_lock(size, *user)) {
1464+
*ucounts = current_ucounts();
1465+
if (user_shm_lock(size, *ucounts)) {
14661466
task_lock(current);
14671467
pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
14681468
current->comm, current->pid);
14691469
task_unlock(current);
14701470
} else {
1471-
*user = NULL;
1471+
*ucounts = NULL;
14721472
return ERR_PTR(-EPERM);
14731473
}
14741474
}
@@ -1495,9 +1495,9 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
14951495

14961496
iput(inode);
14971497
out:
1498-
if (*user) {
1499-
user_shm_unlock(size, *user);
1500-
*user = NULL;
1498+
if (*ucounts) {
1499+
user_shm_unlock(size, *ucounts);
1500+
*ucounts = NULL;
15011501
}
15021502
return file;
15031503
}

include/linux/hugetlb.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
434434
extern const struct file_operations hugetlbfs_file_operations;
435435
extern const struct vm_operations_struct hugetlb_vm_ops;
436436
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
437-
struct user_struct **user, int creat_flags,
437+
struct ucounts **ucounts, int creat_flags,
438438
int page_size_log);
439439

440440
static inline bool is_file_hugepages(struct file *file)
@@ -454,7 +454,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
454454
#define is_file_hugepages(file) false
455455
static inline struct file *
456456
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
457-
struct user_struct **user, int creat_flags,
457+
struct ucounts **ucounts, int creat_flags,
458458
int page_size_log)
459459
{
460460
return ERR_PTR(-ENOSYS);

include/linux/mm.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1670,8 +1670,8 @@ extern bool can_do_mlock(void);
16701670
#else
16711671
static inline bool can_do_mlock(void) { return false; }
16721672
#endif
1673-
extern int user_shm_lock(size_t, struct user_struct *);
1674-
extern void user_shm_unlock(size_t, struct user_struct *);
1673+
extern int user_shm_lock(size_t, struct ucounts *);
1674+
extern void user_shm_unlock(size_t, struct ucounts *);
16751675

16761676
/*
16771677
* Parameter block passed down to zap_pte_range in exceptional cases.

include/linux/sched/user.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ struct user_struct {
1818
#ifdef CONFIG_EPOLL
1919
atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
2020
#endif
21-
unsigned long locked_shm; /* How many pages of mlocked shm ? */
2221
unsigned long unix_inflight; /* How many files in flight in unix sockets */
2322
atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */
2423

include/linux/shmem_fs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
6565
extern int shmem_zero_setup(struct vm_area_struct *);
6666
extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
6767
unsigned long len, unsigned long pgoff, unsigned long flags);
68-
extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
68+
extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
6969
#ifdef CONFIG_SHMEM
7070
extern const struct address_space_operations shmem_aops;
7171
static inline bool shmem_mapping(struct address_space *mapping)

include/linux/user_namespace.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ enum ucount_type {
5353
UCOUNT_RLIMIT_NPROC,
5454
UCOUNT_RLIMIT_MSGQUEUE,
5555
UCOUNT_RLIMIT_SIGPENDING,
56+
UCOUNT_RLIMIT_MEMLOCK,
5657
UCOUNT_COUNTS,
5758
};
5859

ipc/shm.c

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ struct shmid_kernel /* private to the kernel */
6060
time64_t shm_ctim;
6161
struct pid *shm_cprid;
6262
struct pid *shm_lprid;
63-
struct user_struct *mlock_user;
63+
struct ucounts *mlock_ucounts;
6464

6565
/* The task created the shm object. NULL if the task is dead. */
6666
struct task_struct *shm_creator;
@@ -286,10 +286,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
286286
shm_rmid(ns, shp);
287287
shm_unlock(shp);
288288
if (!is_file_hugepages(shm_file))
289-
shmem_lock(shm_file, 0, shp->mlock_user);
290-
else if (shp->mlock_user)
289+
shmem_lock(shm_file, 0, shp->mlock_ucounts);
290+
else if (shp->mlock_ucounts)
291291
user_shm_unlock(i_size_read(file_inode(shm_file)),
292-
shp->mlock_user);
292+
shp->mlock_ucounts);
293293
fput(shm_file);
294294
ipc_update_pid(&shp->shm_cprid, NULL);
295295
ipc_update_pid(&shp->shm_lprid, NULL);
@@ -625,7 +625,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
625625

626626
shp->shm_perm.key = key;
627627
shp->shm_perm.mode = (shmflg & S_IRWXUGO);
628-
shp->mlock_user = NULL;
628+
shp->mlock_ucounts = NULL;
629629

630630
shp->shm_perm.security = NULL;
631631
error = security_shm_alloc(&shp->shm_perm);
@@ -650,7 +650,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
650650
if (shmflg & SHM_NORESERVE)
651651
acctflag = VM_NORESERVE;
652652
file = hugetlb_file_setup(name, hugesize, acctflag,
653-
&shp->mlock_user, HUGETLB_SHMFS_INODE,
653+
&shp->mlock_ucounts, HUGETLB_SHMFS_INODE,
654654
(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
655655
} else {
656656
/*
@@ -698,8 +698,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
698698
no_id:
699699
ipc_update_pid(&shp->shm_cprid, NULL);
700700
ipc_update_pid(&shp->shm_lprid, NULL);
701-
if (is_file_hugepages(file) && shp->mlock_user)
702-
user_shm_unlock(size, shp->mlock_user);
701+
if (is_file_hugepages(file) && shp->mlock_ucounts)
702+
user_shm_unlock(size, shp->mlock_ucounts);
703703
fput(file);
704704
ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
705705
return error;
@@ -1105,22 +1105,22 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
11051105
goto out_unlock0;
11061106

11071107
if (cmd == SHM_LOCK) {
1108-
struct user_struct *user = current_user();
1108+
struct ucounts *ucounts = current_ucounts();
11091109

1110-
err = shmem_lock(shm_file, 1, user);
1110+
err = shmem_lock(shm_file, 1, ucounts);
11111111
if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
11121112
shp->shm_perm.mode |= SHM_LOCKED;
1113-
shp->mlock_user = user;
1113+
shp->mlock_ucounts = ucounts;
11141114
}
11151115
goto out_unlock0;
11161116
}
11171117

11181118
/* SHM_UNLOCK */
11191119
if (!(shp->shm_perm.mode & SHM_LOCKED))
11201120
goto out_unlock0;
1121-
shmem_lock(shm_file, 0, shp->mlock_user);
1121+
shmem_lock(shm_file, 0, shp->mlock_ucounts);
11221122
shp->shm_perm.mode &= ~SHM_LOCKED;
1123-
shp->mlock_user = NULL;
1123+
shp->mlock_ucounts = NULL;
11241124
get_file(shm_file);
11251125
ipc_unlock_object(&shp->shm_perm);
11261126
rcu_read_unlock();

kernel/fork.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,7 @@ void __init fork_init(void)
825825
init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
826826
init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
827827
init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING);
828+
init_user_ns.ucount_max[UCOUNT_RLIMIT_MEMLOCK] = task_rlimit(&init_task, RLIMIT_MEMLOCK);
828829

829830
#ifdef CONFIG_VMAP_STACK
830831
cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",

kernel/ucount.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ static struct ctl_table user_table[] = {
8383
{ },
8484
{ },
8585
{ },
86+
{ },
8687
{ }
8788
};
8889
#endif /* CONFIG_SYSCTL */

kernel/user.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
9898
/* root_user.__count is 1, for init task cred */
9999
struct user_struct root_user = {
100100
.__count = REFCOUNT_INIT(1),
101-
.locked_shm = 0,
102101
.uid = GLOBAL_ROOT_UID,
103102
.ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
104103
};

kernel/user_namespace.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ int create_user_ns(struct cred *new)
125125
ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
126126
ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
127127
ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING);
128+
ns->ucount_max[UCOUNT_RLIMIT_MEMLOCK] = rlimit(RLIMIT_MEMLOCK);
128129
ns->ucounts = ucounts;
129130

130131
/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */

mm/memfd.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,9 +297,9 @@ SYSCALL_DEFINE2(memfd_create,
297297
}
298298

299299
if (flags & MFD_HUGETLB) {
300-
struct user_struct *user = NULL;
300+
struct ucounts *ucounts = NULL;
301301

302-
file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
302+
file = hugetlb_file_setup(name, 0, VM_NORESERVE, &ucounts,
303303
HUGETLB_ANONHUGE_INODE,
304304
(flags >> MFD_HUGE_SHIFT) &
305305
MFD_HUGE_MASK);

mm/mlock.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -817,9 +817,10 @@ SYSCALL_DEFINE0(munlockall)
817817
*/
818818
static DEFINE_SPINLOCK(shmlock_user_lock);
819819

820-
int user_shm_lock(size_t size, struct user_struct *user)
820+
int user_shm_lock(size_t size, struct ucounts *ucounts)
821821
{
822822
unsigned long lock_limit, locked;
823+
long memlock;
823824
int allowed = 0;
824825

825826
locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -828,21 +829,26 @@ int user_shm_lock(size_t size, struct user_struct *user)
828829
allowed = 1;
829830
lock_limit >>= PAGE_SHIFT;
830831
spin_lock(&shmlock_user_lock);
831-
if (!allowed &&
832-
locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
832+
memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
833+
834+
if (!allowed && (memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
835+
dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
836+
goto out;
837+
}
838+
if (!get_ucounts(ucounts)) {
839+
dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
833840
goto out;
834-
get_uid(user);
835-
user->locked_shm += locked;
841+
}
836842
allowed = 1;
837843
out:
838844
spin_unlock(&shmlock_user_lock);
839845
return allowed;
840846
}
841847

842-
void user_shm_unlock(size_t size, struct user_struct *user)
848+
void user_shm_unlock(size_t size, struct ucounts *ucounts)
843849
{
844850
spin_lock(&shmlock_user_lock);
845-
user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
851+
dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
846852
spin_unlock(&shmlock_user_lock);
847-
free_uid(user);
853+
put_ucounts(ucounts);
848854
}

mm/mmap.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,7 +1605,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
16051605
goto out_fput;
16061606
}
16071607
} else if (flags & MAP_HUGETLB) {
1608-
struct user_struct *user = NULL;
1608+
struct ucounts *ucounts = NULL;
16091609
struct hstate *hs;
16101610

16111611
hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
@@ -1621,7 +1621,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
16211621
*/
16221622
file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
16231623
VM_NORESERVE,
1624-
&user, HUGETLB_ANONHUGE_INODE,
1624+
&ucounts, HUGETLB_ANONHUGE_INODE,
16251625
(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
16261626
if (IS_ERR(file))
16271627
return PTR_ERR(file);

mm/shmem.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2227,7 +2227,7 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
22272227
}
22282228
#endif
22292229

2230-
int shmem_lock(struct file *file, int lock, struct user_struct *user)
2230+
int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
22312231
{
22322232
struct inode *inode = file_inode(file);
22332233
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -2239,13 +2239,13 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
22392239
* no serialization needed when called from shm_destroy().
22402240
*/
22412241
if (lock && !(info->flags & VM_LOCKED)) {
2242-
if (!user_shm_lock(inode->i_size, user))
2242+
if (!user_shm_lock(inode->i_size, ucounts))
22432243
goto out_nomem;
22442244
info->flags |= VM_LOCKED;
22452245
mapping_set_unevictable(file->f_mapping);
22462246
}
2247-
if (!lock && (info->flags & VM_LOCKED) && user) {
2248-
user_shm_unlock(inode->i_size, user);
2247+
if (!lock && (info->flags & VM_LOCKED) && ucounts) {
2248+
user_shm_unlock(inode->i_size, ucounts);
22492249
info->flags &= ~VM_LOCKED;
22502250
mapping_clear_unevictable(file->f_mapping);
22512251
}
@@ -4093,7 +4093,7 @@ int shmem_unuse(unsigned int type, bool frontswap,
40934093
return 0;
40944094
}
40954095

4096-
int shmem_lock(struct file *file, int lock, struct user_struct *user)
4096+
int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
40974097
{
40984098
return 0;
40994099
}

0 commit comments

Comments
 (0)