Skip to content

Commit 8463833

Browse files
koct9i authored and torvalds committed
mm: rework virtual memory accounting
When inspecting some vague code inside the prctl(PR_SET_MM_MEM) call (which tests the RLIMIT_DATA value to figure out if we're allowed to assign new @start_brk, @brk, @start_data, @end_data from mm_struct) it was noticed that RLIMIT_DATA, in the form it is implemented now, doesn't do anything useful because most user-space libraries use the mmap() syscall for dynamic memory allocations.

Linus suggested converting the RLIMIT_DATA rlimit into something suitable for anonymous memory accounting. But in this patch we go further, and the changes are bundled together as:

* keep vma counting if CONFIG_PROC_FS=n, will be used for limits
* replace mm->shared_vm with better defined mm->data_vm
* account anonymous executable areas as executable
* account file-backed growsdown/up areas as stack
* drop struct file* argument from vm_stat_account
* enforce RLIMIT_DATA for size of data areas

This way the code looks cleaner: now code/stack/data classification depends only on vm_flags state:

VM_EXEC & ~VM_WRITE -> code (VmExe + VmLib in proc)
VM_GROWSUP | VM_GROWSDOWN -> stack (VmStk)
VM_WRITE & ~VM_SHARED & !stack -> data (VmData)

The rest (VmSize - VmData - VmStk - VmExe - VmLib) could be called "shared", but that might be a strange beast like a readonly-private or VM_IO area.

- RLIMIT_AS limits whole address space "VmSize"
- RLIMIT_STACK limits stack "VmStk" (but each vma individually)
- RLIMIT_DATA now limits "VmData"

Signed-off-by: Konstantin Khlebnikov <[email protected]> Signed-off-by: Cyrill Gorcunov <[email protected]> Cc: Quentin Casasnovas <[email protected]> Cc: Vegard Nossum <[email protected]> Acked-by: Linus Torvalds <[email protected]> Cc: Willy Tarreau <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Kees Cook <[email protected]> Cc: Vladimir Davydov <[email protected]> Cc: Pavel Emelyanov <[email protected]> Cc: Peter Zijlstra <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent d30b554 commit 8463833

File tree

9 files changed

+54
-58
lines changed

9 files changed

+54
-58
lines changed

arch/ia64/kernel/perfmon.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2332,8 +2332,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
23322332
*/
23332333
insert_vm_struct(mm, vma);
23342334

2335-
vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
2336-
vma_pages(vma));
2335+
vm_stat_account(vma->vm_mm, vma->vm_flags, vma_pages(vma));
23372336
up_write(&task->mm->mmap_sem);
23382337

23392338
/*

fs/proc/task_mmu.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
void task_mem(struct seq_file *m, struct mm_struct *mm)
2525
{
26-
unsigned long data, text, lib, swap, ptes, pmds, anon, file, shmem;
26+
unsigned long text, lib, swap, ptes, pmds, anon, file, shmem;
2727
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
2828

2929
anon = get_mm_counter(mm, MM_ANONPAGES);
@@ -44,7 +44,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
4444
if (hiwater_rss < mm->hiwater_rss)
4545
hiwater_rss = mm->hiwater_rss;
4646

47-
data = mm->total_vm - mm->shared_vm - mm->stack_vm;
4847
text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
4948
lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
5049
swap = get_mm_counter(mm, MM_SWAPENTS);
@@ -76,7 +75,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
7675
anon << (PAGE_SHIFT-10),
7776
file << (PAGE_SHIFT-10),
7877
shmem << (PAGE_SHIFT-10),
79-
data << (PAGE_SHIFT-10),
78+
mm->data_vm << (PAGE_SHIFT-10),
8079
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
8180
ptes >> 10,
8281
pmds >> 10,
@@ -97,7 +96,7 @@ unsigned long task_statm(struct mm_struct *mm,
9796
get_mm_counter(mm, MM_SHMEMPAGES);
9897
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
9998
>> PAGE_SHIFT;
100-
*data = mm->total_vm - mm->shared_vm;
99+
*data = mm->data_vm + mm->stack_vm;
101100
*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
102101
return mm->total_vm;
103102
}

include/linux/mm.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1929,7 +1929,9 @@ extern void mm_drop_all_locks(struct mm_struct *mm);
19291929
extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
19301930
extern struct file *get_mm_exe_file(struct mm_struct *mm);
19311931

1932-
extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
1932+
extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages);
1933+
extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
1934+
19331935
extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
19341936
unsigned long addr, unsigned long len,
19351937
unsigned long flags,
@@ -2147,15 +2149,6 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
21472149
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
21482150
unsigned long size, pte_fn_t fn, void *data);
21492151

2150-
#ifdef CONFIG_PROC_FS
2151-
void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
2152-
#else
2153-
static inline void vm_stat_account(struct mm_struct *mm,
2154-
unsigned long flags, struct file *file, long pages)
2155-
{
2156-
mm->total_vm += pages;
2157-
}
2158-
#endif /* CONFIG_PROC_FS */
21592152

21602153
#ifdef CONFIG_DEBUG_PAGEALLOC
21612154
extern bool _debug_pagealloc_enabled;

include/linux/mm_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ struct mm_struct {
427427
unsigned long total_vm; /* Total pages mapped */
428428
unsigned long locked_vm; /* Pages that have PG_mlocked set */
429429
unsigned long pinned_vm; /* Refcount permanently increased */
430-
unsigned long shared_vm; /* Shared pages (files) */
430+
unsigned long data_vm; /* VM_WRITE & ~VM_SHARED/GROWSDOWN */
431431
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */
432432
unsigned long stack_vm; /* VM_GROWSUP/DOWN */
433433
unsigned long def_flags;

kernel/fork.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
414414
RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
415415

416416
mm->total_vm = oldmm->total_vm;
417-
mm->shared_vm = oldmm->shared_vm;
417+
mm->data_vm = oldmm->data_vm;
418418
mm->exec_vm = oldmm->exec_vm;
419419
mm->stack_vm = oldmm->stack_vm;
420420

@@ -433,8 +433,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
433433
struct file *file;
434434

435435
if (mpnt->vm_flags & VM_DONTCOPY) {
436-
vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
437-
-vma_pages(mpnt));
436+
vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
438437
continue;
439438
}
440439
charge = 0;

mm/debug.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ void dump_mm(const struct mm_struct *mm)
175175
"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
176176
"pgd %p mm_users %d mm_count %d nr_ptes %lu nr_pmds %lu map_count %d\n"
177177
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
178-
"pinned_vm %lx shared_vm %lx exec_vm %lx stack_vm %lx\n"
178+
"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
179179
"start_code %lx end_code %lx start_data %lx end_data %lx\n"
180180
"start_brk %lx brk %lx start_stack %lx\n"
181181
"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
@@ -209,7 +209,7 @@ void dump_mm(const struct mm_struct *mm)
209209
mm_nr_pmds((struct mm_struct *)mm),
210210
mm->map_count,
211211
mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
212-
mm->pinned_vm, mm->shared_vm, mm->exec_vm, mm->stack_vm,
212+
mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
213213
mm->start_code, mm->end_code, mm->start_data, mm->end_data,
214214
mm->start_brk, mm->brk, mm->start_stack,
215215
mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,

mm/mmap.c

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,24 +1220,6 @@ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
12201220
return NULL;
12211221
}
12221222

1223-
#ifdef CONFIG_PROC_FS
1224-
void vm_stat_account(struct mm_struct *mm, unsigned long flags,
1225-
struct file *file, long pages)
1226-
{
1227-
const unsigned long stack_flags
1228-
= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
1229-
1230-
mm->total_vm += pages;
1231-
1232-
if (file) {
1233-
mm->shared_vm += pages;
1234-
if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
1235-
mm->exec_vm += pages;
1236-
} else if (flags & stack_flags)
1237-
mm->stack_vm += pages;
1238-
}
1239-
#endif /* CONFIG_PROC_FS */
1240-
12411223
/*
12421224
* If a hint addr is less than mmap_min_addr change hint to be as
12431225
* low as possible but still greater than mmap_min_addr
@@ -1556,7 +1538,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
15561538
unsigned long charged = 0;
15571539

15581540
/* Check against address space limit. */
1559-
if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
1541+
if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
15601542
unsigned long nr_pages;
15611543

15621544
/*
@@ -1565,7 +1547,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
15651547
*/
15661548
nr_pages = count_vma_pages_range(mm, addr, addr + len);
15671549

1568-
if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages))
1550+
if (!may_expand_vm(mm, vm_flags,
1551+
(len >> PAGE_SHIFT) - nr_pages))
15691552
return -ENOMEM;
15701553
}
15711554

@@ -1664,7 +1647,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
16641647
out:
16651648
perf_event_mmap(vma);
16661649

1667-
vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1650+
vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
16681651
if (vm_flags & VM_LOCKED) {
16691652
if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
16701653
vma == get_gate_vma(current->mm)))
@@ -2111,7 +2094,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
21112094
unsigned long new_start, actual_size;
21122095

21132096
/* address space limit tests */
2114-
if (!may_expand_vm(mm, grow))
2097+
if (!may_expand_vm(mm, vma->vm_flags, grow))
21152098
return -ENOMEM;
21162099

21172100
/* Stack limit test */
@@ -2208,8 +2191,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
22082191
spin_lock(&mm->page_table_lock);
22092192
if (vma->vm_flags & VM_LOCKED)
22102193
mm->locked_vm += grow;
2211-
vm_stat_account(mm, vma->vm_flags,
2212-
vma->vm_file, grow);
2194+
vm_stat_account(mm, vma->vm_flags, grow);
22132195
anon_vma_interval_tree_pre_update_vma(vma);
22142196
vma->vm_end = address;
22152197
anon_vma_interval_tree_post_update_vma(vma);
@@ -2284,8 +2266,7 @@ int expand_downwards(struct vm_area_struct *vma,
22842266
spin_lock(&mm->page_table_lock);
22852267
if (vma->vm_flags & VM_LOCKED)
22862268
mm->locked_vm += grow;
2287-
vm_stat_account(mm, vma->vm_flags,
2288-
vma->vm_file, grow);
2269+
vm_stat_account(mm, vma->vm_flags, grow);
22892270
anon_vma_interval_tree_pre_update_vma(vma);
22902271
vma->vm_start = address;
22912272
vma->vm_pgoff -= grow;
@@ -2399,7 +2380,7 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
23992380

24002381
if (vma->vm_flags & VM_ACCOUNT)
24012382
nr_accounted += nrpages;
2402-
vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
2383+
vm_stat_account(mm, vma->vm_flags, -nrpages);
24032384
vma = remove_vma(vma);
24042385
} while (vma);
24052386
vm_unacct_memory(nr_accounted);
@@ -2769,7 +2750,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
27692750
}
27702751

27712752
/* Check against address space limits *after* clearing old maps... */
2772-
if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2753+
if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
27732754
return -ENOMEM;
27742755

27752756
if (mm->map_count > sysctl_max_map_count)
@@ -2804,6 +2785,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
28042785
out:
28052786
perf_event_mmap(vma);
28062787
mm->total_vm += len >> PAGE_SHIFT;
2788+
mm->data_vm += len >> PAGE_SHIFT;
28072789
if (flags & VM_LOCKED)
28082790
mm->locked_vm += (len >> PAGE_SHIFT);
28092791
vma->vm_flags |= VM_SOFTDIRTY;
@@ -2995,9 +2977,28 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
29952977
* Return true if the calling process may expand its vm space by the passed
29962978
* number of pages
29972979
*/
2998-
int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2980+
bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
29992981
{
3000-
return mm->total_vm + npages <= rlimit(RLIMIT_AS) >> PAGE_SHIFT;
2982+
if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
2983+
return false;
2984+
2985+
if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS &
2986+
(VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE)
2987+
return mm->data_vm + npages <= rlimit(RLIMIT_DATA);
2988+
2989+
return true;
2990+
}
2991+
2992+
void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
2993+
{
2994+
mm->total_vm += npages;
2995+
2996+
if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC)
2997+
mm->exec_vm += npages;
2998+
else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN)))
2999+
mm->stack_vm += npages;
3000+
else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
3001+
mm->data_vm += npages;
30013002
}
30023003

30033004
static int special_mapping_fault(struct vm_area_struct *vma,
@@ -3079,7 +3080,7 @@ static struct vm_area_struct *__install_special_mapping(
30793080
if (ret)
30803081
goto out;
30813082

3082-
mm->total_vm += len >> PAGE_SHIFT;
3083+
vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
30833084

30843085
perf_event_mmap(vma);
30853086

mm/mprotect.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,10 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
278278
* even if read-only so there is no need to account for them here
279279
*/
280280
if (newflags & VM_WRITE) {
281+
/* Check space limits when area turns into data. */
282+
if (!may_expand_vm(mm, newflags, nrpages) &&
283+
may_expand_vm(mm, oldflags, nrpages))
284+
return -ENOMEM;
281285
if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
282286
VM_SHARED|VM_NORESERVE))) {
283287
charged = nrpages;
@@ -334,8 +338,8 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
334338
populate_vma_page_range(vma, start, end, NULL);
335339
}
336340

337-
vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
338-
vm_stat_account(mm, newflags, vma->vm_file, nrpages);
341+
vm_stat_account(mm, oldflags, -nrpages);
342+
vm_stat_account(mm, newflags, nrpages);
339343
perf_event_mmap(vma);
340344
return 0;
341345

mm/mremap.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
317317
* If this were a serious issue, we'd add a flag to do_munmap().
318318
*/
319319
hiwater_vm = mm->hiwater_vm;
320-
vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
320+
vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);
321321

322322
/* Tell pfnmap has moved from this vma */
323323
if (unlikely(vma->vm_flags & VM_PFNMAP))
@@ -383,7 +383,8 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
383383
return ERR_PTR(-EAGAIN);
384384
}
385385

386-
if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT))
386+
if (!may_expand_vm(mm, vma->vm_flags,
387+
(new_len - old_len) >> PAGE_SHIFT))
387388
return ERR_PTR(-ENOMEM);
388389

389390
if (vma->vm_flags & VM_ACCOUNT) {
@@ -545,7 +546,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
545546
goto out;
546547
}
547548

548-
vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
549+
vm_stat_account(mm, vma->vm_flags, pages);
549550
if (vma->vm_flags & VM_LOCKED) {
550551
mm->locked_vm += pages;
551552
locked = true;

0 commit comments

Comments
 (0)