Skip to content

Commit def5efe

Browse files
rientjes authored and torvalds committed
mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
If madvise(2) advice will result in the underlying vma being split and the number of areas mapped by the process will exceed /proc/sys/vm/max_map_count as a result, return ENOMEM instead of EAGAIN.

EAGAIN is returned by madvise(2) when a kernel resource, such as slab, is temporarily unavailable. It indicates that userspace should retry the advice in the near future. This is important for advice such as MADV_DONTNEED which is often used by malloc implementations to free memory back to the system: we really do want to free memory back when madvise(2) returns EAGAIN because slab allocations (for vmas, anon_vmas, or mempolicies) cannot be allocated.

Encountering /proc/sys/vm/max_map_count is not a temporary failure, however, so return ENOMEM to indicate this is a more serious issue. A followup patch to the man page will specify this behavior.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: David Rientjes <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Jerome Marchand <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Michael Kerrisk <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 712c604 commit def5efe

File tree

5 files changed

+56
-17
lines changed

5 files changed

+56
-17
lines changed

Documentation/sysctl/vm.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,8 +376,8 @@ max_map_count:
376376

377377
This file contains the maximum number of memory map areas a process
378378
may have. Memory map areas are used as a side-effect of calling
379-
malloc, directly by mmap and mprotect, and also when loading shared
380-
libraries.
379+
malloc, directly by mmap, mprotect, and madvise, and also when loading
380+
shared libraries.
381381

382382
While most applications need less than a thousand maps, certain
383383
programs, particularly malloc debuggers, may consume lots of them,

Documentation/vm/ksm.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ the range for whenever the KSM daemon is started; even if the range
3838
cannot contain any pages which KSM could actually merge; even if
3939
MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
4040

41+
If a region of memory must be split into at least one new MADV_MERGEABLE
42+
or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
43+
will exceed vm.max_map_count (see Documentation/sysctl/vm.txt).
44+
4145
Like other madvise calls, they are intended for use on mapped areas of
4246
the user address space: they will report ENOMEM if the specified range
4347
includes unmapped gaps (though working on the intervening mapped areas),

include/linux/mm.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2041,8 +2041,10 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *,
20412041
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
20422042
struct mempolicy *, struct vm_userfaultfd_ctx);
20432043
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
2044-
extern int split_vma(struct mm_struct *,
2045-
struct vm_area_struct *, unsigned long addr, int new_below);
2044+
extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
2045+
unsigned long addr, int new_below);
2046+
extern int split_vma(struct mm_struct *, struct vm_area_struct *,
2047+
unsigned long addr, int new_below);
20462048
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
20472049
extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
20482050
struct rb_node **, struct rb_node *);

mm/madvise.c

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,28 @@ static long madvise_behavior(struct vm_area_struct *vma,
9292
case MADV_MERGEABLE:
9393
case MADV_UNMERGEABLE:
9494
error = ksm_madvise(vma, start, end, behavior, &new_flags);
95-
if (error)
95+
if (error) {
96+
/*
97+
* madvise() returns EAGAIN if kernel resources, such as
98+
* slab, are temporarily unavailable.
99+
*/
100+
if (error == -ENOMEM)
101+
error = -EAGAIN;
96102
goto out;
103+
}
97104
break;
98105
case MADV_HUGEPAGE:
99106
case MADV_NOHUGEPAGE:
100107
error = hugepage_madvise(vma, &new_flags, behavior);
101-
if (error)
108+
if (error) {
109+
/*
110+
* madvise() returns EAGAIN if kernel resources, such as
111+
* slab, are temporarily unavailable.
112+
*/
113+
if (error == -ENOMEM)
114+
error = -EAGAIN;
102115
goto out;
116+
}
103117
break;
104118
}
105119

@@ -120,26 +134,45 @@ static long madvise_behavior(struct vm_area_struct *vma,
120134
*prev = vma;
121135

122136
if (start != vma->vm_start) {
123-
error = split_vma(mm, vma, start, 1);
124-
if (error)
137+
if (unlikely(mm->map_count >= sysctl_max_map_count)) {
138+
error = -ENOMEM;
125139
goto out;
140+
}
141+
error = __split_vma(mm, vma, start, 1);
142+
if (error) {
143+
/*
144+
* madvise() returns EAGAIN if kernel resources, such as
145+
* slab, are temporarily unavailable.
146+
*/
147+
if (error == -ENOMEM)
148+
error = -EAGAIN;
149+
goto out;
150+
}
126151
}
127152

128153
if (end != vma->vm_end) {
129-
error = split_vma(mm, vma, end, 0);
130-
if (error)
154+
if (unlikely(mm->map_count >= sysctl_max_map_count)) {
155+
error = -ENOMEM;
156+
goto out;
157+
}
158+
error = __split_vma(mm, vma, end, 0);
159+
if (error) {
160+
/*
161+
* madvise() returns EAGAIN if kernel resources, such as
162+
* slab, are temporarily unavailable.
163+
*/
164+
if (error == -ENOMEM)
165+
error = -EAGAIN;
131166
goto out;
167+
}
132168
}
133169

134170
success:
135171
/*
136172
* vm_flags is protected by the mmap_sem held in write mode.
137173
*/
138174
vma->vm_flags = new_flags;
139-
140175
out:
141-
if (error == -ENOMEM)
142-
error = -EAGAIN;
143176
return error;
144177
}
145178

mm/mmap.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2499,11 +2499,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
24992499
}
25002500

25012501
/*
2502-
* __split_vma() bypasses sysctl_max_map_count checking. We use this on the
2503-
* munmap path where it doesn't make sense to fail.
2502+
* __split_vma() bypasses sysctl_max_map_count checking. We use this where it
2503+
* has already been checked or doesn't make sense to fail.
25042504
*/
2505-
static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2506-
unsigned long addr, int new_below)
2505+
int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2506+
unsigned long addr, int new_below)
25072507
{
25082508
struct vm_area_struct *new;
25092509
int err;

0 commit comments

Comments
 (0)