Commit e346b38

bgaff authored and torvalds committed
mm/mremap: add MREMAP_DONTUNMAP to mremap()
When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is set,
the source mapping will not be removed. The remap operation will be
performed as it would have been normally, by moving the page tables over to
the new mapping. The old vma will have any locked flags cleared and will
have no page tables, and any userfaultfds that were watching that range
will continue watching it.

For a mapping that is shared or not anonymous, MREMAP_DONTUNMAP will cause
the mremap() call to fail. Because MREMAP_DONTUNMAP always results in
moving a VMA, you MUST use the MREMAP_MAYMOVE flag. It is not possible to
resize a VMA while also moving it with MREMAP_DONTUNMAP, so old_len must
always be equal to new_len; otherwise mremap() will return -EINVAL.

We hope to use this in Chrome OS, where with userfaultfd we could write an
anonymous mapping to disk without having to STOP the process or worry
about VMA permission changes.

This feature also has a use case in Android. Lokesh Gidra has said that
"As part of using userfaultfd for GC, we'll have to move the physical
pages of the java heap to a separate location. For this purpose mremap
will be used. Without the MREMAP_DONTUNMAP flag, when I mremap the java
heap, its virtual mapping will be removed as well. Therefore, we'll
require performing mmap immediately after. This is not only time consuming
but also opens a time window where a native thread may call mmap and
reserve the java heap's address range for its own usage. This flag solves
the problem."

[[email protected]: v6]
  Link: http://lkml.kernel.org/r/[email protected]
[[email protected]: v7]
  Link: http://lkml.kernel.org/r/[email protected]

Signed-off-by: Brian Geffon <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Tested-by: Lokesh Gidra <[email protected]>
Reviewed-by: Minchan Kim <[email protected]>
Acked-by: Kirill A. Shutemov <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: "Michael S . Tsirkin" <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Sonny Rao <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Joel Fernandes <[email protected]>
Cc: Yu Zhao <[email protected]>
Cc: Jesse Barnes <[email protected]>
Cc: Nathan Chancellor <[email protected]>
Cc: Florian Weimer <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
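
For context, a minimal userspace sketch of the new flag (not part of this
commit; the fallback #define mirrors the uapi header below, and the buffer
size and variable names are illustrative only):

/*
 * Sketch: move the pages of an anonymous, private mapping to a new address
 * with MREMAP_DONTUNMAP while leaving the old VMA mapped (it keeps its
 * address range and any userfaultfd registration, but loses its pages).
 * Older libc headers may not define the flag yet, hence the fallback.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>

#ifndef MREMAP_DONTUNMAP
#define MREMAP_DONTUNMAP 4	/* value added by this commit */
#endif

int main(void)
{
	size_t len = 4 * 4096;

	char *src = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(src, 0xab, len);

	/*
	 * MREMAP_DONTUNMAP requires MREMAP_MAYMOVE and old_len == new_len.
	 * On success the page tables move to the returned address; the old
	 * range stays mapped but is now empty of pages.
	 */
	char *dst = mremap(src, len, len, MREMAP_MAYMOVE | MREMAP_DONTUNMAP);
	if (dst == MAP_FAILED) {
		perror("mremap");	/* EINVAL on kernels without this patch */
		return 1;
	}

	printf("pages moved from %p to %p; %p remains mapped\n",
	       (void *)src, (void *)dst, (void *)src);
	return 0;
}

Because the old range stays reserved, a concurrent mmap() cannot claim that
address range, which is the property the Android GC use case relies on.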
1 parent df529ca commit e346b38

2 files changed: 72 additions, 23 deletions


include/uapi/linux/mman.h

Lines changed: 3 additions & 2 deletions

@@ -5,8 +5,9 @@
 #include <asm/mman.h>
 #include <asm-generic/hugetlb_encode.h>
 
-#define MREMAP_MAYMOVE	1
-#define MREMAP_FIXED	2
+#define MREMAP_MAYMOVE		1
+#define MREMAP_FIXED		2
+#define MREMAP_DONTUNMAP	4
 
 #define OVERCOMMIT_GUESS		0
 #define OVERCOMMIT_ALWAYS		1

mm/mremap.c

Lines changed: 69 additions & 21 deletions

@@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 static unsigned long move_vma(struct vm_area_struct *vma,
 		unsigned long old_addr, unsigned long old_len,
 		unsigned long new_len, unsigned long new_addr,
-		bool *locked, struct vm_userfaultfd_ctx *uf,
-		struct list_head *uf_unmap)
+		bool *locked, unsigned long flags,
+		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma;
@@ -408,11 +408,32 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	if (unlikely(vma->vm_flags & VM_PFNMAP))
 		untrack_pfn_moved(vma);
 
+	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
+		if (vm_flags & VM_ACCOUNT) {
+			/* Always put back VM_ACCOUNT since we won't unmap */
+			vma->vm_flags |= VM_ACCOUNT;
+
+			vm_acct_memory(vma_pages(new_vma));
+		}
+
+		/* We always clear VM_LOCKED[ONFAULT] on the old vma */
+		vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+
+		/* Because we won't unmap we don't need to touch locked_vm */
+		goto out;
+	}
+
 	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
 		vm_unacct_memory(excess >> PAGE_SHIFT);
 		excess = 0;
 	}
+
+	if (vm_flags & VM_LOCKED) {
+		mm->locked_vm += new_len >> PAGE_SHIFT;
+		*locked = true;
+	}
+out:
 	mm->hiwater_vm = hiwater_vm;
 
 	/* Restore VM_ACCOUNT if one or two pieces of vma left */
@@ -422,16 +443,12 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		vma->vm_next->vm_flags |= VM_ACCOUNT;
 	}
 
-	if (vm_flags & VM_LOCKED) {
-		mm->locked_vm += new_len >> PAGE_SHIFT;
-		*locked = true;
-	}
-
 	return new_addr;
 }
 
 static struct vm_area_struct *vma_to_resize(unsigned long addr,
-	unsigned long old_len, unsigned long new_len, unsigned long *p)
+	unsigned long old_len, unsigned long new_len, unsigned long flags,
+	unsigned long *p)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = find_vma(mm, addr);
@@ -453,6 +470,10 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 		return ERR_PTR(-EINVAL);
 	}
 
+	if (flags & MREMAP_DONTUNMAP && (!vma_is_anonymous(vma) ||
+			vma->vm_flags & VM_SHARED))
+		return ERR_PTR(-EINVAL);
+
 	if (is_vm_hugetlb_page(vma))
 		return ERR_PTR(-EINVAL);
 
@@ -497,15 +518,15 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 
 static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		unsigned long new_addr, unsigned long new_len, bool *locked,
-		struct vm_userfaultfd_ctx *uf,
+		unsigned long flags, struct vm_userfaultfd_ctx *uf,
 		struct list_head *uf_unmap_early,
 		struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long ret = -EINVAL;
 	unsigned long charged = 0;
-	unsigned long map_flags;
+	unsigned long map_flags = 0;
 
 	if (offset_in_page(new_addr))
 		goto out;
@@ -534,9 +555,11 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 	if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
 		return -ENOMEM;
 
-	ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
-	if (ret)
-		goto out;
+	if (flags & MREMAP_FIXED) {
+		ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
+		if (ret)
+			goto out;
+	}
 
 	if (old_len >= new_len) {
 		ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
@@ -545,13 +568,22 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		old_len = new_len;
 	}
 
-	vma = vma_to_resize(addr, old_len, new_len, &charged);
+	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out;
 	}
 
-	map_flags = MAP_FIXED;
+	/* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */
+	if (flags & MREMAP_DONTUNMAP &&
+		!may_expand_vm(mm, vma->vm_flags, old_len >> PAGE_SHIFT)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (flags & MREMAP_FIXED)
+		map_flags |= MAP_FIXED;
+
 	if (vma->vm_flags & VM_MAYSHARE)
 		map_flags |= MAP_SHARED;
 
@@ -561,10 +593,16 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 	if (IS_ERR_VALUE(ret))
 		goto out1;
 
-	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+	/* We got a new mapping */
+	if (!(flags & MREMAP_FIXED))
+		new_addr = ret;
+
+	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
 		       uf_unmap);
+
 	if (!(offset_in_page(ret)))
 		goto out;
+
 out1:
 	vm_unacct_memory(charged);
 
@@ -618,12 +656,21 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	 */
 	addr = untagged_addr(addr);
 
-	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
+	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
 		return ret;
 
 	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
 		return ret;
 
+	/*
+	 * MREMAP_DONTUNMAP is always a move and it does not allow resizing
+	 * in the process.
+	 */
+	if (flags & MREMAP_DONTUNMAP &&
+			(!(flags & MREMAP_MAYMOVE) || old_len != new_len))
+		return ret;
+
+
 	if (offset_in_page(addr))
 		return ret;
 
@@ -641,9 +688,10 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	if (down_write_killable(&current->mm->mmap_sem))
 		return -EINTR;
 
-	if (flags & MREMAP_FIXED) {
+	if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,
-				&locked, &uf, &uf_unmap_early, &uf_unmap);
+				&locked, flags, &uf, &uf_unmap_early,
+				&uf_unmap);
 		goto out;
 	}
 
@@ -671,7 +719,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	/*
 	 * Ok, we need to grow..
 	 */
-	vma = vma_to_resize(addr, old_len, new_len, &charged);
+	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out;
@@ -721,7 +769,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 		}
 
 		ret = move_vma(vma, addr, old_len, new_len, new_addr,
-			       &locked, &uf, &uf_unmap);
+			       &locked, flags, &uf, &uf_unmap);
 	}
 out:
 	if (offset_in_page(ret)) {
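
The flag validation added to SYSCALL_DEFINE5(mremap, ...) above is visible
from userspace. A small sketch of the two -EINVAL cases described in the
commit message (again illustrative, not part of the patch):

/* Sketch: MREMAP_DONTUNMAP must come with MREMAP_MAYMOVE and no resize. */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <assert.h>
#include <errno.h>

#ifndef MREMAP_DONTUNMAP
#define MREMAP_DONTUNMAP 4
#endif

int main(void)
{
	size_t len = 4096;
	void *src = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(src != MAP_FAILED);

	/* Resizing while moving is rejected: old_len must equal new_len. */
	errno = 0;
	void *r = mremap(src, len, 2 * len, MREMAP_MAYMOVE | MREMAP_DONTUNMAP);
	assert(r == MAP_FAILED && errno == EINVAL);

	/* MREMAP_DONTUNMAP is always a move, so MREMAP_MAYMOVE is mandatory. */
	errno = 0;
	r = mremap(src, len, len, MREMAP_DONTUNMAP);
	assert(r == MAP_FAILED && errno == EINVAL);

	return 0;
}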
