Skip to content

Commit 108d664

Browse files
walken-googletorvalds
authored andcommitted
mm anon rmap: remove anon_vma_moveto_tail
mremap() had a clever optimization where move_ptes() did not take the anon_vma lock to avoid a race with anon rmap users such as page migration. Instead, the avc's were ordered in such a way that the origin vma was always visited by rmap before the destination. This ordering and the use of page table locks rmap usage safe. However, we want to replace the use of linked lists in anon rmap with an interval tree, and this will make it harder to impose such ordering as the interval tree will always be sorted by the avc->vma->vm_pgoff value. For now, let's replace the anon_vma_moveto_tail() ordering function with proper anon_vma locking in move_ptes(). Once we have the anon interval tree in place, we will re-introduce an optimization to avoid taking these locks in the most common cases. Signed-off-by: Michel Lespinasse <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Daniel Santos <[email protected]> Cc: Hugh Dickins <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 9826a51 commit 108d664

File tree

4 files changed

+6
-57
lines changed

4 files changed

+6
-57
lines changed

include/linux/rmap.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ void anon_vma_init(void); /* create anon_vma_cachep */
120120
int anon_vma_prepare(struct vm_area_struct *);
121121
void unlink_anon_vmas(struct vm_area_struct *);
122122
int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
123-
void anon_vma_moveto_tail(struct vm_area_struct *);
124123
int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
125124

126125
static inline void anon_vma_merge(struct vm_area_struct *vma,

mm/mmap.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2378,8 +2378,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
23782378
*/
23792379
VM_BUG_ON(faulted_in_anon_vma);
23802380
*vmap = new_vma;
2381-
} else
2382-
anon_vma_moveto_tail(new_vma);
2381+
}
23832382
} else {
23842383
new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
23852384
if (new_vma) {

mm/mremap.c

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
7474
unsigned long new_addr)
7575
{
7676
struct address_space *mapping = NULL;
77+
struct anon_vma *anon_vma = vma->anon_vma;
7778
struct mm_struct *mm = vma->vm_mm;
7879
pte_t *old_pte, *new_pte, pte;
7980
spinlock_t *old_ptl, *new_ptl;
@@ -88,6 +89,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
8889
mapping = vma->vm_file->f_mapping;
8990
mutex_lock(&mapping->i_mmap_mutex);
9091
}
92+
if (anon_vma)
93+
anon_vma_lock(anon_vma);
9194

9295
/*
9396
* We don't have to worry about the ordering of src and dst
@@ -114,6 +117,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
114117
spin_unlock(new_ptl);
115118
pte_unmap(new_pte - 1);
116119
pte_unmap_unlock(old_pte - 1, old_ptl);
120+
if (anon_vma)
121+
anon_vma_unlock(anon_vma);
117122
if (mapping)
118123
mutex_unlock(&mapping->i_mmap_mutex);
119124
}
@@ -220,15 +225,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
220225

221226
moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
222227
if (moved_len < old_len) {
223-
/*
224-
* Before moving the page tables from the new vma to
225-
* the old vma, we need to be sure the old vma is
226-
* queued after new vma in the same_anon_vma list to
227-
* prevent SMP races with rmap_walk (that could lead
228-
* rmap_walk to miss some page table).
229-
*/
230-
anon_vma_moveto_tail(vma);
231-
232228
/*
233229
* On error, move entries back from new area to old,
234230
* which will succeed since page tables still there,

mm/rmap.c

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -268,51 +268,6 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
268268
return -ENOMEM;
269269
}
270270

271-
/*
272-
* Some rmap walk that needs to find all ptes/hugepmds without false
273-
* negatives (like migrate and split_huge_page) running concurrent
274-
* with operations that copy or move pagetables (like mremap() and
275-
* fork()) to be safe. They depend on the anon_vma "same_anon_vma"
276-
* list to be in a certain order: the dst_vma must be placed after the
277-
* src_vma in the list. This is always guaranteed by fork() but
278-
* mremap() needs to call this function to enforce it in case the
279-
* dst_vma isn't newly allocated and chained with the anon_vma_clone()
280-
* function but just an extension of a pre-existing vma through
281-
* vma_merge.
282-
*
283-
* NOTE: the same_anon_vma list can still be changed by other
284-
* processes while mremap runs because mremap doesn't hold the
285-
* anon_vma mutex to prevent modifications to the list while it
286-
* runs. All we need to enforce is that the relative order of this
287-
* process vmas isn't changing (we don't care about other vmas
288-
* order). Each vma corresponds to an anon_vma_chain structure so
289-
* there's no risk that other processes calling anon_vma_moveto_tail()
290-
* and changing the same_anon_vma list under mremap() will screw with
291-
* the relative order of this process vmas in the list, because we
292-
* they can't alter the order of any vma that belongs to this
293-
* process. And there can't be another anon_vma_moveto_tail() running
294-
* concurrently with mremap() coming from this process because we hold
295-
* the mmap_sem for the whole mremap(). fork() ordering dependency
296-
* also shouldn't be affected because fork() only cares that the
297-
* parent vmas are placed in the list before the child vmas and
298-
* anon_vma_moveto_tail() won't reorder vmas from either the fork()
299-
* parent or child.
300-
*/
301-
void anon_vma_moveto_tail(struct vm_area_struct *dst)
302-
{
303-
struct anon_vma_chain *pavc;
304-
struct anon_vma *root = NULL;
305-
306-
list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
307-
struct anon_vma *anon_vma = pavc->anon_vma;
308-
VM_BUG_ON(pavc->vma != dst);
309-
root = lock_anon_vma_root(root, anon_vma);
310-
list_del(&pavc->same_anon_vma);
311-
list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
312-
}
313-
unlock_anon_vma_root(root);
314-
}
315-
316271
/*
317272
* Attach vma to its own anon_vma, as well as to the anon_vmas that
318273
* the corresponding VMA in the parent process is attached to.

0 commit comments

Comments
 (0)