
Commit c7f8f31

surenbaghdasaryan authored and akpm00 committed
mm: separate vma->lock from vm_area_struct
vma->lock being part of the vm_area_struct causes performance regression
during page faults because during contention its count and owner fields are
constantly updated and having other parts of vm_area_struct used during page
fault handling next to them causes constant cache line bouncing. Fix that by
moving the lock outside of the vm_area_struct.

All attempts to keep vma->lock inside vm_area_struct in a separate cache line
still produce performance regression especially on NUMA machines. Smallest
regression was achieved when lock is placed in the fourth cache line but that
bloats vm_area_struct to 256 bytes.

Considering performance and memory impact, separate lock looks like the best
option. It increases memory footprint of each VMA but that can be optimized
later if the new size causes issues.

Note that after this change vma_init() does not allocate or initialize
vma->lock anymore. A number of drivers allocate a pseudo VMA on the stack but
they never use the VMA's lock, therefore it does not need to be allocated.
The future drivers which might need the VMA lock should use
vm_area_alloc()/vm_area_free() to allocate the VMA.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Suren Baghdasaryan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
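For drivers, the last paragraph boils down to two allocation patterns. A minimal sketch under those assumptions (the stack-allocated pseudo VMA and the error handling are illustrative only, not code from this commit):

	/* Pseudo VMA on the stack: vma_init() is sufficient; after this change
	 * it does not set up vma->vm_lock, so such a VMA must never be locked. */
	struct vm_area_struct pseudo_vma;

	vma_init(&pseudo_vma, mm);

	/* VMA that may participate in per-VMA locking: allocate and free it via
	 * the slab helpers, which also manage the separate struct vma_lock. */
	struct vm_area_struct *vma = vm_area_alloc(mm);

	if (!vma)
		return -ENOMEM;
	/* ... vma can now be write-locked with vma_start_write(), etc. ... */
	vm_area_free(vma);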
1 parent 0d2ebf9 commit c7f8f31

File tree

3 files changed: +74, -28 lines changed


include/linux/mm.h

Lines changed: 10 additions & 13 deletions
@@ -628,12 +628,6 @@ struct vm_operations_struct {
 };
 
 #ifdef CONFIG_PER_VMA_LOCK
-static inline void vma_init_lock(struct vm_area_struct *vma)
-{
-	init_rwsem(&vma->lock);
-	vma->vm_lock_seq = -1;
-}
-
 /*
  * Try to read-lock a vma. The function is allowed to occasionally yield false
  * locked result to avoid performance overhead, in which case we fall back to
@@ -645,17 +639,17 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
 	if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
 		return false;
 
-	if (unlikely(down_read_trylock(&vma->lock) == 0))
+	if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
 		return false;
 
 	/*
 	 * Overflow might produce false locked result.
 	 * False unlocked result is impossible because we modify and check
-	 * vma->vm_lock_seq under vma->lock protection and mm->mm_lock_seq
+	 * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq
 	 * modification invalidates all existing locks.
 	 */
 	if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
-		up_read(&vma->lock);
+		up_read(&vma->vm_lock->lock);
 		return false;
 	}
 	return true;
@@ -664,7 +658,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
 static inline void vma_end_read(struct vm_area_struct *vma)
 {
 	rcu_read_lock(); /* keeps vma alive till the end of up_read */
-	up_read(&vma->lock);
+	up_read(&vma->vm_lock->lock);
 	rcu_read_unlock();
 }
 
@@ -687,9 +681,9 @@ static inline void vma_start_write(struct vm_area_struct *vma)
 	if (__is_vma_write_locked(vma, &mm_lock_seq))
 		return;
 
-	down_write(&vma->lock);
+	down_write(&vma->vm_lock->lock);
 	vma->vm_lock_seq = mm_lock_seq;
-	up_write(&vma->lock);
+	up_write(&vma->vm_lock->lock);
 }
 
 static inline bool vma_try_start_write(struct vm_area_struct *vma)
@@ -740,6 +734,10 @@ static inline void vma_mark_detached(struct vm_area_struct *vma,
 
 #endif /* CONFIG_PER_VMA_LOCK */
 
+/*
+ * WARNING: vma_init does not initialize vma->vm_lock.
+ * Use vm_area_alloc()/vm_area_free() if vma needs locking.
+ */
 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
 	static const struct vm_operations_struct dummy_vm_ops = {};
@@ -749,7 +747,6 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma_mark_detached(vma, false);
-	vma_init_lock(vma);
 }
 
 /* Use when VMA is not part of the VMA tree and needs no locking */
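For context, the read-lock helpers above are meant to bracket fault handling that runs without mmap_lock; a rough caller shape, assuming a hypothetical fallback helper that is not part of this commit:

	if (!vma_start_read(vma)) {
		/* Contended, or mm_lock_seq indicates a writer (possibly a
		 * false positive): retry the fault under mmap_lock instead. */
		return fault_fallback_under_mmap_lock(mm, address); /* hypothetical */
	}
	/* ... handle the fault holding only the per-VMA read lock ... */
	vma_end_read(vma);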

include/linux/mm_types.h

Lines changed: 5 additions & 1 deletion
@@ -471,6 +471,10 @@ struct anon_vma_name {
 	char name[];
 };
 
+struct vma_lock {
+	struct rw_semaphore lock;
+};
+
 /*
  * This struct describes a virtual memory area. There is one of these
  * per VM-area/task. A VM area is any part of the process virtual memory
@@ -505,7 +509,7 @@ struct vm_area_struct {
 
 #ifdef CONFIG_PER_VMA_LOCK
 	int vm_lock_seq;
-	struct rw_semaphore lock;
+	struct vma_lock *vm_lock;
 
 	/* Flag to indicate areas detached from the mm->mm_mt tree */
 	bool detached;

kernel/fork.c

Lines changed: 59 additions & 14 deletions
@@ -451,38 +451,80 @@ static struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+#ifdef CONFIG_PER_VMA_LOCK
+
+/* SLAB cache for vm_area_struct.lock */
+static struct kmem_cache *vma_lock_cachep;
+
+static bool vma_lock_alloc(struct vm_area_struct *vma)
+{
+	vma->vm_lock = kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL);
+	if (!vma->vm_lock)
+		return false;
+
+	init_rwsem(&vma->vm_lock->lock);
+	vma->vm_lock_seq = -1;
+
+	return true;
+}
+
+static inline void vma_lock_free(struct vm_area_struct *vma)
+{
+	kmem_cache_free(vma_lock_cachep, vma->vm_lock);
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return true; }
+static inline void vma_lock_free(struct vm_area_struct *vma) {}
+
+#endif /* CONFIG_PER_VMA_LOCK */
+
 struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
 
 	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
-	if (vma)
-		vma_init(vma, mm);
+	if (!vma)
+		return NULL;
+
+	vma_init(vma, mm);
+	if (!vma_lock_alloc(vma)) {
+		kmem_cache_free(vm_area_cachep, vma);
+		return NULL;
+	}
+
 	return vma;
 }
 
 struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 {
 	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 
-	if (new) {
-		ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
-		ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
-		/*
-		 * orig->shared.rb may be modified concurrently, but the clone
-		 * will be reinitialized.
-		 */
-		data_race(memcpy(new, orig, sizeof(*new)));
-		INIT_LIST_HEAD(&new->anon_vma_chain);
-		vma_init_lock(new);
-		dup_anon_vma_name(orig, new);
+	if (!new)
+		return NULL;
+
+	ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+	ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+	/*
+	 * orig->shared.rb may be modified concurrently, but the clone
+	 * will be reinitialized.
	 */
+	data_race(memcpy(new, orig, sizeof(*new)));
+	if (!vma_lock_alloc(new)) {
+		kmem_cache_free(vm_area_cachep, new);
+		return NULL;
 	}
+	INIT_LIST_HEAD(&new->anon_vma_chain);
+	dup_anon_vma_name(orig, new);
+
 	return new;
 }
 
 void __vm_area_free(struct vm_area_struct *vma)
 {
 	free_anon_vma_name(vma);
+	vma_lock_free(vma);
 	kmem_cache_free(vm_area_cachep, vma);
 }
 
@@ -493,7 +535,7 @@ static void vm_area_free_rcu_cb(struct rcu_head *head)
 					  vm_rcu);
 
 	/* The vma should not be locked while being destroyed. */
-	VM_BUG_ON_VMA(rwsem_is_locked(&vma->lock), vma);
+	VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma);
 	__vm_area_free(vma);
 }
 #endif
@@ -3152,6 +3194,9 @@ void __init proc_caches_init(void)
 			NULL);
 
 	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
+#ifdef CONFIG_PER_VMA_LOCK
+	vma_lock_cachep = KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT);
+#endif
 	mmap_init();
 	nsproxy_cache_init();
 }
