
Commit 285b8dc

yhuang-intel authored and torvalds committed
mm, hugetlbfs: pass fault address to no page handler
This is to take better advantage of the general huge page clearing optimization (commit c79b57e: "mm: hugetlb: clear target sub-page last when clearing huge page") for hugetlbfs. In that optimization, the sub-page to be accessed is cleared last, so that its cache lines are not evicted while the other sub-pages are being cleared. This works better when we know the address of the sub-page to be accessed, that is, the fault address inside the huge page, so the hugetlbfs no-page fault handler is changed to pass that information down. This benefits workloads which do not access the beginning of a hugetlbfs huge page after the page fault, under heavy contention for the shared last level cache.

The patch is a generic optimization which should benefit quite a few workloads, not just a specific use case. To demonstrate its performance benefit, we tested it with vm-scalability running on hugetlbfs.

With this patch, throughput increases ~28.1% in the vm-scalability anon-w-seq test case with 88 processes on a 2-socket Xeon E5 2699 v4 system (44 cores, 88 threads). The test case creates 88 processes, each of which mmaps a big anonymous memory area with MAP_HUGETLB and writes to it from the end to the beginning. For each process, the other processes act as background workload generating heavy cache pressure. At the same time, the cache miss rate drops from ~36.3% to ~25.6%, the IPC (instructions per cycle) increases from 0.3 to 0.37, and the time spent in user space is reduced by ~19.3%.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: "Huang, Ying" <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Shaohua Li <[email protected]>
Cc: Christopher Lameter <[email protected]>
Cc: "Aneesh Kumar K.V" <[email protected]>
Cc: Punit Agrawal <[email protected]>
Cc: Anshuman Khandual <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
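To make the mechanism concrete, here is a minimal userspace sketch of the "target sub-page last" clearing order that commit c79b57e introduced and that this patch feeds with a better target address. It is an illustration only, not the kernel's clear_huge_page(); SUB_PAGE_SIZE, clear_huge_region(), and its parameters are hypothetical names.

#include <string.h>

#define SUB_PAGE_SIZE 4096UL	/* illustrative 4 KiB sub-page */

/*
 * Clear every sub-page of a huge page except the one the faulting
 * access targets, then clear that one last. (The kernel's real
 * routine is more elaborate; only the ordering is sketched here.)
 */
static void clear_huge_region(char *base, unsigned long n_sub_pages,
			      unsigned long target_idx)
{
	unsigned long i;

	for (i = 0; i < n_sub_pages; i++) {
		if (i == target_idx)
			continue;
		memset(base + i * SUB_PAGE_SIZE, 0, SUB_PAGE_SIZE);
	}
	/* Target last: its cache lines survive the clearing. */
	memset(base + target_idx * SUB_PAGE_SIZE, 0, SUB_PAGE_SIZE);
}

The point of the ordering is purely cache residency: the final memset leaves the target sub-page's lines in cache, so the retried access that triggered the fault hits instead of misses.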
1 parent b3ec9f3 commit 285b8dc

File tree

1 file changed, +21 −21 lines changed

mm/hugetlb.c

Lines changed: 21 additions & 21 deletions
@@ -3686,6 +3686,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	pte_t new_pte;
 	spinlock_t *ptl;
+	unsigned long haddr = address & huge_page_mask(h);
 
 	/*
 	 * Currently, we are forced to kill the process in the event the
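The new haddr local is the fault address rounded down to the huge page boundary, while address itself keeps the sub-page bits. A minimal sketch of the masking arithmetic, assuming 2 MiB huge pages (the mask and the sample address are illustrative, not taken from the kernel):

#include <stdio.h>

int main(void)
{
	unsigned long huge_size = 2UL << 20;		/* assume 2 MiB huge pages */
	unsigned long huge_mask = ~(huge_size - 1);	/* what huge_page_mask(h) yields */
	unsigned long address = 0x7f0040212345UL;	/* hypothetical fault address */

	unsigned long haddr = address & huge_mask;	/* huge-page-aligned base */
	unsigned long offset = address & ~huge_mask;	/* sub-page bits kept intact */

	printf("haddr=%#lx offset=%#lx\n", haddr, offset);	/* 0x7f0040200000, 0x12345 */
	return 0;
}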
@@ -3716,7 +3717,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		u32 hash;
 		struct vm_fault vmf = {
 			.vma = vma,
-			.address = address,
+			.address = haddr,
 			.flags = flags,
 			/*
 			 * Hard to debug if it ends up being
@@ -3733,14 +3734,14 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * fault to make calling code simpler.
 		 */
 		hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping,
-						idx, address);
+						idx, haddr);
 		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 		ret = handle_userfault(&vmf, VM_UFFD_MISSING);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 		goto out;
 	}
 
-	page = alloc_huge_page(vma, address, 0);
+	page = alloc_huge_page(vma, haddr, 0);
 	if (IS_ERR(page)) {
 		ret = PTR_ERR(page);
 		if (ret == -ENOMEM)
@@ -3789,12 +3790,12 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * the spinlock.
 	 */
 	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
-		if (vma_needs_reservation(h, vma, address) < 0) {
+		if (vma_needs_reservation(h, vma, haddr) < 0) {
 			ret = VM_FAULT_OOM;
 			goto backout_unlocked;
 		}
 		/* Just decrements count, does not deallocate */
-		vma_end_reservation(h, vma, address);
+		vma_end_reservation(h, vma, haddr);
 	}
 
 	ptl = huge_pte_lock(h, mm, ptep);
@@ -3808,17 +3809,17 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (anon_rmap) {
 		ClearPagePrivate(page);
-		hugepage_add_new_anon_rmap(page, vma, address);
+		hugepage_add_new_anon_rmap(page, vma, haddr);
 	} else
 		page_dup_rmap(page, true);
 	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
 				&& (vma->vm_flags & VM_SHARED)));
-	set_huge_pte_at(mm, address, ptep, new_pte);
+	set_huge_pte_at(mm, haddr, ptep, new_pte);
 
 	hugetlb_count_add(pages_per_huge_page(h), mm);
 	if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_cow(mm, vma, address, ptep, page, ptl);
+		ret = hugetlb_cow(mm, vma, haddr, ptep, page, ptl);
 	}
 
 	spin_unlock(ptl);
@@ -3830,7 +3831,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(ptl);
 backout_unlocked:
 	unlock_page(page);
-	restore_reserve_on_error(h, vma, address, page);
+	restore_reserve_on_error(h, vma, haddr, page);
 	put_page(page);
 	goto out;
 }
@@ -3883,10 +3884,9 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct hstate *h = hstate_vma(vma);
 	struct address_space *mapping;
 	int need_wait_lock = 0;
+	unsigned long haddr = address & huge_page_mask(h);
 
-	address &= huge_page_mask(h);
-
-	ptep = huge_pte_offset(mm, address, huge_page_size(h));
+	ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
 		if (unlikely(is_hugetlb_entry_migration(entry))) {
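The shape of this hunk is the crux of the patch: hugetlb_fault() used to overwrite address in place with the masked value, so the no page handler could never see which sub-page faulted; keeping the aligned value in a separate haddr leaves both available. A small sketch of the before/after semantics, again with illustrative constants:

#include <assert.h>

#define HPAGE_MASK	(~((2UL << 20) - 1))	/* illustrative 2 MiB mask */

int main(void)
{
	unsigned long address = 0x7f0000312000UL;	/* hypothetical fault address */

	/* Old flow: "address &= huge_page_mask(h);" lost the sub-page bits. */
	unsigned long old_address = address & HPAGE_MASK;

	/* New flow: the aligned value lives in haddr; address stays intact. */
	unsigned long haddr = address & HPAGE_MASK;

	assert(haddr == old_address);	/* aligned callers see no change */
	assert(address != haddr);	/* but the fault offset now survives */
	return 0;
}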
@@ -3896,20 +3896,20 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			return VM_FAULT_HWPOISON_LARGE |
 				VM_FAULT_SET_HINDEX(hstate_index(h));
 	} else {
-		ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+		ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
 		if (!ptep)
 			return VM_FAULT_OOM;
 	}
 
 	mapping = vma->vm_file->f_mapping;
-	idx = vma_hugecache_offset(h, vma, address);
+	idx = vma_hugecache_offset(h, vma, haddr);
 
 	/*
 	 * Serialize hugepage allocation and instantiation, so that we don't
 	 * get spurious allocation failures if two CPUs race to instantiate
 	 * the same page in the page cache.
 	 */
-	hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, address);
+	hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, haddr);
 	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 	entry = huge_ptep_get(ptep);
@@ -3939,16 +3939,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * consumed.
 	 */
 	if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) {
-		if (vma_needs_reservation(h, vma, address) < 0) {
+		if (vma_needs_reservation(h, vma, haddr) < 0) {
 			ret = VM_FAULT_OOM;
 			goto out_mutex;
 		}
 		/* Just decrements count, does not deallocate */
-		vma_end_reservation(h, vma, address);
+		vma_end_reservation(h, vma, haddr);
 
 		if (!(vma->vm_flags & VM_MAYSHARE))
 			pagecache_page = hugetlbfs_pagecache_page(h,
-							vma, address);
+							vma, haddr);
 	}
 
 	ptl = huge_pte_lock(h, mm, ptep);
@@ -3973,16 +3973,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (flags & FAULT_FLAG_WRITE) {
 		if (!huge_pte_write(entry)) {
-			ret = hugetlb_cow(mm, vma, address, ptep,
+			ret = hugetlb_cow(mm, vma, haddr, ptep,
 					  pagecache_page, ptl);
 			goto out_put_page;
 		}
 		entry = huge_pte_mkdirty(entry);
 	}
 	entry = pte_mkyoung(entry);
-	if (huge_ptep_set_access_flags(vma, address, ptep, entry,
+	if (huge_ptep_set_access_flags(vma, haddr, ptep, entry,
 					flags & FAULT_FLAG_WRITE))
-		update_mmu_cache(vma, address, ptep);
+		update_mmu_cache(vma, haddr, ptep);
 out_put_page:
 	if (page != pagecache_page)
 		unlock_page(page);
