@@ -3728,6 +3728,18 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
 	return false;
 }
 
+static void
+hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
+		     struct page *new_page)
+{
+	__SetPageUptodate(new_page);
+	set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
+	hugepage_add_new_anon_rmap(new_page, vma, addr);
+	hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
+	ClearHPageRestoreReserve(new_page);
+	SetHPageMigratable(new_page);
+}
+
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			    struct vm_area_struct *vma)
 {
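The new helper installs the child's mapping writable (the 1 passed to make_huge_pte()): the freshly copied page is exclusively owned by the child, so there is no reason to write-protect it. Condensed from the final hunk of this diff, its calling protocol looks like this (a sketch, not extra patch content; names as in the diff):

	/* Both page-table locks must be held, and the source PTE must be
	 * revalidated after the unlocked allocate/copy phase. */
	dst_ptl = huge_pte_lock(h, dst, dst_pte);
	src_ptl = huge_pte_lockptr(h, src, src_pte);
	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
	if (pte_same(src_pte_old, huge_ptep_get(src_pte)))
		/* src pte unchanged: safe to install the child's copy */
		hugetlb_install_page(vma, dst_pte, addr, new);
	else
		/* src pte changed while unlocked: drop the copy and retry */
		put_page(new);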
@@ -3737,6 +3749,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 	bool cow = is_cow_mapping(vma->vm_flags);
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
+	unsigned long npages = pages_per_huge_page(h);
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	struct mmu_notifier_range range;
 	int ret = 0;
@@ -3785,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		entry = huge_ptep_get(src_pte);
 		dst_entry = huge_ptep_get(dst_pte);
+again:
 		if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
 			/*
 			 * Skip if src entry none. Also, skip in the
@@ -3808,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			}
 			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
 		} else {
+			entry = huge_ptep_get(src_pte);
+			ptepage = pte_page(entry);
+			get_page(ptepage);
+
+			/*
+			 * This is a rare case where we see pinned hugetlb
+			 * pages while they're prone to COW.  We need to do the
+			 * COW earlier during fork.
+			 *
+			 * When pre-allocating the page or copying data, we
+			 * need to be without the pgtable locks since we could
+			 * sleep during the process.
+			 */
+			if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
+				pte_t src_pte_old = entry;
+				struct page *new;
+
+				spin_unlock(src_ptl);
+				spin_unlock(dst_ptl);
+				/* Do not use reserve as it's private owned */
+				new = alloc_huge_page(vma, addr, 1);
+				if (IS_ERR(new)) {
+					put_page(ptepage);
+					ret = PTR_ERR(new);
+					break;
+				}
+				copy_user_huge_page(new, ptepage, addr, vma,
+						    npages);
+				put_page(ptepage);
+
+				/* Install the new huge page if src pte stable */
+				dst_ptl = huge_pte_lock(h, dst, dst_pte);
+				src_ptl = huge_pte_lockptr(h, src, src_pte);
+				spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+				entry = huge_ptep_get(src_pte);
+				if (!pte_same(src_pte_old, entry)) {
+					put_page(new);
+					/* dst_entry won't change as in child */
+					goto again;
+				}
+				hugetlb_install_page(vma, dst_pte, addr, new);
+				spin_unlock(src_ptl);
+				spin_unlock(dst_ptl);
+				continue;
+			}
+
 			if (cow) {
 				/*
 				 * No need to notify as we are downgrading page
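For reference, page_needs_cow_for_dma() is not introduced by this diff; it comes from an earlier patch in the same series. Its shape at the time was roughly the following (a from-memory sketch; the authoritative definition lives in include/linux/mm.h):

	static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
						  struct page *page)
	{
		/* Only private (COW) mappings can need the early copy */
		if (!is_cow_mapping(vma->vm_flags))
			return false;
		/* Cheap filter: has this mm ever pinned pages for DMA? */
		if (!atomic_read(&vma->vm_mm->has_pinned))
			return false;
		/* Best-effort check of the page's pin count */
		return page_maybe_dma_pinned(page);
	}

The check is deliberately best-effort: a false positive only costs one extra page copy at fork time, never correctness.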
@@ -3818,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 				 */
 				huge_ptep_set_wrprotect(src, addr, src_pte);
 			}
-			entry = huge_ptep_get(src_pte);
-			ptepage = pte_page(entry);
-			get_page(ptepage);
+
 			page_dup_rmap(ptepage, true);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
-			hugetlb_count_add(pages_per_huge_page(h), dst);
+			hugetlb_count_add(npages, dst);
 		}
 		spin_unlock(src_ptl);
 		spin_unlock(dst_ptl);
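Why the child, rather than the parent, gets the new page: a DMA-pinned page must keep a stable physical address in the mm that pinned it, i.e. the parent. A hypothetical userspace sketch of the scenario being fixed (illustrative only; a real trigger needs asynchronous DMA such as io_uring, RDMA, or vfio, and the comments below merely mark where the pinned-while-forking window would sit):

	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <sys/wait.h>
	#include <unistd.h>

	#define HPAGE_SIZE	(2UL << 20)	/* assumes 2MB hugepages are configured */

	int main(void)
	{
		/* Private anonymous hugetlb mapping: a COW-able huge page */
		char *buf = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
		if (buf == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		memset(buf, 0, HPAGE_SIZE);	/* fault in the huge page */

		/*
		 * In the problematic scenario, asynchronous DMA into buf is
		 * pinned (FOLL_PIN) and still in flight at this point.
		 */
		pid_t pid = fork();
		if (pid == 0)
			_exit(0);	/* child: with this patch, it owns a private copy */

		/*
		 * Parent write after fork.  Pre-patch, this write would COW
		 * the parent onto a new page while the pinned DMA target
		 * stayed behind; post-patch the parent keeps the original
		 * page, so the in-flight DMA data is not lost.
		 */
		buf[0] = 1;

		waitpid(pid, NULL, 0);
		munmap(buf, HPAGE_SIZE);
		return 0;
	}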