@@ -2977,7 +2977,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 }
 
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long address)
+		unsigned long address, bool freeze)
 {
 	spinlock_t *ptl;
 	struct mm_struct *mm = vma->vm_mm;
@@ -2994,7 +2994,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			page = NULL;
 	} else if (!pmd_devmap(*pmd))
 		goto out;
-	__split_huge_pmd_locked(vma, pmd, haddr, false);
+	__split_huge_pmd_locked(vma, pmd, haddr, freeze);
 out:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
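The new freeze flag is simply threaded down to __split_huge_pmd_locked(), which already knows how to leave migration entries behind when asked; callers that only want a plain in-place split pass false. The matching split_huge_pmd() wrapper in include/linux/huge_mm.h is not part of these hunks; a minimal sketch of how such a wrapper could keep the old call sites unchanged, assuming the usual macro form, might look like this:

	/* Sketch only -- the real header change is outside this hunk. */
	#define split_huge_pmd(__vma, __pmd, __address)				\
		do {								\
			pmd_t *____pmd = (__pmd);				\
			if (pmd_trans_huge(*____pmd) || pmd_devmap(*____pmd))	\
				__split_huge_pmd(__vma, ____pmd, __address,	\
						 false);			\
		} while (0)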
@@ -3006,7 +3006,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	}
 }
 
-void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address)
+void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
+		bool freeze, struct page *page)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -3023,11 +3024,20 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address)
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd) || (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)))
 		return;
+
+	/*
+	 * If the caller asks us to set up migration entries, we need a page
+	 * to check the pmd against. Otherwise we can end up replacing the wrong page.
+	 */
+	VM_BUG_ON(freeze && !page);
+	if (page && page != pmd_page(*pmd))
+		return;
+
 	/*
 	 * Caller holds the mmap_sem write mode, so a huge pmd cannot
 	 * materialize from under us.
 	 */
-	split_huge_pmd(vma, pmd, address);
+	__split_huge_pmd(vma, pmd, address, freeze);
 }
 
 void vma_adjust_trans_huge(struct vm_area_struct *vma,
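With the new signature, a caller that wants the split to leave migration entries behind (freeze == true) must also pass the page it is unmapping, so the function can verify the PMD still maps that exact page before replacing its entries; an unrelated THP faulted in at the same address is left alone. A hypothetical caller on the rmap side (the function name here is illustrative, not part of this patch) might use it like this:

	/* Illustrative sketch: split before unmapping a PTE-mapped THP. */
	static void example_split_before_unmap(struct page *page,
					       struct vm_area_struct *vma,
					       unsigned long address,
					       enum ttu_flags flags)
	{
		if (flags & TTU_SPLIT_HUGE_PMD)
			split_huge_pmd_address(vma, address,
					       flags & TTU_MIGRATION, page);
	}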
@@ -3043,7 +3053,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	if (start & ~HPAGE_PMD_MASK &&
 	    (start & HPAGE_PMD_MASK) >= vma->vm_start &&
 	    (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-		split_huge_pmd_address(vma, start);
+		split_huge_pmd_address(vma, start, false, NULL);
 
 	/*
 	 * If the new end address isn't hpage aligned and it could
@@ -3053,7 +3063,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	if (end & ~HPAGE_PMD_MASK &&
 	    (end & HPAGE_PMD_MASK) >= vma->vm_start &&
 	    (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-		split_huge_pmd_address(vma, end);
+		split_huge_pmd_address(vma, end, false, NULL);
 
 	/*
 	 * If we're also updating the vma->vm_next->vm_start, if the new
@@ -3067,184 +3077,36 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 		if (nstart & ~HPAGE_PMD_MASK &&
 		    (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
 		    (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
-			split_huge_pmd_address(next, nstart);
+			split_huge_pmd_address(next, nstart, false, NULL);
 	}
 }
 
-static void freeze_page_vma(struct vm_area_struct *vma, struct page *page,
-		unsigned long address)
+static void freeze_page(struct page *page)
 {
-	unsigned long haddr = address & HPAGE_PMD_MASK;
-	spinlock_t *ptl;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	int i, nr = HPAGE_PMD_NR;
-
-	/* Skip pages which doesn't belong to the VMA */
-	if (address < vma->vm_start) {
-		int off = (vma->vm_start - address) >> PAGE_SHIFT;
-		page += off;
-		nr -= off;
-		address = vma->vm_start;
-	}
-
-	pgd = pgd_offset(vma->vm_mm, address);
-	if (!pgd_present(*pgd))
-		return;
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		return;
-	pmd = pmd_offset(pud, address);
-	ptl = pmd_lock(vma->vm_mm, pmd);
-	if (!pmd_present(*pmd)) {
-		spin_unlock(ptl);
-		return;
-	}
-	if (pmd_trans_huge(*pmd)) {
-		if (page == pmd_page(*pmd))
-			__split_huge_pmd_locked(vma, pmd, haddr, true);
-		spin_unlock(ptl);
-		return;
-	}
-	spin_unlock(ptl);
-
-	pte = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl);
-	for (i = 0; i < nr; i++, address += PAGE_SIZE, page++, pte++) {
-		pte_t entry, swp_pte;
-		swp_entry_t swp_entry;
-
-		/*
-		 * We've just crossed page table boundary: need to map next one.
-		 * It can happen if THP was mremaped to non PMD-aligned address.
-		 */
-		if (unlikely(address == haddr + HPAGE_PMD_SIZE)) {
-			pte_unmap_unlock(pte - 1, ptl);
-			pmd = mm_find_pmd(vma->vm_mm, address);
-			if (!pmd)
-				return;
-			pte = pte_offset_map_lock(vma->vm_mm, pmd,
-					address, &ptl);
-		}
-
-		if (!pte_present(*pte))
-			continue;
-		if (page_to_pfn(page) != pte_pfn(*pte))
-			continue;
-		flush_cache_page(vma, address, page_to_pfn(page));
-		entry = ptep_clear_flush(vma, address, pte);
-		if (pte_dirty(entry))
-			SetPageDirty(page);
-		swp_entry = make_migration_entry(page, pte_write(entry));
-		swp_pte = swp_entry_to_pte(swp_entry);
-		if (pte_soft_dirty(entry))
-			swp_pte = pte_swp_mksoft_dirty(swp_pte);
-		set_pte_at(vma->vm_mm, address, pte, swp_pte);
-		page_remove_rmap(page, false);
-		put_page(page);
-	}
-	pte_unmap_unlock(pte - 1, ptl);
-}
-
-static void freeze_page(struct anon_vma *anon_vma, struct page *page)
-{
-	struct anon_vma_chain *avc;
-	pgoff_t pgoff = page_to_pgoff(page);
+	enum ttu_flags ttu_flags = TTU_MIGRATION | TTU_IGNORE_MLOCK |
+		TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED;
+	int i, ret;
 
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 
-	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff,
-			pgoff + HPAGE_PMD_NR - 1) {
-		unsigned long address = __vma_address(page, avc->vma);
-
-		mmu_notifier_invalidate_range_start(avc->vma->vm_mm,
-				address, address + HPAGE_PMD_SIZE);
-		freeze_page_vma(avc->vma, page, address);
-		mmu_notifier_invalidate_range_end(avc->vma->vm_mm,
-				address, address + HPAGE_PMD_SIZE);
-	}
-}
-
-static void unfreeze_page_vma(struct vm_area_struct *vma, struct page *page,
-		unsigned long address)
-{
-	spinlock_t *ptl;
-	pmd_t *pmd;
-	pte_t *pte, entry;
-	swp_entry_t swp_entry;
-	unsigned long haddr = address & HPAGE_PMD_MASK;
-	int i, nr = HPAGE_PMD_NR;
-
-	/* Skip pages which doesn't belong to the VMA */
-	if (address < vma->vm_start) {
-		int off = (vma->vm_start - address) >> PAGE_SHIFT;
-		page += off;
-		nr -= off;
-		address = vma->vm_start;
-	}
-
-	pmd = mm_find_pmd(vma->vm_mm, address);
-	if (!pmd)
-		return;
-
-	pte = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl);
-	for (i = 0; i < nr; i++, address += PAGE_SIZE, page++, pte++) {
-		/*
-		 * We've just crossed page table boundary: need to map next one.
-		 * It can happen if THP was mremaped to non-PMD aligned address.
-		 */
-		if (unlikely(address == haddr + HPAGE_PMD_SIZE)) {
-			pte_unmap_unlock(pte - 1, ptl);
-			pmd = mm_find_pmd(vma->vm_mm, address);
-			if (!pmd)
-				return;
-			pte = pte_offset_map_lock(vma->vm_mm, pmd,
-					address, &ptl);
-		}
-
-		if (!is_swap_pte(*pte))
-			continue;
-
-		swp_entry = pte_to_swp_entry(*pte);
-		if (!is_migration_entry(swp_entry))
-			continue;
-		if (migration_entry_to_page(swp_entry) != page)
-			continue;
-
-		get_page(page);
-		page_add_anon_rmap(page, vma, address, false);
-
-		entry = pte_mkold(mk_pte(page, vma->vm_page_prot));
-		if (PageDirty(page))
-			entry = pte_mkdirty(entry);
-		if (is_write_migration_entry(swp_entry))
-			entry = maybe_mkwrite(entry, vma);
-
-		flush_dcache_page(page);
-		set_pte_at(vma->vm_mm, address, pte, entry);
+	/* We only need TTU_SPLIT_HUGE_PMD once */
+	ret = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD);
+	for (i = 1; !ret && i < HPAGE_PMD_NR; i++) {
+		/* Cut short if the page is unmapped */
+		if (page_count(page) == 1)
+			return;
 
-		/* No need to invalidate - it was non-present before */
-		update_mmu_cache(vma, address, pte);
+		ret = try_to_unmap(page + i, ttu_flags);
 	}
-	pte_unmap_unlock(pte - 1, ptl);
+	VM_BUG_ON(ret);
 }
 
-static void unfreeze_page(struct anon_vma *anon_vma, struct page *page)
+static void unfreeze_page(struct page *page)
 {
-	struct anon_vma_chain *avc;
-	pgoff_t pgoff = page_to_pgoff(page);
-
-	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
-			pgoff, pgoff + HPAGE_PMD_NR - 1) {
-		unsigned long address = __vma_address(page, avc->vma);
+	int i;
 
-		mmu_notifier_invalidate_range_start(avc->vma->vm_mm,
-				address, address + HPAGE_PMD_SIZE);
-		unfreeze_page_vma(avc->vma, page, address);
-		mmu_notifier_invalidate_range_end(avc->vma->vm_mm,
-				address, address + HPAGE_PMD_SIZE);
-	}
+	for (i = 0; i < HPAGE_PMD_NR; i++)
+		remove_migration_ptes(page + i, page + i, true);
 }
 
 static void __split_huge_page_tail(struct page *head, int tail,
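The open-coded per-VMA walkers are gone: freeze_page() now relies on the generic rmap walk in try_to_unmap(), asking for TTU_MIGRATION so each present PTE is replaced by a migration entry (TTU_SPLIT_HUGE_PMD on the first call handles a still-PMD-mapped THP, and TTU_RMAP_LOCKED tells the walk the rmap lock is already held). What "freezing" a PTE amounts to is the same transformation the deleted freeze_page_vma() performed by hand; a condensed sketch of that transformation, using only helpers visible in the removed code:

	/* Sketch: build the swap-format PTE that stands in for a frozen mapping. */
	static pte_t example_migration_pte(struct page *page, pte_t old)
	{
		swp_entry_t entry = make_migration_entry(page, pte_write(old));
		pte_t swp_pte = swp_entry_to_pte(entry);

		if (pte_soft_dirty(old))
			swp_pte = pte_swp_mksoft_dirty(swp_pte);
		return swp_pte;		/* installed with set_pte_at() by the walker */
	}

unfreeze_page() is the mirror image: remove_migration_ptes() rewrites each subpage's migration entries back into ordinary PTEs, its trailing true matching the TTU_RMAP_LOCKED convention used on the freeze side.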
@@ -3322,7 +3184,7 @@ static void __split_huge_page(struct page *page, struct list_head *list)
 	ClearPageCompound(head);
 	spin_unlock_irq(&zone->lru_lock);
 
-	unfreeze_page(page_anon_vma(head), head);
+	unfreeze_page(head);
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
 		struct page *subpage = head + i;
@@ -3418,7 +3280,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	}
 
 	mlocked = PageMlocked(page);
-	freeze_page(anon_vma, head);
+	freeze_page(head);
 	VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
 	/* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -3447,7 +3309,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		BUG();
 	} else {
 		spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
-		unfreeze_page(anon_vma, head);
+		unfreeze_page(head);
 		ret = -EBUSY;
 	}
 
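Taken together, the call sites in split_huge_page_to_list() and __split_huge_page() pair the two helpers around the actual split. A heavily condensed sketch of that flow, with locking, mlock handling and the real refcount checks elided (can_split below is a placeholder, not a variable in the patch):

	/* Condensed sketch of the caller's flow; not the literal kernel code. */
	static int example_split_flow(struct page *head, struct list_head *list)
	{
		bool can_split = false;	/* placeholder for the real refcount checks */
		int ret = 0;

		freeze_page(head);	/* every mapping becomes a migration entry */
		VM_BUG_ON_PAGE(compound_mapcount(head), head);

		if (can_split) {
			__split_huge_page(head, list);	/* calls unfreeze_page(head) */
		} else {
			unfreeze_page(head);		/* remap and give up */
			ret = -EBUSY;
		}
		return ret;
	}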