@@ -2770,7 +2770,6 @@ static void lru_add_page_tail(struct page *head, struct page *tail,
 		struct lruvec *lruvec, struct list_head *list)
 {
 	VM_BUG_ON_PAGE(!PageHead(head), head);
-	VM_BUG_ON_PAGE(PageCompound(tail), head);
 	VM_BUG_ON_PAGE(PageLRU(tail), head);
 	lockdep_assert_held(&lruvec->lru_lock);
@@ -2791,7 +2790,8 @@ static void lru_add_page_tail(struct page *head, struct page *tail,
 }
 
 static void __split_huge_page_tail(struct folio *folio, int tail,
-		struct lruvec *lruvec, struct list_head *list)
+		struct lruvec *lruvec, struct list_head *list,
+		unsigned int new_order)
 {
 	struct page *head = &folio->page;
 	struct page *page_tail = head + tail;
@@ -2861,10 +2861,15 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
 	 * which needs correct compound_head().
 	 */
 	clear_compound_head(page_tail);
+	if (new_order) {
+		prep_compound_page(page_tail, new_order);
+		folio_prep_large_rmappable(new_folio);
+	}
 
 	/* Finally unfreeze refcount. Additional reference from page cache. */
-	page_ref_unfreeze(page_tail, 1 + (!folio_test_anon(folio) ||
-					  folio_test_swapcache(folio)));
+	page_ref_unfreeze(page_tail,
+		1 + ((!folio_test_anon(folio) || folio_test_swapcache(folio)) ?
+			     folio_nr_pages(new_folio) : 0));
 
 	if (folio_test_young(folio))
 		folio_set_young(new_folio);
@@ -2882,19 +2887,20 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list,
-		pgoff_t end)
+		pgoff_t end, unsigned int new_order)
 {
 	struct folio *folio = page_folio(page);
 	struct page *head = &folio->page;
 	struct lruvec *lruvec;
 	struct address_space *swap_cache = NULL;
 	unsigned long offset = 0;
 	int i, nr_dropped = 0;
+	unsigned int new_nr = 1 << new_order;
 	int order = folio_order(folio);
 	unsigned int nr = 1 << order;
 
 	/* complete memcg works before add pages to LRU */
-	split_page_memcg(head, order, 0);
+	split_page_memcg(head, order, new_order);
 
 	if (folio_test_anon(folio) && folio_test_swapcache(folio)) {
 		offset = swp_offset(folio->swap);
@@ -2907,8 +2913,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
 	ClearPageHasHWPoisoned(head);
 
-	for (i = nr - 1; i >= 1; i--) {
-		__split_huge_page_tail(folio, i, lruvec, list);
+	for (i = nr - new_nr; i >= new_nr; i -= new_nr) {
+		__split_huge_page_tail(folio, i, lruvec, list, new_order);
 		/* Some pages can be beyond EOF: drop them from page cache */
 		if (head[i].index >= end) {
 			struct folio *tail = page_folio(head + i);
@@ -2929,24 +2935,30 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 		}
 	}
 
-	ClearPageCompound(head);
+	if (!new_order)
+		ClearPageCompound(head);
+	else {
+		struct folio *new_folio = (struct folio *)head;
+
+		folio_set_order(new_folio, new_order);
+	}
 	unlock_page_lruvec(lruvec);
 	/* Caller disabled irqs, so they are still disabled here */
 
-	split_page_owner(head, order, 0);
+	split_page_owner(head, order, new_order);
 
 	/* See comment in __split_huge_page_tail() */
 	if (PageAnon(head)) {
 		/* Additional pin to swap cache */
 		if (PageSwapCache(head)) {
-			page_ref_add(head, 2);
+			page_ref_add(head, 1 + new_nr);
 			xa_unlock(&swap_cache->i_pages);
 		} else {
 			page_ref_inc(head);
 		}
 	} else {
 		/* Additional pin to page cache */
-		page_ref_add(head, 2);
+		page_ref_add(head, 1 + new_nr);
 		xa_unlock(&head->mapping->i_pages);
 	}
 	local_irq_enable();
@@ -2958,7 +2970,15 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	if (folio_test_swapcache(folio))
 		split_swap_cluster(folio->swap);
 
-	for (i = 0; i < nr; i++) {
+	/*
+	 * set page to its compound_head when split to non order-0 pages, so
+	 * we can skip unlocking it below, since PG_locked is transferred to
+	 * the compound_head of the page and the caller will unlock it.
+	 */
+	if (new_order)
+		page = compound_head(page);
+
+	for (i = 0; i < nr; i += new_nr) {
 		struct page *subpage = head + i;
 		if (subpage == page)
 			continue;
@@ -2992,29 +3012,36 @@ bool can_split_folio(struct folio *folio, int *pextra_pins)
 }
 
 /*
- * This function splits huge page into normal pages. @page can point to any
- * subpage of huge page to split. Split doesn't change the position of @page.
+ * This function splits huge page into pages in @new_order. @page can point to
+ * any subpage of huge page to split. Split doesn't change the position of
+ * @page.
+ *
+ * NOTE: order-1 anonymous folio is not supported because _deferred_list,
+ * which is used by partially mapped folios, is stored in subpage 2 and an
+ * order-1 folio only has subpage 0 and 1. File-backed order-1 folios are OK,
+ * since they do not use _deferred_list.
  *
  * Only caller must hold pin on the @page, otherwise split fails with -EBUSY.
  * The huge page must be locked.
  *
  * If @list is null, tail pages will be added to LRU list, otherwise, to @list.
  *
- * Both head page and tail pages will inherit mapping, flags, and so on from
- * the hugepage.
+ * Pages in new_order will inherit mapping, flags, and so on from the hugepage.
  *
- * GUP pin and PG_locked transferred to @page. Rest subpages can be freed if
- * they are not mapped.
+ * GUP pin and PG_locked transferred to @page or the compound page @page belongs
+ * to. Rest subpages can be freed if they are not mapped.
  *
  * Returns 0 if the hugepage is split successfully.
  * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
  * us.
  */
-int split_huge_page_to_list(struct page *page, struct list_head *list)
+int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+				     unsigned int new_order)
 {
 	struct folio *folio = page_folio(page);
 	struct deferred_split *ds_queue = get_deferred_split_queue(folio);
-	XA_STATE(xas, &folio->mapping->i_pages, folio->index);
+	/* reset xarray order to new order after split */
+	XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
 	int extra_pins, ret;
@@ -3024,6 +3051,31 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 	VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
 
+	/* Cannot split anonymous THP to order-1 */
+	if (new_order == 1 && folio_test_anon(folio)) {
+		VM_WARN_ONCE(1, "Cannot split to order-1 folio");
+		return -EINVAL;
+	}
+
+	if (new_order) {
+		/* Only swapping a whole PMD-mapped folio is supported */
+		if (folio_test_swapcache(folio))
+			return -EINVAL;
+		/* Split shmem folio to non-zero order not supported */
+		if (shmem_mapping(folio->mapping)) {
+			VM_WARN_ONCE(1,
+				"Cannot split shmem folio to non-0 order");
+			return -EINVAL;
+		}
+		/* No split if the file system does not support large folio */
+		if (!mapping_large_folio_support(folio->mapping)) {
+			VM_WARN_ONCE(1,
+				"Cannot split file folio to non-0 order");
+			return -EINVAL;
+		}
+	}
+
+
 	is_hzp = is_huge_zero_page(&folio->page);
 	if (is_hzp) {
 		pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
@@ -3120,14 +3172,21 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		if (folio_order(folio) > 1 &&
 		    !list_empty(&folio->_deferred_list)) {
 			ds_queue->split_queue_len--;
-			list_del(&folio->_deferred_list);
+			/*
+			 * Reinitialize page_deferred_list after removing the
+			 * page from the split_queue, otherwise a subsequent
+			 * split will see list corruption when checking the
+			 * page_deferred_list.
+			 */
+			list_del_init(&folio->_deferred_list);
 		}
 		spin_unlock(&ds_queue->split_queue_lock);
 		if (mapping) {
 			int nr = folio_nr_pages(folio);
 
 			xas_split(&xas, folio, folio_order(folio));
-			if (folio_test_pmd_mappable(folio)) {
+			if (folio_test_pmd_mappable(folio) &&
+			    new_order < HPAGE_PMD_ORDER) {
 				if (folio_test_swapbacked(folio)) {
 					__lruvec_stat_mod_folio(folio,
							NR_SHMEM_THPS, -nr);
@@ -3139,7 +3198,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			}
 		}
 
-		__split_huge_page(page, list, end);
+		__split_huge_page(page, list, end, new_order);
 		ret = 0;
 	} else {
 		spin_unlock(&ds_queue->split_queue_lock);
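
For reference, a minimal caller sketch of the interface this diff introduces, following the calling contract spelled out in the kernel-doc comment above (a pin held on @page, the huge page locked, tail pages returned to the LRU when @list is NULL). The helper try_split_to_order() below is hypothetical and only for illustration; it is not part of this patch, and it assumes @page belongs to a large folio that is neither shmem nor in the swap cache.

/*
 * Hypothetical caller sketch (not part of this patch): split the large
 * folio containing @page down to @new_order, honouring the documented
 * preconditions of split_huge_page_to_list_to_order().
 */
static int try_split_to_order(struct page *page, unsigned int new_order)
{
	int ret;

	get_page(page);		/* caller must hold a pin on @page */
	lock_page(page);	/* the huge page must be locked */

	/* @list == NULL: surviving tail pages go back to the LRU. */
	ret = split_huge_page_to_list_to_order(page, NULL, new_order);

	/*
	 * On return, PG_locked and the extra reference belong to @page or
	 * to the new compound page containing it, so the usual unlock/put
	 * pair covers both the success and the failure case.
	 */
	unlock_page(page);
	put_page(page);

	return ret;	/* 0 on success, -EBUSY or -EINVAL otherwise */
}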