@@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	unsigned int alloc_flags;
 	unsigned long did_some_progress;
-	enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+	enum compact_priority compact_priority;
 	enum compact_result compact_result;
-	int compaction_retries = 0;
-	int no_progress_loops = 0;
+	int compaction_retries;
+	int no_progress_loops;
 	unsigned long alloc_start = jiffies;
 	unsigned int stall_timeout = 10 * HZ;
+	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -3549,6 +3550,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
 		gfp_mask &= ~__GFP_ATOMIC;
 
+retry_cpuset:
+	compaction_retries = 0;
+	no_progress_loops = 0;
+	compact_priority = DEF_COMPACT_PRIORITY;
+	cpuset_mems_cookie = read_mems_allowed_begin();
+
 	/*
 	 * The fast path uses conservative alloc_flags to succeed only until
 	 * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3720,6 +3727,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	}
 
 nopage:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (read_mems_allowed_retry(cpuset_mems_cookie))
+		goto retry_cpuset;
+
 	warn_alloc(gfp_mask,
 			"page allocation failure: order:%u", order);
 got_pg:
@@ -3734,7 +3750,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			struct zonelist *zonelist, nodemask_t *nodemask)
 {
 	struct page *page;
-	unsigned int cpuset_mems_cookie;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
 	gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = {
@@ -3771,9 +3786,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 
-retry_cpuset:
-	cpuset_mems_cookie = read_mems_allowed_begin();
-
 	/* Dirty zone balancing only done in the fast path */
 	ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 
@@ -3786,6 +3798,11 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 					ac.high_zoneidx, ac.nodemask);
 	if (!ac.preferred_zoneref->zone) {
 		page = NULL;
+		/*
+		 * This might be due to race with cpuset_current_mems_allowed
+		 * update, so make sure we retry with original nodemask in the
+		 * slow path.
+		 */
 		goto no_zone;
 	}
 
@@ -3794,6 +3811,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (likely(page))
 		goto out;
 
+no_zone:
 	/*
 	 * Runtime PM, block IO and its error handling path can deadlock
 	 * because I/O on the device might not complete.
@@ -3811,24 +3829,11 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		ac.nodemask = nodemask;
 		ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
 						ac.high_zoneidx, ac.nodemask);
-		if (!ac.preferred_zoneref->zone)
-			goto no_zone;
+		/* If we have NULL preferred zone, slowpath will handle that */
 	}
 
 	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
-no_zone:
-	/*
-	 * When updating a task's mems_allowed, it is possible to race with
-	 * parallel threads in such a way that an allocation can fail while
-	 * the mask is being updated. If a page allocation is about to fail,
-	 * check if the cpuset changed during allocation and if so, retry.
-	 */
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
-		alloc_mask = gfp_mask;
-		goto retry_cpuset;
-	}
-
 out:
 	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
 	    unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
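
The retry_cpuset logic this patch moves into __alloc_pages_slowpath() rests on the kernel's seqcount-protected mems_allowed accessors: read_mems_allowed_begin() snapshots a sequence counter and read_mems_allowed_retry() reports whether a concurrent cpuset update invalidated that snapshot, so an allocation failure that raced with a mems_allowed change is retried rather than returned to the caller. The standalone C sketch below illustrates only that begin/retry pattern, not the kernel implementation; the names mask_seq, mask_update(), mask_read_begin(), mask_read_retry() and try_alloc() are invented for the example.

/*
 * Illustrative userspace sketch of a seqcount-style retry loop, modeled
 * loosely on read_mems_allowed_begin()/read_mems_allowed_retry().
 * Build with: cc -std=c11 -pthread seqretry.c
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned int mask_seq;			/* even = stable, odd = write in progress */
static _Atomic unsigned long mems_allowed = 0x3UL;	/* stand-in for the nodemask */

/* Writer side: make the sequence odd, update the mask, make it even again. */
static void mask_update(unsigned long new_mask)
{
	atomic_fetch_add_explicit(&mask_seq, 1, memory_order_release);
	atomic_store_explicit(&mems_allowed, new_mask, memory_order_relaxed);
	atomic_fetch_add_explicit(&mask_seq, 1, memory_order_release);
}

/* Reader side: snapshot an even sequence value before using the mask... */
static unsigned int mask_read_begin(void)
{
	unsigned int seq;

	do {
		seq = atomic_load_explicit(&mask_seq, memory_order_acquire);
	} while (seq & 1);	/* spin while a writer is mid-update */
	return seq;
}

/* ...and check afterwards whether a writer raced with us. */
static bool mask_read_retry(unsigned int seq)
{
	return atomic_load_explicit(&mask_seq, memory_order_acquire) != seq;
}

/* Stand-in for an allocation attempt that consults the mask. */
static unsigned long try_alloc(void)
{
	return atomic_load_explicit(&mems_allowed, memory_order_relaxed) ? 1 : 0;
}

int main(void)
{
	bool simulated_race = false;
	unsigned long page;
	unsigned int cookie;

retry:
	cookie = mask_read_begin();
	page = try_alloc();
	if (!simulated_race) {
		/* Pretend another thread rewrote the mask mid-allocation... */
		mask_update(0x5UL);
		simulated_race = true;
		page = 0;	/* ...and that our attempt failed because of it. */
	}
	if (!page && mask_read_retry(cookie)) {
		printf("mask changed during allocation, retrying\n");
		goto retry;
	}
	printf("allocation %s\n", page ? "succeeded" : "failed");
	return 0;
}

As in the patch, the failure path checks the cookie only when the allocation is about to fail, and a retry restarts the whole attempt; that is why the counters reset at the retry_cpuset label (compaction_retries, no_progress_loops, compact_priority) start fresh on every pass.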