
Commit 4167e9b

rientjes authored and torvalds committed
mm: remove GFP_THISNODE
NOTE: this is not about __GFP_THISNODE, this is only about GFP_THISNODE.

GFP_THISNODE is a secret combination of gfp bits that have different behavior than expected. It is a combination of __GFP_THISNODE, __GFP_NORETRY, and __GFP_NOWARN and is special-cased in the page allocator slowpath to fail without trying reclaim even though it may be used in combination with __GFP_WAIT.

An example of the problem this creates: commit e97ca8e ("mm: fix GFP_THISNODE callers and clarify") fixed up many users of GFP_THISNODE that really just wanted __GFP_THISNODE. The problem doesn't end there, however, because even it was a no-op for alloc_misplaced_dst_page(), which also sets __GFP_NORETRY and __GFP_NOWARN, and migrate_misplaced_transhuge_page(), where __GFP_NORETRY and __GFP_NOWAIT are set in GFP_TRANSHUGE. Converting GFP_THISNODE to __GFP_THISNODE is a no-op in these cases since the page allocator special-cases __GFP_THISNODE && __GFP_NORETRY && __GFP_NOWARN.

It's time to just remove GFP_THISNODE entirely. We leave __GFP_THISNODE to restrict an allocation to a local node, but remove GFP_THISNODE and its obscurity. Instead, we require that a caller clear __GFP_WAIT if it wants to avoid reclaim.

This allows the aforementioned functions to actually reclaim as they should. It also enables any future callers that want to do __GFP_THISNODE but also __GFP_NORETRY && __GFP_NOWARN to reclaim. The rule is simple: if you don't want to reclaim, then don't set __GFP_WAIT.

Aside: ovs_flow_stats_update() really wants to avoid reclaim as well, so it is unchanged.

Signed-off-by: David Rientjes <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Christoph Lameter <[email protected]>
Acked-by: Pekka Enberg <[email protected]>
Cc: Joonsoo Kim <[email protected]>
Acked-by: Johannes Weiner <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Pravin Shelar <[email protected]>
Cc: Jarno Rajahalme <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Greg Thelen <[email protected]>
Cc: Tejun Heo <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
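To illustrate the new rule, a minimal sketch (not part of the patch; nid and page are placeholders for an arbitrary caller):

	/*
	 * Sketch only: a node-local allocation that must not enter reclaim.
	 * With GFP_THISNODE gone, the caller simply leaves __GFP_WAIT out of
	 * the mask (e.g. by starting from GFP_NOWAIT).
	 */
	int nid = numa_node_id();
	struct page *page;

	page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_THISNODE |
				__GFP_NORETRY | __GFP_NOWARN, 0);

	/*
	 * Sketch only: a node-local allocation that is allowed to reclaim on
	 * the target node, which the old GFP_THISNODE special case silently
	 * prevented whenever callers combined it with __GFP_WAIT.
	 */
	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_THISNODE, 0);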
1 parent b360edb commit 4167e9b

4 files changed: +27 -31 lines


include/linux/gfp.h

Lines changed: 0 additions & 10 deletions

@@ -117,16 +117,6 @@ struct vm_area_struct;
 			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
 			 __GFP_NO_KSWAPD)
 
-/*
- * GFP_THISNODE does not perform any reclaim, you most likely want to
- * use __GFP_THISNODE to allocate from a given node without fallback!
- */
-#ifdef CONFIG_NUMA
-#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
-#else
-#define GFP_THISNODE	((__force gfp_t)0)
-#endif
-
 /* This mask makes up all the page movable related flags */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)

mm/page_alloc.c

Lines changed: 6 additions & 16 deletions

@@ -2412,13 +2412,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 			*did_some_progress = 1;
 			goto out;
 		}
-		/*
-		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
-		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
-		 * The caller should handle page allocation failure by itself if
-		 * it specifies __GFP_THISNODE.
-		 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
-		 */
+		/* The OOM killer may not free memory on a specific node */
 		if (gfp_mask & __GFP_THISNODE)
 			goto out;
 	}
@@ -2673,15 +2667,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
-	 * __GFP_NOWARN set) should not cause reclaim since the subsystem
-	 * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim
-	 * using a larger set of nodes after it has established that the
-	 * allowed per node queues are empty and that nodes are
-	 * over allocated.
+	 * If this allocation cannot block and it is for a specific node, then
+	 * fail early. There's no need to wakeup kswapd or retry for a
+	 * speculative node-specific allocation.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) &&
-	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
 		goto nopage;
 
 retry:
@@ -2874,7 +2864,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	/*
 	 * Check the zones suitable for the gfp_mask contain at least one
 	 * valid zone. It's possible to have an empty zonelist as a result
-	 * of GFP_THISNODE and a memoryless node
+	 * of __GFP_THISNODE and a memoryless node
 	 */
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
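As context for the new slowpath check above: wait is a local that __alloc_pages_slowpath() derives from the gfp mask near the top of the function (roughly gfp_mask & __GFP_WAIT; it is not part of this hunk). A hypothetical helper expressing the same condition, for illustration only:

	/*
	 * Sketch (not in the patch): a __GFP_THISNODE request without
	 * __GFP_WAIT, such as the mask built by slab's gfp_exact_node()
	 * below, now fails fast instead of waking kswapd or reclaiming.
	 */
	static bool thisnode_fails_fast(gfp_t gfp_mask)
	{
		const gfp_t wait = gfp_mask & __GFP_WAIT;

		return IS_ENABLED(CONFIG_NUMA) &&
		       (gfp_mask & __GFP_THISNODE) && !wait;
	}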

mm/slab.c

Lines changed: 18 additions & 4 deletions

@@ -857,6 +857,11 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
 	return NULL;
 }
 
+static inline gfp_t gfp_exact_node(gfp_t flags)
+{
+	return flags;
+}
+
 #else	/* CONFIG_NUMA */
 
 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
@@ -1023,6 +1028,15 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 
 	return __cache_free_alien(cachep, objp, node, page_node);
 }
+
+/*
+ * Construct gfp mask to allocate from a specific node but do not invoke reclaim
+ * or warn about failures.
+ */
+static inline gfp_t gfp_exact_node(gfp_t flags)
+{
+	return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT;
+}
 #endif
 
 /*
@@ -2825,7 +2839,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
 	if (unlikely(!ac->avail)) {
 		int x;
 force_grow:
-		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
+		x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
@@ -3019,7 +3033,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 			get_node(cache, nid) &&
 			get_node(cache, nid)->free_objects) {
 				obj = ____cache_alloc_node(cache,
-					flags | GFP_THISNODE, nid);
+					gfp_exact_node(flags), nid);
 				if (obj)
 					break;
 		}
@@ -3047,7 +3061,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 			nid = page_to_nid(page);
 			if (cache_grow(cache, flags, nid, page)) {
 				obj = ____cache_alloc_node(cache,
-					flags | GFP_THISNODE, nid);
+					gfp_exact_node(flags), nid);
 				if (!obj)
 					/*
 					 * Another processor may allocate the
@@ -3118,7 +3132,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
 
 must_grow:
 	spin_unlock(&n->list_lock);
-	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
+	x = cache_grow(cachep, gfp_exact_node(flags), nodeid, NULL);
 	if (x)
 		goto retry;
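To make the NUMA variant of gfp_exact_node() concrete, here is a sketch of what cache_grow() now receives for a GFP_KERNEL request (the variables are illustrative, not from the patch):

	/*
	 * Sketch: on a CONFIG_NUMA kernel, a GFP_KERNEL request becomes a
	 * node-bound mask with __GFP_WAIT cleared and __GFP_NOWARN set, so
	 * the page allocator's new early exit applies and slab handles the
	 * failure itself (e.g. via fallback_alloc() above) rather than
	 * reclaiming from inside the page allocator.
	 */
	gfp_t flags = GFP_KERNEL;
	gfp_t node_flags = gfp_exact_node(flags);
	/* == (GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT */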

net/openvswitch/flow.c

Lines changed: 3 additions & 1 deletion

@@ -100,7 +100,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 
 			new_stats =
 				kmem_cache_alloc_node(flow_stats_cache,
-						      GFP_THISNODE |
+						      GFP_NOWAIT |
+						      __GFP_THISNODE |
+						      __GFP_NOWARN |
 						      __GFP_NOMEMALLOC,
 						      node);
 			if (likely(new_stats)) {
