
Commit 356ff8a

rientjes authored and torvalds committed
Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask"
This reverts commit 89c83fb. This should have been done as part of 2f0799a ("mm, thp: restore node-local hugepage allocations").

The movement of the thp allocation policy from alloc_pages_vma() to alloc_hugepage_direct_gfpmask() was intended to set __GFP_THISNODE only for mempolicies that are not MPOL_BIND, whereas the revert could set it regardless of mempolicy.

While the check for MPOL_BIND between alloc_hugepage_direct_gfpmask() and alloc_pages_vma() was racy, that check has since been removed. What is left is the possibility of using __GFP_THISNODE in policy_node() when it is unexpected, because the special handling for hugepages in alloc_pages_vma() was removed as part of the consolidation.

Secondly, prior to 89c83fb, alloc_pages_vma() implemented a somewhat different policy for hugepage allocations, which were allocated through alloc_hugepage_vma(): if the allocating process's node is in the set of allowed nodes, allocate with __GFP_THISNODE for that node (for MPOL_PREFERRED, use that node with __GFP_THISNODE instead). 89c83fb changed this for shmem_alloc_hugepage() to allow fallback to other nodes, as it did for new_page() in mm/mempolicy.c, which is functionally different behavior and removes the requirement to allocate hugepages only locally.

So this commit does a full revert of 89c83fb instead of the partial revert that was done in 2f0799a. The result is the same thp allocation policy for 4.20 that was in 4.19.

Fixes: 89c83fb ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask")
Fixes: 2f0799a ("mm, thp: restore node-local hugepage allocations")
Signed-off-by: David Rientjes <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 5f17979 commit 356ff8a
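
To make the restored behavior concrete, here is a minimal sketch of the node-local policy described above, condensed from the mm/mempolicy.c hunk below. It is illustrative only: the function name is hypothetical, and the real code additionally handles interleave policies earlier, drops the mempolicy reference with mpol_cond_put(), and computes the fallback's preferred node via policy_node().

	/*
	 * Hypothetical condensation of the hugepage branch this revert
	 * restores in alloc_pages_vma(); not a real kernel function.
	 */
	static struct page *thp_policy_sketch(gfp_t gfp, int order,
					      struct mempolicy *pol, int node)
	{
		nodemask_t *nmask = policy_nodemask(gfp, pol);

		/* MPOL_PREFERRED names an explicit node: honor it */
		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
			node = pol->v.preferred_node;

		/* Policy allows the local/preferred node: try it and nothing else */
		if (!nmask || node_isset(node, *nmask))
			return __alloc_pages_node(node, gfp | __GFP_THISNODE, order);

		/* Otherwise allocate the standard way, with fallback permitted */
		return __alloc_pages_nodemask(gfp, order, node, nmask);
	}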

4 files changed (+51, -22 lines)

include/linux/gfp.h (8 additions, 4 deletions)

@@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
+			int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+	alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)

 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
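
For clarity (not part of the diff): in the NUMA build, the restored wrapper expands as below, and the trailing true is what re-enables the node-local branch in alloc_pages_vma().

	/* alloc_hugepage_vma(gfp, vma, addr, HPAGE_PMD_ORDER) expands to: */
	alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, addr, numa_node_id(), true);
	/* alloc_page_vma() and alloc_page_vma_node() pass false instead and
	 * keep the ordinary policy-driven fallback behavior. */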

mm/huge_memory.c (13 additions, 14 deletions)

@@ -629,30 +629,30 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 *		    available
 * never: never stall for any thp allocation
 */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;

 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | __GFP_THISNODE |
-		       (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);

 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;

 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-						  __GFP_KSWAPD_RECLAIM);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM :
+					__GFP_KSWAPD_RECLAIM);

 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM : 0);

-	return gfp_mask;
+	return GFP_TRANSHUGE_LIGHT;
 }

 /* Caller must hold page table lock. */

@@ -724,8 +724,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+	gfp = alloc_hugepage_direct_gfpmask(vma);
+	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;

@@ -1295,9 +1295,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-				haddr, numa_node_id());
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
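
As a reading aid (not in the patch), these are the masks alloc_hugepage_direct_gfpmask() returns for each THP defrag setting after the revert; note that __GFP_THISNODE no longer appears here, since node placement moved back into alloc_pages_vma().

	/*
	 * "madvised" means the VMA has VM_HUGEPAGE set:
	 *
	 *   always         GFP_TRANSHUGE | (madvised ? 0 : __GFP_NORETRY)
	 *   defer          GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM
	 *   defer+madvise  GFP_TRANSHUGE_LIGHT | (madvised ?
	 *                      __GFP_DIRECT_RECLAIM : __GFP_KSWAPD_RECLAIM)
	 *   madvise        GFP_TRANSHUGE_LIGHT | (madvised ?
	 *                      __GFP_DIRECT_RECLAIM : 0)
	 *   never          GFP_TRANSHUGE_LIGHT
	 */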

mm/mempolicy.c (29 additions, 3 deletions)

@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;

-		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-				address, numa_node_id());
+		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+					 HPAGE_PMD_ORDER);
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);

@@ -2011,6 +2011,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 * @vma: Pointer to VMA or NULL if not available.
 * @addr: Virtual Address of the allocation. Must be inside the VMA.
 * @node: Which node to prefer for allocation (modulo policy).
+ * @hugepage: for hugepages try only the preferred node if possible
 *
 * This function allocates a page from the kernel page pool and applies
 * a NUMA policy associated with the VMA or the current process.

@@ -2021,7 +2022,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
 */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;

@@ -2039,6 +2040,31 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}

+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+		int hpage_node = node;
+
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node (or other explicitly preferred
+		 * node) we only try to allocate from the current/preferred
+		 * node and don't fall back to other nodes, as the cost of
+		 * remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+			hpage_node = pol->v.preferred_node;
+
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(hpage_node, *nmask)) {
+			mpol_cond_put(pol);
+			page = __alloc_pages_node(hpage_node,
+						gfp | __GFP_THISNODE, order);
+			goto out;
+		}
+	}
+
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);

mm/shmem.c (1 addition, 1 deletion)

@@ -1439,7 +1439,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,

 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);
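
This single-argument change is the behavioral core of the revert for tmpfs: with the sixth argument restored to true, shmem hugepage allocations again take the node-local branch in alloc_pages_vma() when the mempolicy allows it, instead of falling back to remote nodes as 89c83fb permitted.

	/* Hedged note: the __GFP_NORETRY | __GFP_NOWARN bits passed above make
	 * the single-node (__GFP_THISNODE) attempt fail quickly and quietly
	 * rather than stalling, so a failed local THP attempt falls back to
	 * small pages in the caller. */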
