
Commit 92717d4

aagit authored and torvalds committed
Revert "Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask""
Patch series "reapply: relax __GFP_THISNODE for MADV_HUGEPAGE mappings".

The fixes for what was originally reported as "pathological THP behavior" were rightfully reverted to be sure not to introduce regressions at the end of a merge window, after a severe regression report from the kernel bot. We can safely re-apply them now that we have had time to analyze the problem.

The mm process worked fine, because the good fixes were eventually committed upstream without excessive delay.

The regression reported by the kernel bot, however, forced us to revert the good fixes to be sure not to introduce regressions and to give us the time to analyze the issue further. The silver lining is that this extra time allowed us to think more about this issue and also to plan a future direction to improve things further in terms of THP NUMA locality.

This patch (of 2):

This reverts commit 356ff8a ("Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask""), so it reapplies 89c83fb ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask").

Consolidating the THP allocation flags in one place was meant as a cleanup, to make it easier to handle otherwise scattered code that imposes a maintenance burden. There were no real problems observed with the gfp mask consolidation, but the reversion was rushed through without a larger consensus regardless.

This patch brings the consolidation back because it should make long-term maintainability easier and should allow future changes to be less error prone.

[[email protected]: changelog additions]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Andrea Arcangeli <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Stefan Priebe - Profihost AG <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 0cfaee2 commit 92717d4
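To make the consolidation concrete, here is a condensed before/after sketch of the THP fault-path call site, pieced together from the mm/huge_memory.c and mm/mempolicy.c hunks below (not a complete function; identifiers are exactly those in the diff):

	/* Before: the caller only built part of the THP gfp mask; the
	 * alloc_hugepage_vma() wrapper asked alloc_pages_vma() to add
	 * __GFP_THISNODE via its hugepage special case. */
	gfp = alloc_hugepage_direct_gfpmask(vma);
	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);

	/* After: alloc_hugepage_direct_gfpmask() owns the whole mask,
	 * including __GFP_THISNODE, and the caller goes through plain
	 * alloc_pages_vma(). */
	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());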

4 files changed: 22 additions, 51 deletions

include/linux/gfp.h

Lines changed: 4 additions & 8 deletions
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
-	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+			int node);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
-	alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr) \
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
 #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
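For callers, the signature change above is mechanical: the trailing bool disappears, and alloc_hugepage_vma() users switch to alloc_pages_vma() directly. A condensed example, taken from the mm/shmem.c hunk below (extra gfp bits elided for brevity):

	/* before */
	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
	/* after */
	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());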

mm/huge_memory.c

Lines changed: 14 additions & 13 deletions
@@ -644,30 +644,30 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
  *		    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | __GFP_THISNODE |
+		       (vma_madvised ? 0 : __GFP_NORETRY);
 
 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+		return gfp_mask | __GFP_KSWAPD_RECLAIM;
 
 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT |
-			(vma_madvised ? __GFP_DIRECT_RECLAIM :
-					__GFP_KSWAPD_RECLAIM);
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+						  __GFP_KSWAPD_RECLAIM);
 
 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT |
-			(vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
 
-	return GFP_TRANSHUGE_LIGHT;
+	return gfp_mask;
 }
 
 /* Caller must hold page table lock. */
@@ -739,8 +739,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma);
-	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1347,8 +1347,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (__transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+					   haddr, numa_node_id());
 	} else
 		new_page = NULL;
 
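For reference, the gfp mask the rewritten helper returns for each defrag mode, derived from the "+" lines of the first hunk above (an informal summary, not authoritative documentation; the /sys/kernel/mm/transparent_hugepage/defrag names are assumed to map to the TRANSPARENT_HUGEPAGE_DEFRAG_* flags in the usual way):

	/*
	 * defrag setting  ->  returned gfp mask
	 * always              GFP_TRANSHUGE | __GFP_THISNODE
	 *                     (| __GFP_NORETRY unless the VMA is madvised)
	 * defer               GFP_TRANSHUGE_LIGHT | __GFP_THISNODE | __GFP_KSWAPD_RECLAIM
	 * defer+madvise       GFP_TRANSHUGE_LIGHT | __GFP_THISNODE |
	 *                     (madvised ? __GFP_DIRECT_RECLAIM : __GFP_KSWAPD_RECLAIM)
	 * madvise             GFP_TRANSHUGE_LIGHT | __GFP_THISNODE |
	 *                     (madvised ? __GFP_DIRECT_RECLAIM : 0)
	 * never               GFP_TRANSHUGE_LIGHT | __GFP_THISNODE
	 */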

mm/mempolicy.c

Lines changed: 3 additions & 29 deletions
@@ -1180,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
+		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+				address, numa_node_id());
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -2083,7 +2083,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * @vma:  Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
  * @node: Which node to prefer for allocation (modulo policy).
- * @hugepage: for hugepages try only the preferred node if possible
  *
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
@@ -2094,7 +2093,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node, bool hugepage)
+		unsigned long addr, int node)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2112,31 +2111,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-		int hpage_node = node;
-
-		/*
-		 * For hugepage allocation and non-interleave policy which
-		 * allows the current node (or other explicitly preferred
-		 * node) we only try to allocate from the current/preferred
-		 * node and don't fall back to other nodes, as the cost of
-		 * remote accesses would likely offset THP benefits.
-		 *
-		 * If the policy is interleave, or does not allow the current
-		 * node in its nodemask, we allocate the standard way.
-		 */
-		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
-			hpage_node = pol->v.preferred_node;
-
-		nmask = policy_nodemask(gfp, pol);
-		if (!nmask || node_isset(hpage_node, *nmask)) {
-			mpol_cond_put(pol);
-			page = __alloc_pages_node(hpage_node,
-						gfp | __GFP_THISNODE, order);
-			goto out;
-		}
-	}
-
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
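With the hugepage branch removed, alloc_pages_vma() treats a THP allocation like any other: the stay-local preference is now expressed solely by the __GFP_THISNODE bit that alloc_hugepage_direct_gfpmask() sets (see the mm/huge_memory.c hunk above), rather than by a special case here. The remaining path, condensed from the context lines of this hunk:

	nmask = policy_nodemask(gfp, pol);
	preferred_nid = policy_node(gfp, pol, node);
	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);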

mm/shmem.c

Lines changed: 1 addition & 1 deletion
@@ -1466,7 +1466,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);
