Skip to content

Commit f36b753

Browse files
committed
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "13 fixes" * emailed patches from Andrew Morton <[email protected]>: mm, thp: do not cause memcg oom for thp mm/vmscan: wake up flushers for legacy cgroups too Revert "mm: page_alloc: skip over regions of invalid pfns where possible" mm/shmem: do not wait for lock_page() in shmem_unused_huge_shrink() mm/thp: do not wait for lock_page() in deferred_split_scan() mm/khugepaged.c: convert VM_BUG_ON() to collapse fail x86/mm: implement free pmd/pte page interfaces mm/vmalloc: add interfaces to free unmapped page table h8300: remove extraneous __BIG_ENDIAN definition hugetlbfs: check for pgoff value overflow lockdep: fix fs_reclaim warning MAINTAINERS: update Mark Fasheh's e-mail mm/mempolicy.c: avoid use uninitialized preferred_node
2 parents 8401c72 + 9d3c335 commit f36b753

File tree

16 files changed

+153
-79
lines changed

16 files changed

+153
-79
lines changed

MAINTAINERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10334,7 +10334,7 @@ F: drivers/oprofile/
1033410334
F: include/linux/oprofile.h
1033510335

1033610336
ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
10337-
M: Mark Fasheh <mfasheh@versity.com>
10337+
M: Mark Fasheh <mark@fasheh.com>
1033810338
M: Joel Becker <[email protected]>
1033910339
L: [email protected] (moderated for non-subscribers)
1034010340
W: http://ocfs2.wiki.kernel.org

arch/arm64/mm/mmu.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,3 +972,13 @@ int pmd_clear_huge(pmd_t *pmdp)
972972
pmd_clear(pmdp);
973973
return 1;
974974
}
975+
976+
int pud_free_pmd_page(pud_t *pud)
977+
{
978+
return pud_none(*pud);
979+
}
980+
981+
int pmd_free_pte_page(pmd_t *pmd)
982+
{
983+
return pmd_none(*pmd);
984+
}

arch/h8300/include/asm/byteorder.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
#ifndef __H8300_BYTEORDER_H__
33
#define __H8300_BYTEORDER_H__
44

5-
#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
65
#include <linux/byteorder/big_endian.h>
76

87
#endif

arch/x86/mm/pgtable.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd)
702702

703703
return 0;
704704
}
705+
706+
/**
707+
* pud_free_pmd_page - Clear pud entry and free pmd page.
708+
* @pud: Pointer to a PUD.
709+
*
710+
 * Context: The pud range has been unmapped and TLB purged.
711+
* Return: 1 if clearing the entry succeeded. 0 otherwise.
712+
*/
713+
int pud_free_pmd_page(pud_t *pud)
714+
{
715+
pmd_t *pmd;
716+
int i;
717+
718+
if (pud_none(*pud))
719+
return 1;
720+
721+
pmd = (pmd_t *)pud_page_vaddr(*pud);
722+
723+
for (i = 0; i < PTRS_PER_PMD; i++)
724+
if (!pmd_free_pte_page(&pmd[i]))
725+
return 0;
726+
727+
pud_clear(pud);
728+
free_page((unsigned long)pmd);
729+
730+
return 1;
731+
}
732+
733+
/**
734+
* pmd_free_pte_page - Clear pmd entry and free pte page.
735+
* @pmd: Pointer to a PMD.
736+
*
737+
 * Context: The pmd range has been unmapped and TLB purged.
738+
* Return: 1 if clearing the entry succeeded. 0 otherwise.
739+
*/
740+
int pmd_free_pte_page(pmd_t *pmd)
741+
{
742+
pte_t *pte;
743+
744+
if (pmd_none(*pmd))
745+
return 1;
746+
747+
pte = (pte_t *)pmd_page_vaddr(*pmd);
748+
pmd_clear(pmd);
749+
free_page((unsigned long)pte);
750+
751+
return 1;
752+
}
705753
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */

fs/hugetlbfs/inode.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
108108
pagevec_reinit(pvec);
109109
}
110110

111+
/*
112+
* Mask used when checking the page offset value passed in via system
113+
* calls. This value will be converted to a loff_t which is signed.
114+
* Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
115+
* value. The extra bit (- 1 in the shift value) is to take the sign
116+
* bit into account.
117+
*/
118+
#define PGOFF_LOFFT_MAX \
119+
(((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
120+
111121
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
112122
{
113123
struct inode *inode = file_inode(file);
@@ -127,12 +137,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
127137
vma->vm_ops = &hugetlb_vm_ops;
128138

129139
/*
130-
* Offset passed to mmap (before page shift) could have been
131-
* negative when represented as a (l)off_t.
140+
* page based offset in vm_pgoff could be sufficiently large to
141+
* overflow a (l)off_t when converted to byte offset.
132142
*/
133-
if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
143+
if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
134144
return -EINVAL;
135145

146+
/* must be huge page aligned */
136147
if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
137148
return -EINVAL;
138149

include/asm-generic/pgtable.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
983983
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
984984
int pud_clear_huge(pud_t *pud);
985985
int pmd_clear_huge(pmd_t *pmd);
986+
int pud_free_pmd_page(pud_t *pud);
987+
int pmd_free_pte_page(pmd_t *pmd);
986988
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
987989
static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
988990
{
@@ -1008,6 +1010,14 @@ static inline int pmd_clear_huge(pmd_t *pmd)
10081010
{
10091011
return 0;
10101012
}
1013+
static inline int pud_free_pmd_page(pud_t *pud)
1014+
{
1015+
return 0;
1016+
}
1017+
static inline int pmd_free_pte_page(pmd_t *pmd)
1018+
{
1019+
return 0;
1020+
}
10111021
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
10121022

10131023
#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE

include/linux/memblock.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,6 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
187187
unsigned long *end_pfn);
188188
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
189189
unsigned long *out_end_pfn, int *out_nid);
190-
unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
191190

192191
/**
193192
* for_each_mem_pfn_range - early memory pfn range iterator

lib/ioremap.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
9191

9292
if (ioremap_pmd_enabled() &&
9393
((next - addr) == PMD_SIZE) &&
94-
IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
94+
IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
95+
pmd_free_pte_page(pmd)) {
9596
if (pmd_set_huge(pmd, phys_addr + addr, prot))
9697
continue;
9798
}
@@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
117118

118119
if (ioremap_pud_enabled() &&
119120
((next - addr) == PUD_SIZE) &&
120-
IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
121+
IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
122+
pud_free_pmd_page(pud)) {
121123
if (pud_set_huge(pud, phys_addr + addr, prot))
122124
continue;
123125
}

mm/huge_memory.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
555555

556556
VM_BUG_ON_PAGE(!PageCompound(page), page);
557557

558-
if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
558+
if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
559+
true)) {
559560
put_page(page);
560561
count_vm_event(THP_FAULT_FALLBACK);
561562
return VM_FAULT_FALLBACK;
@@ -1316,7 +1317,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
13161317
}
13171318

13181319
if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
1319-
huge_gfp, &memcg, true))) {
1320+
huge_gfp | __GFP_NORETRY, &memcg, true))) {
13201321
put_page(new_page);
13211322
split_huge_pmd(vma, vmf->pmd, vmf->address);
13221323
if (page)
@@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
27832784

27842785
list_for_each_safe(pos, next, &list) {
27852786
page = list_entry((void *)pos, struct page, mapping);
2786-
lock_page(page);
2787+
if (!trylock_page(page))
2788+
goto next;
27872789
/* split_huge_page() removes page from list on success */
27882790
if (!split_huge_page(page))
27892791
split++;
27902792
unlock_page(page);
2793+
next:
27912794
put_page(page);
27922795
}
27932796

mm/hugetlb.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <linux/bootmem.h>
1919
#include <linux/sysfs.h>
2020
#include <linux/slab.h>
21+
#include <linux/mmdebug.h>
2122
#include <linux/sched/signal.h>
2223
#include <linux/rmap.h>
2324
#include <linux/string_helpers.h>
@@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode,
43744375
struct resv_map *resv_map;
43754376
long gbl_reserve;
43764377

4378+
/* This should never happen */
4379+
if (from > to) {
4380+
VM_WARN(1, "%s called with a negative range\n", __func__);
4381+
return -EINVAL;
4382+
}
4383+
43774384
/*
43784385
* Only apply hugepage reservation if asked. At fault time, an
43794386
* attempt will be made for VM_NORESERVE to allocate a page

mm/khugepaged.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
530530
goto out;
531531
}
532532

533-
VM_BUG_ON_PAGE(PageCompound(page), page);
533+
/* TODO: teach khugepaged to collapse THP mapped with pte */
534+
if (PageCompound(page)) {
535+
result = SCAN_PAGE_COMPOUND;
536+
goto out;
537+
}
538+
534539
VM_BUG_ON_PAGE(!PageAnon(page), page);
535540

536541
/*
@@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm,
960965
goto out_nolock;
961966
}
962967

963-
if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
968+
/* Do not oom kill for khugepaged charges */
969+
if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
970+
&memcg, true))) {
964971
result = SCAN_CGROUP_CHARGE_FAIL;
965972
goto out_nolock;
966973
}
@@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm,
13191326
goto out;
13201327
}
13211328

1322-
if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
1329+
/* Do not oom kill for khugepaged charges */
1330+
if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
1331+
&memcg, true))) {
13231332
result = SCAN_CGROUP_CHARGE_FAIL;
13241333
goto out;
13251334
}

mm/memblock.c

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
11011101
*out_nid = r->nid;
11021102
}
11031103

1104-
unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
1105-
unsigned long max_pfn)
1106-
{
1107-
struct memblock_type *type = &memblock.memory;
1108-
unsigned int right = type->cnt;
1109-
unsigned int mid, left = 0;
1110-
phys_addr_t addr = PFN_PHYS(++pfn);
1111-
1112-
do {
1113-
mid = (right + left) / 2;
1114-
1115-
if (addr < type->regions[mid].base)
1116-
right = mid;
1117-
else if (addr >= (type->regions[mid].base +
1118-
type->regions[mid].size))
1119-
left = mid + 1;
1120-
else {
1121-
/* addr is within the region, so pfn is valid */
1122-
return pfn;
1123-
}
1124-
} while (left < right);
1125-
1126-
if (right == type->cnt)
1127-
return -1UL;
1128-
else
1129-
return PHYS_PFN(type->regions[right].base);
1130-
}
1131-
11321104
/**
11331105
* memblock_set_node - set node ID on memblock regions
11341106
* @base: base of area to set node ID for

mm/mempolicy.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2124,6 +2124,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
21242124
case MPOL_INTERLEAVE:
21252125
return !!nodes_equal(a->v.nodes, b->v.nodes);
21262126
case MPOL_PREFERRED:
2127+
/* a's ->flags is the same as b's */
2128+
if (a->flags & MPOL_F_LOCAL)
2129+
return true;
21272130
return a->v.preferred_node == b->v.preferred_node;
21282131
default:
21292132
BUG();

mm/page_alloc.c

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3596,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
35963596
return false;
35973597

35983598
/* this guy won't enter reclaim */
3599-
if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
3599+
if (current->flags & PF_MEMALLOC)
36003600
return false;
36013601

36023602
/* We're only interested __GFP_FS allocations for now */
@@ -5356,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
53565356
if (context != MEMMAP_EARLY)
53575357
goto not_early;
53585358

5359-
if (!early_pfn_valid(pfn)) {
5360-
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
5361-
/*
5362-
* Skip to the pfn preceding the next valid one (or
5363-
* end_pfn), such that we hit a valid pfn (or end_pfn)
5364-
* on our next iteration of the loop.
5365-
*/
5366-
pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
5367-
#endif
5359+
if (!early_pfn_valid(pfn))
53685360
continue;
5369-
}
53705361
if (!early_pfn_in_nid(pfn, nid))
53715362
continue;
53725363
if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))

mm/shmem.c

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -493,36 +493,45 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
493493
info = list_entry(pos, struct shmem_inode_info, shrinklist);
494494
inode = &info->vfs_inode;
495495

496-
if (nr_to_split && split >= nr_to_split) {
497-
iput(inode);
498-
continue;
499-
}
496+
if (nr_to_split && split >= nr_to_split)
497+
goto leave;
500498

501-
page = find_lock_page(inode->i_mapping,
499+
page = find_get_page(inode->i_mapping,
502500
(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
503501
if (!page)
504502
goto drop;
505503

504+
/* No huge page at the end of the file: nothing to split */
506505
if (!PageTransHuge(page)) {
507-
unlock_page(page);
508506
put_page(page);
509507
goto drop;
510508
}
511509

510+
/*
511+
* Leave the inode on the list if we failed to lock
512+
* the page at this time.
513+
*
514+
* Waiting for the lock may lead to deadlock in the
515+
* reclaim path.
516+
*/
517+
if (!trylock_page(page)) {
518+
put_page(page);
519+
goto leave;
520+
}
521+
512522
ret = split_huge_page(page);
513523
unlock_page(page);
514524
put_page(page);
515525

516-
if (ret) {
517-
/* split failed: leave it on the list */
518-
iput(inode);
519-
continue;
520-
}
526+
/* If split failed leave the inode on the list */
527+
if (ret)
528+
goto leave;
521529

522530
split++;
523531
drop:
524532
list_del_init(&info->shrinklist);
525533
removed++;
534+
leave:
526535
iput(inode);
527536
}
528537

0 commit comments

Comments
 (0)