
Commit 8ef6fd0

Merge branch 'mm-hotfixes-stable' into mm-stable to pick up "mm: fix crashes from deferred split racing folio migration", needed by "mm: migrate: split folio_migrate_mapping()".
2 parents 44195d1 + 1e3d28f commit 8ef6fd0

22 files changed: +323, -274 lines

.mailmap
Lines changed: 1 addition & 0 deletions

@@ -384,6 +384,7 @@ Li Yang <[email protected]> <[email protected]>

MAINTAINERS
Lines changed: 1 addition & 1 deletion

@@ -14476,7 +14476,7 @@ MEMORY MAPPING
 M:      Andrew Morton <[email protected]>
 R:      Liam R. Howlett <[email protected]>
 R:      Vlastimil Babka <[email protected]>
-R:      Lorenzo Stoakes <lstoakes@gmail.com>
+R:      Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
 S:      Maintained
 W:      http://www.linux-mm.org

fs/nilfs2/dir.c
Lines changed: 30 additions & 2 deletions

@@ -383,11 +383,39 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir,
 
 struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)
 {
-        struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop);
+        struct folio *folio;
+        struct nilfs_dir_entry *de, *next_de;
+        size_t limit;
+        char *msg;
 
+        de = nilfs_get_folio(dir, 0, &folio);
         if (IS_ERR(de))
                 return NULL;
-        return nilfs_next_entry(de);
+
+        limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */
+        if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||
+                     !nilfs_match(1, ".", de))) {
+                msg = "missing '.'";
+                goto fail;
+        }
+
+        next_de = nilfs_next_entry(de);
+        /*
+         * If "next_de" has not reached the end of the chunk, there is
+         * at least one more record. Check whether it matches "..".
+         */
+        if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||
+                     !nilfs_match(2, "..", next_de))) {
+                msg = "missing '..'";
+                goto fail;
+        }
+        *foliop = folio;
+        return next_de;
+
+fail:
+        nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);
+        folio_release_kmap(folio, de);
+        return NULL;
 }
 
 ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
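
Note: with this change nilfs_dotdot() returns NULL not only on a read error but also when the first directory chunk lacks valid "." or ".." entries (after logging via nilfs_error()). A minimal caller sketch under that assumption; the variable names and the -EIO return are invented for illustration:

        struct folio *folio;
        struct nilfs_dir_entry *dotdot;

        dotdot = nilfs_dotdot(dir, &folio);
        if (!dotdot)
                return -EIO;            /* corrupted directory, already reported */
        /* ... read or update the ".." entry ... */
        folio_release_kmap(folio, dotdot);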

fs/userfaultfd.c
Lines changed: 6 additions & 1 deletion

@@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
                 goto out;
         features = uffdio_api.features;
         ret = -EINVAL;
-        if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
+        if (uffdio_api.api != UFFD_API)
                 goto err_out;
         ret = -EPERM;
         if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
@@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
         uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;
         uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;
 #endif
+
+        ret = -EINVAL;
+        if (features & ~uffdio_api.features)
+                goto err_out;
+
         uffdio_api.ioctls = UFFD_API_IOCTLS;
         ret = -EFAULT;
         if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
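
Note: the unknown-feature check now runs after the kernel has cleared features it cannot provide at runtime, so requesting such a feature fails with EINVAL instead of being silently dropped. A hedged userspace sketch (assumes a uffd descriptor from userfaultfd(2) and the usual <sys/ioctl.h>, <err.h>, <linux/userfaultfd.h> headers):

        struct uffdio_api api = {
                .api = UFFD_API,
                .features = UFFD_FEATURE_WP_ASYNC,      /* example: may be unsupported */
        };

        if (ioctl(uffd, UFFDIO_API, &api) == -1)
                err(1, "UFFDIO_API");   /* now EINVAL if the feature is unavailable */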

include/linux/mmzone.h
Lines changed: 2 additions & 1 deletion

@@ -1981,8 +1981,9 @@ static inline int subsection_map_index(unsigned long pfn)
 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
 {
         int idx = subsection_map_index(pfn);
+        struct mem_section_usage *usage = READ_ONCE(ms->usage);
 
-        return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
+        return usage ? test_bit(idx, usage->subsection_map) : 0;
 }
 #else
 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
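
Note: ms->usage can be freed by a racing memory hot-remove, so the pointer is read once and NULL-checked before use. A hypothetical caller sketch (the wrapper name is invented; it only shows the pattern callers rely on):

        static bool pfn_lookup_valid(unsigned long pfn)
        {
                struct mem_section *ms = __pfn_to_section(pfn);

                /* With the NULL check above, a section whose ->usage has been
                 * freed now reads as "not valid" instead of being dereferenced. */
                return ms && pfn_section_valid(ms, pfn);
        }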

include/linux/page_ref.h
Lines changed: 9 additions & 48 deletions

@@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio)
 
 static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 {
-        bool ret = atomic_add_unless(&page->_refcount, nr, u);
+        bool ret = false;
+
+        rcu_read_lock();
+        /* avoid writing to the vmemmap area being remapped */
+        if (!page_is_fake_head(page) && page_ref_count(page) != u)
+                ret = atomic_add_unless(&page->_refcount, nr, u);
+        rcu_read_unlock();
 
         if (page_ref_tracepoint_active(page_ref_mod_unless))
                 __page_ref_mod_unless(page, nr, ret);
@@ -258,54 +264,9 @@ static inline bool folio_try_get(struct folio *folio)
         return folio_ref_add_unless(folio, 1, 0);
 }
 
-static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
-{
-#ifdef CONFIG_TINY_RCU
-        /*
-         * The caller guarantees the folio will not be freed from interrupt
-         * context, so (on !SMP) we only need preemption to be disabled
-         * and TINY_RCU does that for us.
-         */
-# ifdef CONFIG_PREEMPT_COUNT
-        VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
-        VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
-        folio_ref_add(folio, count);
-#else
-        if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
-                /* Either the folio has been freed, or will be freed. */
-                return false;
-        }
-#endif
-        return true;
-}
-
-/**
- * folio_try_get_rcu - Attempt to increase the refcount on a folio.
- * @folio: The folio.
- *
- * This is a version of folio_try_get() optimised for non-SMP kernels.
- * If you are still holding the rcu_read_lock() after looking up the
- * page and know that the page cannot have its refcount decreased to
- * zero in interrupt context, you can use this instead of folio_try_get().
- *
- * Example users include get_user_pages_fast() (as pages are not unmapped
- * from interrupt context) and the page cache lookups (as pages are not
- * truncated from interrupt context). We also know that pages are not
- * frozen in interrupt context for the purposes of splitting or migration.
- *
- * You can also use this function if you're holding a lock that prevents
- * pages being frozen & removed; eg the i_pages lock for the page cache
- * or the mmap_lock or page table lock for page tables. In this case,
- * it will always succeed, and you could have used a plain folio_get(),
- * but it's sometimes more convenient to have a common function called
- * from both locked and RCU-protected contexts.
- *
- * Return: True if the reference count was successfully incremented.
- */
-static inline bool folio_try_get_rcu(struct folio *folio)
+static inline bool folio_ref_try_add(struct folio *folio, int count)
 {
-        return folio_ref_try_add_rcu(folio, 1);
+        return folio_ref_add_unless(folio, count, 0);
 }
 
 static inline int page_ref_freeze(struct page *page, int count)
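
Note: page_ref_add_unless() now takes rcu_read_lock() itself and skips vmemmap fake heads, so folio_try_get() is safe for speculative page-cache lookups and the folio_try_get_rcu() variant is removed; folio_ref_try_add() replaces folio_ref_try_add_rcu() for multi-reference callers. A minimal sketch of the lookup pattern this enables (the xa_state and folio variables are assumed to be set up by the caller):

        rcu_read_lock();
        folio = xas_load(&xas);
        if (folio && !xa_is_value(folio) && folio_try_get(folio)) {
                /* Re-check after taking the reference; drop it if we raced. */
                if (unlikely(folio != xas_reload(&xas))) {
                        folio_put(folio);
                        folio = NULL;
                }
        }
        rcu_read_unlock();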

include/linux/pagemap.h
Lines changed: 9 additions & 2 deletions

@@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
  * a good order (that's 1MB if you're using 4kB pages)
  */
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER     HPAGE_PMD_ORDER
+#define PREFERRED_MAX_PAGECACHE_ORDER   HPAGE_PMD_ORDER
 #else
-#define MAX_PAGECACHE_ORDER     8
+#define PREFERRED_MAX_PAGECACHE_ORDER   8
 #endif
 
+/*
+ * xas_split_alloc() does not support arbitrary orders. This implies no
+ * 512MB THP on ARM64 with 64KB base page size.
+ */
+#define MAX_XAS_ORDER           (XA_CHUNK_SHIFT * 2 - 1)
+#define MAX_PAGECACHE_ORDER     min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
+
 /**
  * mapping_set_large_folios() - Indicate the file supports large folios.
  * @mapping: The file.
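
Note: a worked example of the new cap, assuming the default XA_CHUNK_SHIFT of 6 (CONFIG_BASE_SMALL=n):

        /* MAX_XAS_ORDER = 6 * 2 - 1 = 11
         *
         * ARM64 with 64KB base pages: HPAGE_PMD_ORDER = 13 (512MB PMD), so
         *   MAX_PAGECACHE_ORDER = min(11, 13) = 11, i.e. at most 128MB folios,
         *   which xas_split_alloc() can still handle.
         * x86-64 with 4KB base pages: HPAGE_PMD_ORDER = 9 (2MB PMD), so
         *   MAX_PAGECACHE_ORDER = min(11, 9) = 9, unchanged.
         */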

include/linux/swap.h
Lines changed: 2 additions & 1 deletion

@@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page)
 }
 
 /* linux/mm/workingset.c */
-bool workingset_test_recent(void *shadow, bool file, bool *workingset);
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+                                bool flush);
 void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
 void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
 void workingset_refault(struct folio *folio, void *shadow);
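
Note: workingset_test_recent() gains a fourth argument controlling whether memcg stats are flushed inside the call. A minimal caller sketch; pass false when the caller has already flushed and is inside an RCU read section, as filemap_cachestat() does below:

        bool workingset;

        if (workingset_test_recent(shadow, true, &workingset, false))
                nr_recently_evicted += nr_pages;        /* counters assumed from the caller */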

lib/build_OID_registry
Lines changed: 3 additions & 1 deletion

@@ -38,7 +38,9 @@ close IN_FILE || die;
 #
 open C_FILE, ">$ARGV[1]" or die;
 print C_FILE "/*\n";
-print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n";
+my $scriptname = $0;
+$scriptname =~ s#^\Q$abs_srctree/\E##;
+print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n";
 print C_FILE " */\n";
 
 #

mm/damon/core.c
Lines changed: 20 additions & 3 deletions

@@ -1694,14 +1694,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
  * access frequencies are similar. This is for minimizing the monitoring
  * overhead under the dynamically changeable access pattern. If a merge was
  * unnecessarily made, later 'kdamond_split_regions()' will revert it.
+ *
+ * The total number of regions could be higher than the user-defined limit,
+ * max_nr_regions for some cases. For example, the user can update
+ * max_nr_regions to a number that lower than the current number of regions
+ * while DAMON is running. For such a case, repeat merging until the limit is
+ * met while increasing @threshold up to possible maximum level.
  */
 static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
                                   unsigned long sz_limit)
 {
         struct damon_target *t;
-
-        damon_for_each_target(t, c)
-                damon_merge_regions_of(t, threshold, sz_limit);
+        unsigned int nr_regions;
+        unsigned int max_thres;
+
+        max_thres = c->attrs.aggr_interval /
+                (c->attrs.sample_interval ? c->attrs.sample_interval : 1);
+        do {
+                nr_regions = 0;
+                damon_for_each_target(t, c) {
+                        damon_merge_regions_of(t, threshold, sz_limit);
+                        nr_regions += damon_nr_regions(t);
+                }
+                threshold = max(1, threshold * 2);
+        } while (nr_regions > c->attrs.max_nr_regions &&
+                        threshold / 2 < max_thres);
 }
 
 /*
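
Note: a worked example of the new retry loop, assuming the DAMON defaults of aggr_interval = 100000 us and sample_interval = 5000 us, so max_thres = 100000 / 5000 = 20 (the largest nr_accesses a region can reach in one aggregation interval):

        /* Each pass merges with the current threshold, counts the remaining
         * regions, then doubles the threshold:
         *
         *   pass 1: merge with threshold t,  nr_regions still > max_nr_regions
         *   pass 2: merge with threshold 2t, ...
         *
         * The loop exits once nr_regions <= max_nr_regions, or once the
         * threshold just used (threshold / 2 after doubling) reaches 20.
         */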

mm/filemap.c
Lines changed: 10 additions & 7 deletions

@@ -1847,7 +1847,7 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
         if (!folio || xa_is_value(folio))
                 goto out;
 
-        if (!folio_try_get_rcu(folio))
+        if (!folio_try_get(folio))
                 goto repeat;
 
         if (unlikely(folio != xas_reload(&xas))) {
@@ -2001,7 +2001,7 @@ static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
         if (!folio || xa_is_value(folio))
                 return folio;
 
-        if (!folio_try_get_rcu(folio))
+        if (!folio_try_get(folio))
                 goto reset;
 
         if (unlikely(folio != xas_reload(xas))) {
@@ -2181,7 +2181,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
                 if (xa_is_value(folio))
                         goto update_start;
 
-                if (!folio_try_get_rcu(folio))
+                if (!folio_try_get(folio))
                         goto retry;
 
                 if (unlikely(folio != xas_reload(&xas)))
@@ -2313,7 +2313,7 @@ static void filemap_get_read_batch(struct address_space *mapping,
                         break;
                 if (xa_is_sibling(folio))
                         break;
-                if (!folio_try_get_rcu(folio))
+                if (!folio_try_get(folio))
                         goto retry;
 
                 if (unlikely(folio != xas_reload(&xas)))
@@ -3124,7 +3124,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
         /* Use the readahead code, even if readahead is disabled */
-        if (vm_flags & VM_HUGEPAGE) {
+        if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
                 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
                 ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
                 ra->size = HPAGE_PMD_NR;
@@ -3472,7 +3472,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas,
                         continue;
                 if (folio_test_locked(folio))
                         continue;
-                if (!folio_try_get_rcu(folio))
+                if (!folio_try_get(folio))
                         continue;
                 /* Has the page moved or been split? */
                 if (unlikely(folio != xas_reload(xas)))
@@ -4248,6 +4248,9 @@ static void filemap_cachestat(struct address_space *mapping,
         XA_STATE(xas, &mapping->i_pages, first_index);
         struct folio *folio;
 
+        /* Flush stats (and potentially sleep) outside the RCU read section. */
+        mem_cgroup_flush_stats_ratelimited(NULL);
+
         rcu_read_lock();
         xas_for_each(&xas, folio, last_index) {
                 int order;
@@ -4311,7 +4314,7 @@ static void filemap_cachestat(struct address_space *mapping,
                         goto resched;
                 }
 #endif
-                if (workingset_test_recent(shadow, true, &workingset))
+                if (workingset_test_recent(shadow, true, &workingset, false))
                         cs->nr_recently_evicted += nr_pages;
 
                 goto resched;
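
Note: filemap_cachestat() now flushes memcg stats before entering its RCU read section, since the flush can sleep; the workingset_test_recent() call inside the section therefore passes flush=false. A condensed sketch of the resulting ordering (most of the function elided):

        mem_cgroup_flush_stats_ratelimited(NULL);       /* may sleep: do it first */

        rcu_read_lock();
        xas_for_each(&xas, folio, last_index) {
                /* ... shadow-entry handling ... */
                if (workingset_test_recent(shadow, true, &workingset, false))
                        cs->nr_recently_evicted += nr_pages;
        }
        rcu_read_unlock();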
