
Commit f0ab773

Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "13 fixes"

* emailed patches from Andrew Morton <[email protected]>:
  rbtree: include rcu.h
  scripts/faddr2line: fix error when addr2line output contains discriminator
  ocfs2: take inode cluster lock before moving reflinked inode from orphan dir
  mm, oom: fix concurrent munlock and oom reaper unmap, v3
  mm: migrate: fix double call of radix_tree_replace_slot()
  proc/kcore: don't bounds check against address 0
  mm: don't show nr_indirectly_reclaimable in /proc/vmstat
  mm: sections are not offlined during memory hotremove
  z3fold: fix reclaim lock-ups
  init: fix false positives in W+X checking
  lib/find_bit_benchmark.c: avoid soft lockup in test_find_first_bit()
  KASAN: prohibit KASAN+STRUCTLEAK combination
  MAINTAINERS: update Shuah's email address
2 parents 4bc8719 + 2075b16 commit f0ab773

File tree

17 files changed: +164 -87 lines changed


MAINTAINERS

Lines changed: 0 additions & 3 deletions

@@ -3691,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c
 
 CPU POWER MONITORING SUBSYSTEM
 M:	Thomas Renninger <[email protected]>
-M:	Shuah Khan <[email protected]>
 M:	Shuah Khan <[email protected]>
 S:	Maintained
@@ -7696,7 +7695,6 @@ F: include/linux/sunrpc/
 F:	include/uapi/linux/sunrpc/
 
 KERNEL SELFTEST FRAMEWORK
-M:	Shuah Khan <[email protected]>
 M:	Shuah Khan <[email protected]>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
@@ -14650,7 +14648,6 @@ F: drivers/usb/common/usb-otg-fsm.c
 
 USB OVER IP DRIVER
 M:	Valentina Manea <[email protected]>
-M:	Shuah Khan <[email protected]>
 M:	Shuah Khan <[email protected]>
 S:	Maintained

arch/Kconfig

Lines changed: 4 additions & 0 deletions

@@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY
 config GCC_PLUGIN_STRUCTLEAK
 	bool "Force initialization of variables containing userspace addresses"
 	depends on GCC_PLUGINS
+	# Currently STRUCTLEAK inserts initialization out of live scope of
+	# variables from KASAN point of view. This leads to KASAN false
+	# positive reports. Prohibit this combination for now.
+	depends on !KASAN_EXTRA
 	help
 	  This plugin zero-initializes any structures containing a
 	  __user attribute. This can prevent some classes of information

fs/ocfs2/refcounttree.c

Lines changed: 12 additions & 2 deletions

@@ -4250,10 +4250,11 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
 static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 			 struct dentry *new_dentry, bool preserve)
 {
-	int error;
+	int error, had_lock;
 	struct inode *inode = d_inode(old_dentry);
 	struct buffer_head *old_bh = NULL;
 	struct inode *new_orphan_inode = NULL;
+	struct ocfs2_lock_holder oh;
 
 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
 		return -EOPNOTSUPP;
@@ -4295,21 +4296,30 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 		goto out;
 	}
 
+	had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
+					    &oh);
+	if (had_lock < 0) {
+		error = had_lock;
+		mlog_errno(error);
+		goto out;
+	}
+
 	/* If the security isn't preserved, we need to re-initialize them. */
 	if (!preserve) {
 		error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
 						    &new_dentry->d_name);
 		if (error)
 			mlog_errno(error);
 	}
-out:
 	if (!error) {
 		error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
 						       new_dentry);
 		if (error)
 			mlog_errno(error);
 	}
+	ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
 
+out:
 	if (new_orphan_inode) {
 		/*
		 * We need to open_unlock the inode no matter whether we
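
The fix hinges on the tracker variant of the inode cluster lock being re-entrant. A minimal usage sketch, inferred from the hunk above rather than quoted from dlmglue: had_lock records whether the lock was already held on entry, and the unlock side uses it to decide whether this call is the one that must actually drop the lock.

	struct ocfs2_lock_holder oh;
	int had_lock;

	had_lock = ocfs2_inode_lock_tracker(inode, NULL, 1, &oh);
	if (had_lock < 0)
		return had_lock;	/* failed to take the cluster lock */
	/* ... the inode's cluster lock is held here ... */
	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);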

fs/proc/kcore.c

Lines changed: 16 additions & 7 deletions

@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
 {
 	struct list_head *head = (struct list_head *)arg;
 	struct kcore_list *ent;
+	struct page *p;
+
+	if (!pfn_valid(pfn))
+		return 1;
+
+	p = pfn_to_page(pfn);
+	if (!memmap_valid_within(pfn, p, page_zone(p)))
+		return 1;
 
 	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
 	if (!ent)
 		return -ENOMEM;
-	ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+	ent->addr = (unsigned long)page_to_virt(p);
 	ent->size = nr_pages << PAGE_SHIFT;
 
-	/* Sanity check: Can happen in 32bit arch...maybe */
-	if (ent->addr < (unsigned long) __va(0))
+	if (!virt_addr_valid(ent->addr))
 		goto free_out;
 
 	/* cut not-mapped area. ....from ppc-32 code. */
 	if (ULONG_MAX - ent->addr < ent->size)
 		ent->size = ULONG_MAX - ent->addr;
 
-	/* cut when vmalloc() area is higher than direct-map area */
-	if (VMALLOC_START > (unsigned long)__va(0)) {
-		if (ent->addr > VMALLOC_START)
-			goto free_out;
+	/*
+	 * We've already checked virt_addr_valid so we know this address
+	 * is a valid pointer, therefore we can check against it to determine
+	 * if we need to trim
+	 */
+	if (VMALLOC_START > ent->addr) {
 		if (VMALLOC_START - ent->addr < ent->size)
 			ent->size = VMALLOC_START - ent->addr;
 	}
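
The old sanity check assumed __va(0) is the lowest direct-mapped address, which does not hold on every architecture, so real addresses could fail the check while bogus ones passed. A condensed contrast of the two approaches (restating the hunk above, not new code):

	/* old: compute first, then bounds-check against __va(0) */
	ent->addr = (unsigned long)__va(pfn << PAGE_SHIFT);
	if (ent->addr < (unsigned long)__va(0))	/* wrong baseline on some archs */
		goto free_out;

	/* new: validate the pfn itself, then the resulting virtual address */
	if (!pfn_valid(pfn))
		return 1;
	ent->addr = (unsigned long)page_to_virt(pfn_to_page(pfn));
	if (!virt_addr_valid(ent->addr))
		goto free_out;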

include/linux/oom.h

Lines changed: 2 additions & 0 deletions

@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
 	return 0;
 }
 
+void __oom_reap_task_mm(struct mm_struct *mm);
+
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);

include/linux/rbtree_augmented.h

Lines changed: 1 addition & 0 deletions

@@ -26,6 +26,7 @@
 
 #include <linux/compiler.h>
 #include <linux/rbtree.h>
+#include <linux/rcupdate.h>
 
 /*
  * Please note - only struct rb_augment_callbacks and the prototypes for

include/linux/rbtree_latch.h

Lines changed: 1 addition & 0 deletions

@@ -35,6 +35,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/seqlock.h>
+#include <linux/rcupdate.h>
 
 struct latch_tree_node {
 	struct rb_node node[2];
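
Both headers use RCU primitives directly but had so far compiled only because some other header happened to pull in <linux/rcupdate.h> first; certain configurations broke that include chain. rbtree_augmented.h, for instance, publishes a new root with rcu_assign_pointer(), roughly as follows (paraphrased from the header):

	static inline void
	__rb_change_child_rcu(struct rb_node *old, struct rb_node *new,
			      struct rb_node *parent, struct rb_root *root)
	{
		if (parent) {
			if (parent->rb_left == old)
				WRITE_ONCE(parent->rb_left, new);
			else
				WRITE_ONCE(parent->rb_right, new);
		} else
			rcu_assign_pointer(root->rb_node, new);	/* needs rcupdate.h */
	}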

init/main.c

Lines changed: 7 additions & 0 deletions

@@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata);
 static void mark_readonly(void)
 {
 	if (rodata_enabled) {
+		/*
+		 * load_module() results in W+X mappings, which are cleaned up
+		 * with call_rcu_sched(). Let's make sure that queued work is
+		 * flushed so that we don't hit false positives looking for
+		 * insecure pages which are W+X.
+		 */
+		rcu_barrier_sched();
 		mark_rodata_ro();
 		rodata_test();
 	} else

kernel/module.c

Lines changed: 5 additions & 0 deletions

@@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
 	 * walking this with preempt disabled. In all the failure paths, we
 	 * call synchronize_sched(), but we don't want to slow down the success
 	 * path, so use actual RCU here.
+	 * Note that module_alloc() on most architectures creates W+X page
+	 * mappings which won't be cleaned up until do_free_init() runs. Any
+	 * code such as mark_rodata_ro() which depends on those mappings to
+	 * be cleaned up needs to sync with the queued work - ie
+	 * rcu_barrier_sched()
 	 */
 	call_rcu_sched(&freeinit->rcu, do_free_init);
 	mutex_unlock(&module_mutex);
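
Together with the init/main.c hunk above, this describes a single ordering requirement. A condensed sketch of that ordering (simplified, not verbatim kernel code):

	/* do_init_module(), once the module's init function has returned: */
	call_rcu_sched(&freeinit->rcu, do_free_init);	/* queues teardown of the
							 * module's W+X init mappings */

	/* mark_readonly(), late in boot, before the W+X scan: */
	rcu_barrier_sched();	/* waits for every queued sched-RCU callback,
				 * do_free_init() included, to finish */
	mark_rodata_ro();	/* the scan no longer sees stale init mappings */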

lib/find_bit_benchmark.c

Lines changed: 6 additions & 1 deletion

@@ -132,7 +132,12 @@ static int __init find_bit_test(void)
 	test_find_next_bit(bitmap, BITMAP_LEN);
 	test_find_next_zero_bit(bitmap, BITMAP_LEN);
 	test_find_last_bit(bitmap, BITMAP_LEN);
-	test_find_first_bit(bitmap, BITMAP_LEN);
+
+	/*
+	 * test_find_first_bit() may take some time, so
+	 * traverse only part of bitmap to avoid soft lockup.
+	 */
+	test_find_first_bit(bitmap, BITMAP_LEN / 10);
 	test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
 
 	pr_err("\nStart testing find_bit() with sparse bitmap\n");
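
For context on the soft lockup: find_first_bit() always rescans from bit zero, and the test clears one set bit per iteration, so a dense bitmap of n bits costs on the order of n^2/2 bit inspections. The test loop is roughly of this shape (paraphrased; the real function also measures and prints elapsed time):

	static int __init test_find_first_bit(void *bitmap, unsigned long len)
	{
		unsigned long i, cnt;

		for (cnt = i = 0; i < len; cnt++) {
			i = find_first_bit(bitmap, len);	/* restarts from bit 0 every call */
			__clear_bit(i, bitmap);
		}
		return 0;
	}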

mm/migrate.c

Lines changed: 1 addition & 3 deletions

@@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		int i;
 		int index = page_index(page);
 
-		for (i = 0; i < HPAGE_PMD_NR; i++) {
+		for (i = 1; i < HPAGE_PMD_NR; i++) {
 			pslot = radix_tree_lookup_slot(&mapping->i_pages,
 						       index + i);
 			radix_tree_replace_slot(&mapping->i_pages, pslot,
 						newpage + i);
 		}
-	} else {
-		radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 	}
 
	/*
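
The head page's slot is already replaced once just above this hunk, so the old code hit slot 0 twice: via the loop starting at i = 0 in the THP case, and via the else branch otherwise. A condensed paraphrase of the fixed flow (not the verbatim source):

	radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);	/* head page: once */
	if (PageTransHuge(page)) {
		int i, index = page_index(page);

		for (i = 1; i < HPAGE_PMD_NR; i++) {	/* tail pages only */
			pslot = radix_tree_lookup_slot(&mapping->i_pages, index + i);
			radix_tree_replace_slot(&mapping->i_pages, pslot, newpage + i);
		}
	}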

mm/mmap.c

Lines changed: 26 additions & 18 deletions

@@ -3056,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm)
 	/* mm's last user has gone, and its about to be pulled down */
 	mmu_notifier_release(mm);
 
+	if (unlikely(mm_is_oom_victim(mm))) {
+		/*
+		 * Manually reap the mm to free as much memory as possible.
+		 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
+		 * this mm from further consideration. Taking mm->mmap_sem for
+		 * write after setting MMF_OOM_SKIP will guarantee that the oom
+		 * reaper will not run on this mm again after mmap_sem is
+		 * dropped.
+		 *
+		 * Nothing can be holding mm->mmap_sem here and the above call
+		 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
+		 * __oom_reap_task_mm() will not block.
+		 *
+		 * This needs to be done before calling munlock_vma_pages_all(),
+		 * which clears VM_LOCKED, otherwise the oom reaper cannot
+		 * reliably test it.
+		 */
+		mutex_lock(&oom_lock);
+		__oom_reap_task_mm(mm);
+		mutex_unlock(&oom_lock);
+
+		set_bit(MMF_OOM_SKIP, &mm->flags);
+		down_write(&mm->mmap_sem);
+		up_write(&mm->mmap_sem);
+	}
+
 	if (mm->locked_vm) {
 		vma = mm->mmap;
 		while (vma) {
@@ -3077,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm)
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);
-
-	if (unlikely(mm_is_oom_victim(mm))) {
-		/*
-		 * Wait for oom_reap_task() to stop working on this
-		 * mm. Because MMF_OOM_SKIP is already set before
-		 * calling down_read(), oom_reap_task() will not run
-		 * on this "mm" post up_write().
-		 *
-		 * mm_is_oom_victim() cannot be set from under us
-		 * either because victim->mm is already set to NULL
-		 * under task_lock before calling mmput and oom_mm is
-		 * set not NULL by the OOM killer only if victim->mm
-		 * is found not NULL while holding the task_lock.
-		 */
-		set_bit(MMF_OOM_SKIP, &mm->flags);
-		down_write(&mm->mmap_sem);
-		up_write(&mm->mmap_sem);
-	}
 	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
 	tlb_finish_mmu(&tlb, 0, -1);
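
The MMF_OOM_SKIP plus down_write/up_write pair is a handshake with the reaper, which only works on an mm while holding the read side of mmap_sem. A simplified sketch of the idiom (not verbatim kernel code):

	/* exit_mmap() side: after this sequence any later reaper attempt
	 * either fails its trylock or sees MMF_OOM_SKIP and backs off. */
	set_bit(MMF_OOM_SKIP, &mm->flags);
	down_write(&mm->mmap_sem);	/* drains any reaper currently in the */
	up_write(&mm->mmap_sem);	/* read-side critical section */

	/* reaper side, simplified: */
	if (!down_read_trylock(&mm->mmap_sem))
		return false;			/* exit path owns it; retry later */
	if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
		up_read(&mm->mmap_sem);
		return true;			/* already handled */
	}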

mm/oom_kill.c

Lines changed: 43 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
469469
return false;
470470
}
471471

472-
473472
#ifdef CONFIG_MMU
474473
/*
475474
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
480479
static struct task_struct *oom_reaper_list;
481480
static DEFINE_SPINLOCK(oom_reaper_lock);
482481

483-
static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
482+
void __oom_reap_task_mm(struct mm_struct *mm)
484483
{
485-
struct mmu_gather tlb;
486484
struct vm_area_struct *vma;
485+
486+
/*
487+
* Tell all users of get_user/copy_from_user etc... that the content
488+
* is no longer stable. No barriers really needed because unmapping
489+
* should imply barriers already and the reader would hit a page fault
490+
* if it stumbled over a reaped memory.
491+
*/
492+
set_bit(MMF_UNSTABLE, &mm->flags);
493+
494+
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
495+
if (!can_madv_dontneed_vma(vma))
496+
continue;
497+
498+
/*
499+
* Only anonymous pages have a good chance to be dropped
500+
* without additional steps which we cannot afford as we
501+
* are OOM already.
502+
*
503+
* We do not even care about fs backed pages because all
504+
* which are reclaimable have already been reclaimed and
505+
* we do not want to block exit_mmap by keeping mm ref
506+
* count elevated without a good reason.
507+
*/
508+
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
509+
const unsigned long start = vma->vm_start;
510+
const unsigned long end = vma->vm_end;
511+
struct mmu_gather tlb;
512+
513+
tlb_gather_mmu(&tlb, mm, start, end);
514+
mmu_notifier_invalidate_range_start(mm, start, end);
515+
unmap_page_range(&tlb, vma, start, end, NULL);
516+
mmu_notifier_invalidate_range_end(mm, start, end);
517+
tlb_finish_mmu(&tlb, start, end);
518+
}
519+
}
520+
}
521+
522+
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
523+
{
487524
bool ret = true;
488525

489526
/*
490527
* We have to make sure to not race with the victim exit path
491528
* and cause premature new oom victim selection:
492-
* __oom_reap_task_mm exit_mm
529+
* oom_reap_task_mm exit_mm
493530
* mmget_not_zero
494531
* mmput
495532
* atomic_dec_and_test
@@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
534571

535572
trace_start_task_reaping(tsk->pid);
536573

537-
/*
538-
* Tell all users of get_user/copy_from_user etc... that the content
539-
* is no longer stable. No barriers really needed because unmapping
540-
* should imply barriers already and the reader would hit a page fault
541-
* if it stumbled over a reaped memory.
542-
*/
543-
set_bit(MMF_UNSTABLE, &mm->flags);
544-
545-
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
546-
if (!can_madv_dontneed_vma(vma))
547-
continue;
574+
__oom_reap_task_mm(mm);
548575

549-
/*
550-
* Only anonymous pages have a good chance to be dropped
551-
* without additional steps which we cannot afford as we
552-
* are OOM already.
553-
*
554-
* We do not even care about fs backed pages because all
555-
* which are reclaimable have already been reclaimed and
556-
* we do not want to block exit_mmap by keeping mm ref
557-
* count elevated without a good reason.
558-
*/
559-
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
560-
const unsigned long start = vma->vm_start;
561-
const unsigned long end = vma->vm_end;
562-
563-
tlb_gather_mmu(&tlb, mm, start, end);
564-
mmu_notifier_invalidate_range_start(mm, start, end);
565-
unmap_page_range(&tlb, vma, start, end, NULL);
566-
mmu_notifier_invalidate_range_end(mm, start, end);
567-
tlb_finish_mmu(&tlb, start, end);
568-
}
569-
}
570576
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
571577
task_pid_nr(tsk), tsk->comm,
572578
K(get_mm_counter(mm, MM_ANONPAGES)),
@@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
587593
struct mm_struct *mm = tsk->signal->oom_mm;
588594

589595
/* Retry the down_read_trylock(mmap_sem) a few times */
590-
while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
596+
while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
591597
schedule_timeout_idle(HZ/10);
592598

593599
if (attempts <= MAX_OOM_REAP_RETRIES ||
594600
test_bit(MMF_OOM_SKIP, &mm->flags))
595601
goto done;
596602

597-
598603
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
599604
task_pid_nr(tsk), tsk->comm);
600605
debug_show_all_locks();

mm/sparse.c

Lines changed: 1 addition & 1 deletion

@@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
 	unsigned long pfn;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-		unsigned long section_nr = pfn_to_section_nr(start_pfn);
+		unsigned long section_nr = pfn_to_section_nr(pfn);
 		struct mem_section *ms;
 
		/*
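
The one-character fix is easy to read past: with start_pfn the loop computed the same section number on every pass, so only the range's first section was ever marked offline. With hypothetical values, PAGES_PER_SECTION = 0x8000 and a range of [0x10000, 0x30000) covering sections 2 through 5:

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		unsigned long section_nr = pfn_to_section_nr(start_pfn);	/* always section 2 */
		/* ... sections 3, 4 and 5 were never offlined ... */
	}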
