Skip to content

Commit e0a043a

Browse files
committed
mm, slub: protect put_cpu_partial() with disabled irqs instead of cmpxchg
Jann Horn reported [1] the following theoretically possible race:

  task A: put_cpu_partial() calls preempt_disable()
  task A: oldpage = this_cpu_read(s->cpu_slab->partial)
  interrupt: kfree() reaches unfreeze_partials() and discards the page
  task B (on another CPU): reallocates page as page cache
  task A: reads page->pages and page->pobjects, which are actually
    halves of the pointer page->lru.prev
  task B (on another CPU): frees page
  interrupt: allocates page as SLUB page and places it on the percpu
    partial list
  task A: this_cpu_cmpxchg() succeeds

  which would cause page->pages and page->pobjects to end up containing
  halves of pointers that would then influence when put_cpu_partial()
  happens and show up in root-only sysfs files. Maybe that's acceptable,
  I don't know. But there should probably at least be a comment for now
  to point out that we're reading union fields of a page that might be
  in a completely different state.

Additionally, the this_cpu_cmpxchg() approach in put_cpu_partial() is
only safe against s->cpu_slab->partial manipulation in ___slab_alloc()
if the latter disables irqs, otherwise a __slab_free() in an irq handler
could call put_cpu_partial() in the middle of ___slab_alloc()
manipulating ->partial and corrupt it. This becomes an issue on RT after
a local_lock is introduced in later patch. The fix means taking the
local_lock also in put_cpu_partial() on RT.

After debugging this issue, Mike Galbraith suggested [2] that to avoid
different locking schemes on RT and !RT, we can just protect
put_cpu_partial() with disabled irqs (to be converted to
local_lock_irqsave() later) everywhere. This should be acceptable as
it's not a fast path, and moving the actual partial unfreezing outside
of the irq disabled section makes it short, and with the retry loop gone
the code can be also simplified. In addition, the race reported by Jann
should no longer be possible.
[1] https://lore.kernel.org/lkml/CAG48ez1mvUuXwg0YPH5ANzhQLpbphqk-ZS+jbRz+H66fvm4FcA@mail.gmail.com/ [2] https://lore.kernel.org/linux-rt-users/[email protected]/ Reported-by: Jann Horn <[email protected]> Suggested-by: Mike Galbraith <[email protected]> Signed-off-by: Vlastimil Babka <[email protected]>
1 parent a2b4ae8 commit e0a043a

File tree

1 file changed

+44
-37
lines changed

1 file changed

+44
-37
lines changed

mm/slub.c

Lines changed: 44 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2025,7 +2025,12 @@ static inline void *acquire_slab(struct kmem_cache *s,
20252025
return freelist;
20262026
}
20272027

/*
 * put_cpu_partial() is defined further down, after the unfreeze helpers
 * it now calls directly; forward-declare the real implementation here and
 * provide an empty inline stub when per-cpu partial lists are compiled out.
 */
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
#else
static inline void put_cpu_partial(struct kmem_cache *s, struct page *page,
				   int drain) { }
#endif
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
20302035

20312036
/*
@@ -2459,14 +2464,6 @@ static void unfreeze_partials_cpu(struct kmem_cache *s,
24592464
__unfreeze_partials(s, partial_page);
24602465
}
24612466

2462-
#else /* CONFIG_SLUB_CPU_PARTIAL */
2463-
2464-
static inline void unfreeze_partials(struct kmem_cache *s) { }
2465-
static inline void unfreeze_partials_cpu(struct kmem_cache *s,
2466-
struct kmem_cache_cpu *c) { }
2467-
2468-
#endif /* CONFIG_SLUB_CPU_PARTIAL */
2469-
24702467
/*
24712468
* Put a page that was just frozen (in __slab_free|get_partial_node) into a
24722469
* partial page slot if available.
@@ -2476,46 +2473,56 @@ static inline void unfreeze_partials_cpu(struct kmem_cache *s,
24762473
*/
24772474
static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
	struct page *oldpage;
	struct page *page_to_unfreeze = NULL;
	unsigned long flags;
	int pages = 0;
	int pobjects = 0;

	/*
	 * Disabling irqs (to be converted to local_lock_irqsave() later)
	 * protects s->cpu_slab->partial against a __slab_free() from an irq
	 * handler racing with this update, and means a plain this_cpu_write()
	 * below suffices where a this_cpu_cmpxchg() retry loop was needed.
	 */
	local_irq_save(flags);

	oldpage = this_cpu_read(s->cpu_slab->partial);

	if (oldpage) {
		if (drain && oldpage->pobjects > slub_cpu_partial(s)) {
			/*
			 * Partial array is full. Move the existing set to the
			 * per node partial list. Postpone the actual unfreezing
			 * outside of the critical section.
			 */
			page_to_unfreeze = oldpage;
			oldpage = NULL;
		} else {
			/* Room left: accumulate onto the existing list. */
			pobjects = oldpage->pobjects;
			pages = oldpage->pages;
		}
	}

	/* Account the newly frozen page and its free objects. */
	pages++;
	pobjects += page->objects - page->inuse;

	page->pages = pages;
	page->pobjects = pobjects;
	page->next = oldpage;

	this_cpu_write(s->cpu_slab->partial, page);

	local_irq_restore(flags);

	/*
	 * Unfreeze a full partial set only after irqs are enabled again,
	 * keeping the irq-disabled section short (this is not a fast path).
	 */
	if (page_to_unfreeze) {
		__unfreeze_partials(s, page_to_unfreeze);
		stat(s, CPU_PARTIAL_DRAIN);
	}
}
25182517

#else	/* CONFIG_SLUB_CPU_PARTIAL */

/* Without per-cpu partial lists the unfreeze operations are no-ops. */
static inline void unfreeze_partials(struct kmem_cache *s) { }
static inline void unfreeze_partials_cpu(struct kmem_cache *s,
					 struct kmem_cache_cpu *c) { }

#endif	/* CONFIG_SLUB_CPU_PARTIAL */
2525+
25192526
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
25202527
{
25212528
unsigned long flags;

0 commit comments

Comments
 (0)