
Commit 2ad654b

lizf-os authored and htejun committed
cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be atomic flags

When we change cpuset.memory_spread_{page,slab}, cpuset will flip
PF_SPREAD_{PAGE,SLAB} bit of tsk->flags for each task in that cpuset.
This should be done using atomic bitops, but currently we don't,
which is broken.

Tetsuo reported a hard-to-reproduce kernel crash on RHEL6, which
happened when one thread tried to clear PF_USED_MATH while at the
same time another thread tried to flip PF_SPREAD_PAGE/PF_SPREAD_SLAB.
They both operate on the same task.

Here's the full report:
https://lkml.org/lkml/2014/9/19/230

To fix this, we make PF_SPREAD_PAGE and PF_SPREAD_SLAB atomic flags.

v4:
- updated mm/slab.c. (Fengguang Wu)
- updated Documentation.

Cc: Peter Zijlstra <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Miao Xie <[email protected]>
Cc: Kees Cook <[email protected]>
Fixes: 950592f ("cpusets: update tasks' page/slab spread flags in time")
Cc: <[email protected]> # 2.6.31+
Reported-by: Tetsuo Handa <[email protected]>
Signed-off-by: Zefan Li <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
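As a minimal user-space sketch of the race the commit message describes (this is not kernel code; the names flags, FLAG_A, FLAG_B, and the helper functions are invented for illustration), the pattern on the left loses updates when two threads race on the same word, while the atomic pattern on the right does not:

#include <stdatomic.h>
#include <stdio.h>

/* Two hypothetical flag bits sharing one word, like PF_* bits in tsk->flags. */
#define FLAG_A (1u << 0)
#define FLAG_B (1u << 1)

static unsigned int plain_flags;            /* plain word: RMW is not atomic   */
static _Atomic unsigned int atomic_flags;   /* atomic word: RMW cannot be lost */

/* BROKEN pattern: load, modify, store.  If another thread does the same
 * between the load and the store, one of the two updates is silently lost.
 * This is the kind of race reported between PF_USED_MATH and
 * PF_SPREAD_PAGE/PF_SPREAD_SLAB updates on the same task. */
static void plain_set(unsigned int bit)   { plain_flags |= bit; }
static void plain_clear(unsigned int bit) { plain_flags &= ~bit; }

/* FIXED pattern: each bit update is a single atomic read-modify-write,
 * analogous to using atomic bitops on a separate per-task atomic word. */
static void atomic_set_flag(unsigned int bit)   { atomic_fetch_or(&atomic_flags, bit); }
static void atomic_clear_flag(unsigned int bit) { atomic_fetch_and(&atomic_flags, ~bit); }

int main(void)
{
    plain_set(FLAG_A);
    plain_clear(FLAG_B);          /* fine single-threaded, racy across threads */

    atomic_set_flag(FLAG_A);
    atomic_clear_flag(FLAG_B);    /* safe even if another thread races with it */

    printf("plain=%#x atomic=%#x\n", plain_flags, atomic_load(&atomic_flags));
    return 0;
}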
1 parent e0e5070 commit 2ad654b

File tree

5 files changed: +23 −13 lines


Documentation/cgroups/cpusets.txt

Lines changed: 3 additions & 3 deletions
@@ -345,14 +345,14 @@ the named feature on.
 The implementation is simple.
 
 Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
-PF_SPREAD_PAGE for each task that is in that cpuset or subsequently
+PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
 joins that cpuset. The page allocation calls for the page cache
-is modified to perform an inline check for this PF_SPREAD_PAGE task
+is modified to perform an inline check for this PFA_SPREAD_PAGE task
 flag, and if set, a call to a new routine cpuset_mem_spread_node()
 returns the node to prefer for the allocation.
 
 Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
-PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
+PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
 pages from the node returned by cpuset_mem_spread_node().
 
 The cpuset_mem_spread_node() routine is also simple. It uses the
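The control flow described in the documentation above can be sketched as a small user-space mock (not the actual mm/ code; allocate_page_cache_page() and the stubbed return values are invented for illustration): check the per-task spread flag inline, and if it is set, ask cpuset_mem_spread_node() which node to prefer.

#include <stdbool.h>
#include <stdio.h>

/* Stubs standing in for the real kernel helpers named in the text. */
static bool cpuset_do_page_mem_spread(void) { return true; } /* spread flag set? */
static int  cpuset_mem_spread_node(void)    { return 1; }    /* node to spread to */

/* Hypothetical allocation entry point mirroring the documented check. */
static void *allocate_page_cache_page(void)
{
    int node = cpuset_do_page_mem_spread() ? cpuset_mem_spread_node()
                                           : -1; /* -1 stands for "local node" */
    printf("allocating page cache page on node %d\n", node);
    return NULL; /* mock only: no real allocation happens */
}

int main(void)
{
    allocate_page_cache_page();
    return 0;
}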

include/linux/cpuset.h

Lines changed: 2 additions & 2 deletions
@@ -93,12 +93,12 @@ extern int cpuset_slab_spread_node(void);
 
 static inline int cpuset_do_page_mem_spread(void)
 {
-	return current->flags & PF_SPREAD_PAGE;
+	return task_spread_page(current);
 }
 
 static inline int cpuset_do_slab_mem_spread(void)
 {
-	return current->flags & PF_SPREAD_SLAB;
+	return task_spread_slab(current);
 }
 
 extern int current_cpuset_is_being_rebound(void);

include/linux/sched.h

Lines changed: 11 additions & 2 deletions
@@ -1903,8 +1903,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
-#define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
-#define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY	0x08000000	/* Early kill for mce process policy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
@@ -1958,6 +1956,9 @@ static inline void memalloc_noio_restore(unsigned int flags)
 
 /* Per-process atomic flags. */
 #define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
+#define PFA_SPREAD_PAGE  1	/* Spread page cache over cpuset */
+#define PFA_SPREAD_SLAB  2	/* Spread some slab caches over cpuset */
+
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1972,6 +1973,14 @@ static inline void memalloc_noio_restore(unsigned int flags)
 TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
 TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
 
+TASK_PFA_TEST(SPREAD_PAGE, spread_page)
+TASK_PFA_SET(SPREAD_PAGE, spread_page)
+TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
+
+TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
+TASK_PFA_SET(SPREAD_SLAB, spread_slab)
+TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+
 /*
  * task->jobctl flags
  */
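For context on the hunk above and the kernel/cpuset.c hunk below: the TASK_PFA_TEST/SET/CLEAR macros stamp out per-flag helpers (task_spread_page(), task_set_spread_page(), task_clear_spread_page(), and the slab equivalents) that operate on a separate atomic flags word in task_struct rather than on the plain tsk->flags word. The snippet below is a stand-alone user-space imitation of that pattern using C11 atomics and a stub struct; it is not the kernel's definition, only the shape of what the macros generate.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for task_struct: only the atomic flags word matters here. */
struct task_struct {
    _Atomic unsigned long atomic_flags;
};

/* Bit numbers, mirroring the PFA_* constants added in the hunk above. */
#define PFA_SPREAD_PAGE 1
#define PFA_SPREAD_SLAB 2

/* Imitation of the TASK_PFA_TEST/SET/CLEAR macro family: each invocation
 * generates a small inline helper that does one atomic bit operation. */
#define TASK_PFA_TEST(name, func)                                     \
    static inline bool task_##func(struct task_struct *p)             \
    { return atomic_load(&p->atomic_flags) & (1UL << PFA_##name); }
#define TASK_PFA_SET(name, func)                                      \
    static inline void task_set_##func(struct task_struct *p)         \
    { atomic_fetch_or(&p->atomic_flags, 1UL << PFA_##name); }
#define TASK_PFA_CLEAR(name, func)                                    \
    static inline void task_clear_##func(struct task_struct *p)       \
    { atomic_fetch_and(&p->atomic_flags, ~(1UL << PFA_##name)); }

TASK_PFA_TEST(SPREAD_PAGE, spread_page)
TASK_PFA_SET(SPREAD_PAGE, spread_page)
TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)

TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)

int main(void)
{
    struct task_struct tsk = { 0 };

    task_set_spread_page(&tsk);     /* like cpuset_update_task_spread_flag() */
    task_clear_spread_slab(&tsk);

    printf("spread_page=%d spread_slab=%d\n",
           task_spread_page(&tsk), task_spread_slab(&tsk));
    return 0;
}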

kernel/cpuset.c

Lines changed: 5 additions & 4 deletions
@@ -365,13 +365,14 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
 					struct task_struct *tsk)
 {
 	if (is_spread_page(cs))
-		tsk->flags |= PF_SPREAD_PAGE;
+		task_set_spread_page(tsk);
 	else
-		tsk->flags &= ~PF_SPREAD_PAGE;
+		task_clear_spread_page(tsk);
+
 	if (is_spread_slab(cs))
-		tsk->flags |= PF_SPREAD_SLAB;
+		task_set_spread_slab(tsk);
 	else
-		tsk->flags &= ~PF_SPREAD_SLAB;
+		task_clear_spread_slab(tsk);
 }
 
 /*

mm/slab.c

Lines changed: 2 additions & 2 deletions
@@ -2994,7 +2994,7 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 
 #ifdef CONFIG_NUMA
 /*
- * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
+ * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set.
  *
  * If we are in_interrupt, then process context, including cpusets and
  * mempolicy, may not apply and should not be used for allocation policy.
@@ -3226,7 +3226,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
 {
 	void *objp;
 
-	if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
+	if (current->mempolicy || cpuset_do_slab_mem_spread()) {
 		objp = alternate_node_alloc(cache, flags);
 		if (objp)
 			goto out;