Skip to content
This repository was archived by the owner on Nov 8, 2023. It is now read-only.

Commit aff0370

Browse files
surenbaghdasaryan authored and Peter Zijlstra committed
sched/psi: use kernfs polling functions for PSI trigger polling
Destroying the psi trigger in cgroup_file_release causes UAF issues when a cgroup is removed from under a polling process. This happens because cgroup removal causes a call to cgroup_file_release while the actual file is still alive. Destroying the trigger at this point also destroys its waitqueue head, and if there is still a polling process on that file accessing the waitqueue, it will step on the freed pointer:

    do_select
      vfs_poll
                               do_rmdir
                                 cgroup_rmdir
                                   kernfs_drain_open_files
                                     cgroup_file_release
                                       cgroup_pressure_release
                                         psi_trigger_destroy
                                           wake_up_pollfree(&t->event_wait)
    // vfs_poll is unblocked
                                           synchronize_rcu
                                           kfree(t)
    poll_freewait -> UAF access to the trigger's waitqueue head

Patch [1] fixed this issue for the epoll() case using wake_up_pollfree(); however, the same issue exists for the synchronous poll() case.

The root cause of this issue is that the lifecycles of the psi trigger's waitqueue and of the file associated with the trigger are different. Fix this by using the kernfs_generic_poll function when polling on cgroup-specific psi triggers. It internally uses the kernfs_open_node->poll waitqueue head, whose lifecycle is tied to the file's lifecycle. This also renders the fix in [1] obsolete, so revert it.

[1] commit c2dbe32 ("sched/psi: Fix use-after-free in ep_remove_wait_queue()")

Fixes: 0e94682 ("psi: introduce psi monitor")
Closes: https://lore.kernel.org/all/[email protected]/
Reported-by: Lu Jialin <[email protected]>
Signed-off-by: Suren Baghdasaryan <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent ae2ad29 commit aff0370

File tree

4 files changed

+28
-11
lines changed

4 files changed

+28
-11
lines changed

include/linux/psi.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ void psi_memstall_enter(unsigned long *flags);
2323
void psi_memstall_leave(unsigned long *flags);
2424

2525
int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
26-
struct psi_trigger *psi_trigger_create(struct psi_group *group,
27-
char *buf, enum psi_res res, struct file *file);
26+
struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
27+
enum psi_res res, struct file *file,
28+
struct kernfs_open_file *of);
2829
void psi_trigger_destroy(struct psi_trigger *t);
2930

3031
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,

include/linux/psi_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ struct psi_trigger {
137137
/* Wait queue for polling */
138138
wait_queue_head_t event_wait;
139139

140+
/* Kernfs file for cgroup triggers */
141+
struct kernfs_open_file *of;
142+
140143
/* Pending event flag */
141144
int event;
142145

kernel/cgroup/cgroup.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3730,7 +3730,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
37303730
}
37313731

37323732
psi = cgroup_psi(cgrp);
3733-
new = psi_trigger_create(psi, buf, res, of->file);
3733+
new = psi_trigger_create(psi, buf, res, of->file, of);
37343734
if (IS_ERR(new)) {
37353735
cgroup_put(cgrp);
37363736
return PTR_ERR(new);

kernel/sched/psi.c

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -493,8 +493,12 @@ static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total,
493493
continue;
494494

495495
/* Generate an event */
496-
if (cmpxchg(&t->event, 0, 1) == 0)
497-
wake_up_interruptible(&t->event_wait);
496+
if (cmpxchg(&t->event, 0, 1) == 0) {
497+
if (t->of)
498+
kernfs_notify(t->of->kn);
499+
else
500+
wake_up_interruptible(&t->event_wait);
501+
}
498502
t->last_event_time = now;
499503
/* Reset threshold breach flag once event got generated */
500504
t->pending_event = false;
@@ -1271,8 +1275,9 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
12711275
return 0;
12721276
}
12731277

1274-
struct psi_trigger *psi_trigger_create(struct psi_group *group,
1275-
char *buf, enum psi_res res, struct file *file)
1278+
struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
1279+
enum psi_res res, struct file *file,
1280+
struct kernfs_open_file *of)
12761281
{
12771282
struct psi_trigger *t;
12781283
enum psi_states state;
@@ -1331,7 +1336,9 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
13311336

13321337
t->event = 0;
13331338
t->last_event_time = 0;
1334-
init_waitqueue_head(&t->event_wait);
1339+
t->of = of;
1340+
if (!of)
1341+
init_waitqueue_head(&t->event_wait);
13351342
t->pending_event = false;
13361343
t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
13371344

@@ -1388,7 +1395,10 @@ void psi_trigger_destroy(struct psi_trigger *t)
13881395
* being accessed later. Can happen if cgroup is deleted from under a
13891396
* polling process.
13901397
*/
1391-
wake_up_pollfree(&t->event_wait);
1398+
if (t->of)
1399+
kernfs_notify(t->of->kn);
1400+
else
1401+
wake_up_interruptible(&t->event_wait);
13921402

13931403
if (t->aggregator == PSI_AVGS) {
13941404
mutex_lock(&group->avgs_lock);
@@ -1465,7 +1475,10 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
14651475
if (!t)
14661476
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
14671477

1468-
poll_wait(file, &t->event_wait, wait);
1478+
if (t->of)
1479+
kernfs_generic_poll(t->of, wait);
1480+
else
1481+
poll_wait(file, &t->event_wait, wait);
14691482

14701483
if (cmpxchg(&t->event, 1, 0) == 1)
14711484
ret |= EPOLLPRI;
@@ -1535,7 +1548,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
15351548
return -EBUSY;
15361549
}
15371550

1538-
new = psi_trigger_create(&psi_system, buf, res, file);
1551+
new = psi_trigger_create(&psi_system, buf, res, file, NULL);
15391552
if (IS_ERR(new)) {
15401553
mutex_unlock(&seq->lock);
15411554
return PTR_ERR(new);

0 commit comments

Comments
 (0)