Skip to content

Commit e4f8d81

Browse files
rostedthtejun
authored andcommitted
cgroup/tracing: Move taking of spin lock out of trace event handlers
It is unwise to take spin locks from the handlers of trace events. Mainly, because they can introduce lockups, because it introduces locks in places that are normally not tested. Worse yet, because trace events are tucked away in the include/trace/events/ directory, locks that are taken there are forgotten about. As a general rule, I tell people never to take any locks in a trace event handler. Several cgroup trace event handlers call cgroup_path() which eventually takes the kernfs_rename_lock spinlock. This injects the spinlock in the code without people realizing it. It also can cause issues for the PREEMPT_RT patch, as the spinlock becomes a mutex, and the trace event handlers are called with preemption disabled. By moving the calculation of the cgroup_path() out of the trace event handlers and into a macro (surrounded by a trace_cgroup_##type##_enabled()), then we could place the cgroup_path into a string, and pass that to the trace event. Not only does this remove the taking of the spinlock out of the trace event handler, but it also means that the cgroup_path() only needs to be called once (it is currently called twice, once to get the length to reserver the buffer for, and once again to get the path itself. Now it only needs to be done once. Reported-by: Sebastian Andrzej Siewior <[email protected]> Signed-off-by: Steven Rostedt (VMware) <[email protected]> Signed-off-by: Tejun Heo <[email protected]>
1 parent 1e09177 commit e4f8d81

File tree

4 files changed

+58
-31
lines changed

4 files changed

+58
-31
lines changed

include/trace/events/cgroup.h

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -53,24 +53,22 @@ DEFINE_EVENT(cgroup_root, cgroup_remount,
5353

5454
DECLARE_EVENT_CLASS(cgroup,
5555

56-
TP_PROTO(struct cgroup *cgrp),
56+
TP_PROTO(struct cgroup *cgrp, const char *path),
5757

58-
TP_ARGS(cgrp),
58+
TP_ARGS(cgrp, path),
5959

6060
TP_STRUCT__entry(
6161
__field( int, root )
6262
__field( int, id )
6363
__field( int, level )
64-
__dynamic_array(char, path,
65-
cgroup_path(cgrp, NULL, 0) + 1)
64+
__string( path, path )
6665
),
6766

6867
TP_fast_assign(
6968
__entry->root = cgrp->root->hierarchy_id;
7069
__entry->id = cgrp->id;
7170
__entry->level = cgrp->level;
72-
cgroup_path(cgrp, __get_dynamic_array(path),
73-
__get_dynamic_array_len(path));
71+
__assign_str(path, path);
7472
),
7573

7674
TP_printk("root=%d id=%d level=%d path=%s",
@@ -79,54 +77,53 @@ DECLARE_EVENT_CLASS(cgroup,
7977

8078
DEFINE_EVENT(cgroup, cgroup_mkdir,
8179

82-
TP_PROTO(struct cgroup *cgroup),
80+
TP_PROTO(struct cgroup *cgrp, const char *path),
8381

84-
TP_ARGS(cgroup)
82+
TP_ARGS(cgrp, path)
8583
);
8684

8785
DEFINE_EVENT(cgroup, cgroup_rmdir,
8886

89-
TP_PROTO(struct cgroup *cgroup),
87+
TP_PROTO(struct cgroup *cgrp, const char *path),
9088

91-
TP_ARGS(cgroup)
89+
TP_ARGS(cgrp, path)
9290
);
9391

9492
DEFINE_EVENT(cgroup, cgroup_release,
9593

96-
TP_PROTO(struct cgroup *cgroup),
94+
TP_PROTO(struct cgroup *cgrp, const char *path),
9795

98-
TP_ARGS(cgroup)
96+
TP_ARGS(cgrp, path)
9997
);
10098

10199
DEFINE_EVENT(cgroup, cgroup_rename,
102100

103-
TP_PROTO(struct cgroup *cgroup),
101+
TP_PROTO(struct cgroup *cgrp, const char *path),
104102

105-
TP_ARGS(cgroup)
103+
TP_ARGS(cgrp, path)
106104
);
107105

108106
DECLARE_EVENT_CLASS(cgroup_migrate,
109107

110-
TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
108+
TP_PROTO(struct cgroup *dst_cgrp, const char *path,
109+
struct task_struct *task, bool threadgroup),
111110

112-
TP_ARGS(dst_cgrp, task, threadgroup),
111+
TP_ARGS(dst_cgrp, path, task, threadgroup),
113112

114113
TP_STRUCT__entry(
115114
__field( int, dst_root )
116115
__field( int, dst_id )
117116
__field( int, dst_level )
118-
__dynamic_array(char, dst_path,
119-
cgroup_path(dst_cgrp, NULL, 0) + 1)
120117
__field( int, pid )
118+
__string( dst_path, path )
121119
__string( comm, task->comm )
122120
),
123121

124122
TP_fast_assign(
125123
__entry->dst_root = dst_cgrp->root->hierarchy_id;
126124
__entry->dst_id = dst_cgrp->id;
127125
__entry->dst_level = dst_cgrp->level;
128-
cgroup_path(dst_cgrp, __get_dynamic_array(dst_path),
129-
__get_dynamic_array_len(dst_path));
126+
__assign_str(dst_path, path);
130127
__entry->pid = task->pid;
131128
__assign_str(comm, task->comm);
132129
),
@@ -138,16 +135,18 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
138135

139136
DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
140137

141-
TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
138+
TP_PROTO(struct cgroup *dst_cgrp, const char *path,
139+
struct task_struct *task, bool threadgroup),
142140

143-
TP_ARGS(dst_cgrp, task, threadgroup)
141+
TP_ARGS(dst_cgrp, path, task, threadgroup)
144142
);
145143

146144
DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
147145

148-
TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
146+
TP_PROTO(struct cgroup *dst_cgrp, const char *path,
147+
struct task_struct *task, bool threadgroup),
149148

150-
TP_ARGS(dst_cgrp, task, threadgroup)
149+
TP_ARGS(dst_cgrp, path, task, threadgroup)
151150
);
152151

153152
#endif /* _TRACE_CGROUP_H */

kernel/cgroup/cgroup-internal.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,32 @@
88
#include <linux/list.h>
99
#include <linux/refcount.h>
1010

11+
#define TRACE_CGROUP_PATH_LEN 1024
12+
extern spinlock_t trace_cgroup_path_lock;
13+
extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
14+
15+
/*
16+
* cgroup_path() takes a spin lock. It is good practice not to take
17+
* spin locks within trace point handlers, as they are mostly hidden
18+
* from normal view. As cgroup_path() can take the kernfs_rename_lock
19+
* spin lock, it is best to not call that function from the trace event
20+
* handler.
21+
*
22+
* Note: trace_cgroup_##type##_enabled() is a static branch that will only
23+
* be set when the trace event is enabled.
24+
*/
25+
#define TRACE_CGROUP_PATH(type, cgrp, ...) \
26+
do { \
27+
if (trace_cgroup_##type##_enabled()) { \
28+
spin_lock(&trace_cgroup_path_lock); \
29+
cgroup_path(cgrp, trace_cgroup_path, \
30+
TRACE_CGROUP_PATH_LEN); \
31+
trace_cgroup_##type(cgrp, trace_cgroup_path, \
32+
##__VA_ARGS__); \
33+
spin_unlock(&trace_cgroup_path_lock); \
34+
} \
35+
} while (0)
36+
1137
/*
1238
* A cgroup can be associated with multiple css_sets as different tasks may
1339
* belong to different cgroups on different hierarchies. In the other

kernel/cgroup/cgroup-v1.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
135135
if (task) {
136136
ret = cgroup_migrate(task, false, &mgctx);
137137
if (!ret)
138-
trace_cgroup_transfer_tasks(to, task, false);
138+
TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
139139
put_task_struct(task);
140140
}
141141
} while (task && !ret);
@@ -865,7 +865,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
865865

866866
ret = kernfs_rename(kn, new_parent, new_name_str);
867867
if (!ret)
868-
trace_cgroup_rename(cgrp);
868+
TRACE_CGROUP_PATH(rename, cgrp);
869869

870870
mutex_unlock(&cgroup_mutex);
871871

kernel/cgroup/cgroup.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex);
8383
EXPORT_SYMBOL_GPL(css_set_lock);
8484
#endif
8585

86+
DEFINE_SPINLOCK(trace_cgroup_path_lock);
87+
char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
88+
8689
/*
8790
* Protects cgroup_idr and css_idr so that IDs can be released without
8891
* grabbing cgroup_mutex.
@@ -2638,7 +2641,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
26382641
cgroup_migrate_finish(&mgctx);
26392642

26402643
if (!ret)
2641-
trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
2644+
TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
26422645

26432646
return ret;
26442647
}
@@ -4634,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work)
46344637
struct cgroup *tcgrp;
46354638

46364639
/* cgroup release path */
4637-
trace_cgroup_release(cgrp);
4640+
TRACE_CGROUP_PATH(release, cgrp);
46384641

46394642
if (cgroup_on_dfl(cgrp))
46404643
cgroup_rstat_flush(cgrp);
@@ -4977,7 +4980,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
49774980
if (ret)
49784981
goto out_destroy;
49794982

4980-
trace_cgroup_mkdir(cgrp);
4983+
TRACE_CGROUP_PATH(mkdir, cgrp);
49814984

49824985
/* let's create and online css's */
49834986
kernfs_activate(kn);
@@ -5165,9 +5168,8 @@ int cgroup_rmdir(struct kernfs_node *kn)
51655168
return 0;
51665169

51675170
ret = cgroup_destroy_locked(cgrp);
5168-
51695171
if (!ret)
5170-
trace_cgroup_rmdir(cgrp);
5172+
TRACE_CGROUP_PATH(rmdir, cgrp);
51715173

51725174
cgroup_kn_unlock(kn);
51735175
return ret;

0 commit comments

Comments
 (0)