Commit 487f05e

Authored by virtuoso (Alexander Shishkin), committed by Ingo Molnar
perf/core: Optimize event rescheduling on active contexts
When new events are added to an active context, we go and reschedule all
cpu groups and all task groups in order to preserve the priority
(cpu pinned, task pinned, cpu flexible, task flexible), but in reality we
only need to reschedule groups of the same priority as that of the events
being added, and below.

This patch changes the behavior so that only groups that need to be
rescheduled are rescheduled.

Reported-by: Adrian Hunter <[email protected]>
Signed-off-by: Alexander Shishkin <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vince Weaver <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
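For illustration only (not part of the commit): a minimal user-space sketch, in C, of how the four priorities above map onto the event_type bitmask that the patch threads through the scheduling paths. classify() mirrors the new get_event_type() helper in the diff below; the enum values are taken from the patch, while classify() itself and the main() driver are invented for this sketch.

/*
 * User-space sketch, not kernel code: maps the four scheduling priorities
 * onto the event_type bitmask introduced by this patch.
 */
#include <stdbool.h>
#include <stdio.h>

enum event_type_t {
        EVENT_FLEXIBLE = 0x1,
        EVENT_PINNED   = 0x2,
        EVENT_CPU      = 0x8,   /* new bit introduced by this patch */
};

/* Mirrors get_event_type(): the pinned attribute selects the base type,
 * and a CPU context (no owning task) adds the EVENT_CPU bit. */
static enum event_type_t classify(bool pinned, bool task_event)
{
        enum event_type_t t = pinned ? EVENT_PINNED : EVENT_FLEXIBLE;

        if (!task_event)
                t |= EVENT_CPU;
        return t;
}

int main(void)
{
        printf("cpu pinned    -> 0x%x\n", classify(true,  false)); /* 0xa */
        printf("task pinned   -> 0x%x\n", classify(true,  true));  /* 0x2 */
        printf("cpu flexible  -> 0x%x\n", classify(false, false)); /* 0x9 */
        printf("task flexible -> 0x%x\n", classify(false, true));  /* 0x1 */
        return 0;
}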
1 parent fe45baf · commit 487f05e

1 file changed, 69 insertions(+), 11 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -355,6 +355,8 @@ enum event_type_t {
         EVENT_FLEXIBLE = 0x1,
         EVENT_PINNED = 0x2,
         EVENT_TIME = 0x4,
+        /* see ctx_resched() for details */
+        EVENT_CPU = 0x8,
         EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
 };
 
@@ -1442,6 +1444,20 @@ static void update_group_times(struct perf_event *leader)
                 update_event_times(event);
 }
 
+static enum event_type_t get_event_type(struct perf_event *event)
+{
+        struct perf_event_context *ctx = event->ctx;
+        enum event_type_t event_type;
+
+        lockdep_assert_held(&ctx->lock);
+
+        event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
+        if (!ctx->task)
+                event_type |= EVENT_CPU;
+
+        return event_type;
+}
+
 static struct list_head *
 ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 {
@@ -2215,15 +2231,16 @@ ctx_sched_in(struct perf_event_context *ctx,
              struct task_struct *task);
 
 static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
-                               struct perf_event_context *ctx)
+                               struct perf_event_context *ctx,
+                               enum event_type_t event_type)
 {
         if (!cpuctx->task_ctx)
                 return;
 
         if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
                 return;
 
-        ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+        ctx_sched_out(ctx, cpuctx, event_type);
 }
 
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
@@ -2238,13 +2255,51 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
                 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
 }
 
+/*
+ * We want to maintain the following priority of scheduling:
+ *  - CPU pinned (EVENT_CPU | EVENT_PINNED)
+ *  - task pinned (EVENT_PINNED)
+ *  - CPU flexible (EVENT_CPU | EVENT_FLEXIBLE)
+ *  - task flexible (EVENT_FLEXIBLE).
+ *
+ * In order to avoid unscheduling and scheduling back in everything every
+ * time an event is added, only do it for the groups of equal priority and
+ * below.
+ *
+ * This can be called after a batch operation on task events, in which case
+ * event_type is a bit mask of the types of events involved. For CPU events,
+ * event_type is only either EVENT_PINNED or EVENT_FLEXIBLE.
+ */
 static void ctx_resched(struct perf_cpu_context *cpuctx,
-                        struct perf_event_context *task_ctx)
+                        struct perf_event_context *task_ctx,
+                        enum event_type_t event_type)
 {
+        enum event_type_t ctx_event_type = event_type & EVENT_ALL;
+        bool cpu_event = !!(event_type & EVENT_CPU);
+
+        /*
+         * If pinned groups are involved, flexible groups also need to be
+         * scheduled out.
+         */
+        if (event_type & EVENT_PINNED)
+                event_type |= EVENT_FLEXIBLE;
+
         perf_pmu_disable(cpuctx->ctx.pmu);
         if (task_ctx)
-                task_ctx_sched_out(cpuctx, task_ctx);
-        cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+                task_ctx_sched_out(cpuctx, task_ctx, event_type);
+
+        /*
+         * Decide which cpu ctx groups to schedule out based on the types
+         * of events that caused rescheduling:
+         *  - EVENT_CPU: schedule out corresponding groups;
+         *  - EVENT_PINNED task events: schedule out EVENT_FLEXIBLE groups;
+         *  - otherwise, do nothing more.
+         */
+        if (cpu_event)
+                cpu_ctx_sched_out(cpuctx, ctx_event_type);
+        else if (ctx_event_type & EVENT_PINNED)
+                cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+
         perf_event_sched_in(cpuctx, task_ctx, current);
         perf_pmu_enable(cpuctx->ctx.pmu);
 }
@@ -2291,7 +2346,7 @@ static int __perf_install_in_context(void *info)
         if (reprogram) {
                 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
                 add_event_to_ctx(event, ctx);
-                ctx_resched(cpuctx, task_ctx);
+                ctx_resched(cpuctx, task_ctx, get_event_type(event));
         } else {
                 add_event_to_ctx(event, ctx);
         }
@@ -2458,7 +2513,7 @@ static void __perf_event_enable(struct perf_event *event,
         if (ctx->task)
                 WARN_ON_ONCE(task_ctx != ctx);
 
-        ctx_resched(cpuctx, task_ctx);
+        ctx_resched(cpuctx, task_ctx, get_event_type(event));
 }
 
 /*
@@ -2885,7 +2940,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 
         if (do_switch) {
                 raw_spin_lock(&ctx->lock);
-                task_ctx_sched_out(cpuctx, ctx);
+                task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
                 raw_spin_unlock(&ctx->lock);
         }
 }
@@ -3442,6 +3497,7 @@ static int event_enable_on_exec(struct perf_event *event,
 static void perf_event_enable_on_exec(int ctxn)
 {
         struct perf_event_context *ctx, *clone_ctx = NULL;
+        enum event_type_t event_type = 0;
         struct perf_cpu_context *cpuctx;
         struct perf_event *event;
         unsigned long flags;
@@ -3455,15 +3511,17 @@ static void perf_event_enable_on_exec(int ctxn)
         cpuctx = __get_cpu_context(ctx);
         perf_ctx_lock(cpuctx, ctx);
         ctx_sched_out(ctx, cpuctx, EVENT_TIME);
-        list_for_each_entry(event, &ctx->event_list, event_entry)
+        list_for_each_entry(event, &ctx->event_list, event_entry) {
                 enabled |= event_enable_on_exec(event, ctx);
+                event_type |= get_event_type(event);
+        }
 
         /*
          * Unclone and reschedule this context if we enabled any event.
          */
         if (enabled) {
                 clone_ctx = unclone_ctx(ctx);
-                ctx_resched(cpuctx, ctx);
+                ctx_resched(cpuctx, ctx, event_type);
         }
         perf_ctx_unlock(cpuctx, ctx);
 
@@ -10224,7 +10282,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
          * in.
          */
         raw_spin_lock_irq(&child_ctx->lock);
-        task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
+        task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx, EVENT_ALL);
 
         /*
          * Now that the context is inactive, destroy the task <-> ctx relation
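Also for illustration (not part of the commit): a user-space sketch of the decision ctx_resched() makes with its new event_type argument, i.e. which task-context and CPU-context groups are scheduled out for each kind of added event. The bitmask logic mirrors the ctx_resched() hunk above; show_resched(), its printf reporting, and main() are invented for this sketch.

/*
 * User-space sketch, not kernel code: models ctx_resched()'s decision as
 * introduced by this patch. It assumes a task context is present;
 * ctx_resched() skips task_ctx_sched_out() when task_ctx is NULL.
 */
#include <stdio.h>

enum event_type_t {
        EVENT_FLEXIBLE = 0x1,
        EVENT_PINNED   = 0x2,
        EVENT_CPU      = 0x8,
        EVENT_ALL      = EVENT_FLEXIBLE | EVENT_PINNED,
};

static void show_resched(enum event_type_t event_type)
{
        enum event_type_t ctx_event_type = event_type & EVENT_ALL;
        int cpu_event = !!(event_type & EVENT_CPU);

        /* pinned groups force the flexible groups out as well */
        if (event_type & EVENT_PINNED)
                event_type |= EVENT_FLEXIBLE;

        printf("task ctx: schedule out 0x%x; ", event_type & EVENT_ALL);

        if (cpu_event)
                printf("cpu ctx: schedule out 0x%x\n", ctx_event_type);
        else if (ctx_event_type & EVENT_PINNED)
                printf("cpu ctx: schedule out EVENT_FLEXIBLE only\n");
        else
                printf("cpu ctx: left alone\n");
}

int main(void)
{
        show_resched(EVENT_CPU | EVENT_PINNED);    /* cpu pinned    */
        show_resched(EVENT_PINNED);                /* task pinned   */
        show_resched(EVENT_CPU | EVENT_FLEXIBLE);  /* cpu flexible  */
        show_resched(EVENT_FLEXIBLE);              /* task flexible */
        return 0;
}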
