@@ -355,6 +355,8 @@ enum event_type_t {
 	EVENT_FLEXIBLE = 0x1,
 	EVENT_PINNED = 0x2,
 	EVENT_TIME = 0x4,
+	/* see ctx_resched() for details */
+	EVENT_CPU = 0x8,
 	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
 };
 
@@ -1442,6 +1444,20 @@ static void update_group_times(struct perf_event *leader)
 		update_event_times(event);
 }
 
+static enum event_type_t get_event_type(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	enum event_type_t event_type;
+
+	lockdep_assert_held(&ctx->lock);
+
+	event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
+	if (!ctx->task)
+		event_type |= EVENT_CPU;
+
+	return event_type;
+}
+
 static struct list_head *
 ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 {
@@ -2215,15 +2231,16 @@ ctx_sched_in(struct perf_event_context *ctx,
 	     struct task_struct *task);
 
 static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
-			       struct perf_event_context *ctx)
+			       struct perf_event_context *ctx,
+			       enum event_type_t event_type)
 {
 	if (!cpuctx->task_ctx)
 		return;
 
 	if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
 		return;
 
-	ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+	ctx_sched_out(ctx, cpuctx, event_type);
 }
 
 static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
@@ -2238,13 +2255,51 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
 		ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
 }
 
+/*
+ * We want to maintain the following priority of scheduling:
+ *  - CPU pinned (EVENT_CPU | EVENT_PINNED)
+ *  - task pinned (EVENT_PINNED)
+ *  - CPU flexible (EVENT_CPU | EVENT_FLEXIBLE)
+ *  - task flexible (EVENT_FLEXIBLE).
+ *
+ * In order to avoid unscheduling and scheduling back in everything every
+ * time an event is added, only do it for the groups of equal priority and
+ * below.
+ *
+ * This can be called after a batch operation on task events, in which case
+ * event_type is a bit mask of the types of events involved. For CPU events,
+ * event_type is only either EVENT_PINNED or EVENT_FLEXIBLE.
+ */
 static void ctx_resched(struct perf_cpu_context *cpuctx,
-			struct perf_event_context *task_ctx)
+			struct perf_event_context *task_ctx,
+			enum event_type_t event_type)
 {
+	enum event_type_t ctx_event_type = event_type & EVENT_ALL;
+	bool cpu_event = !!(event_type & EVENT_CPU);
+
+	/*
+	 * If pinned groups are involved, flexible groups also need to be
+	 * scheduled out.
+	 */
+	if (event_type & EVENT_PINNED)
+		event_type |= EVENT_FLEXIBLE;
+
 	perf_pmu_disable(cpuctx->ctx.pmu);
 	if (task_ctx)
-		task_ctx_sched_out(cpuctx, task_ctx);
-	cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+		task_ctx_sched_out(cpuctx, task_ctx, event_type);
+
+	/*
+	 * Decide which cpu ctx groups to schedule out based on the types
+	 * of events that caused rescheduling:
+	 *  - EVENT_CPU: schedule out corresponding groups;
+	 *  - EVENT_PINNED task events: schedule out EVENT_FLEXIBLE groups;
+	 *  - otherwise, do nothing more.
+	 */
+	if (cpu_event)
+		cpu_ctx_sched_out(cpuctx, ctx_event_type);
+	else if (ctx_event_type & EVENT_PINNED)
+		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+
 	perf_event_sched_in(cpuctx, task_ctx, current);
 	perf_pmu_enable(cpuctx->ctx.pmu);
 }
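
The hunk above carries the core of the change: ctx_resched() now takes an event_type mask and only unschedules groups at or below the priority of the event(s) that triggered the reschedule. As a rough illustration of that flag arithmetic, here is a standalone userspace sketch (not kernel code): model_ctx_resched() and the have_task_ctx parameter are invented for the example, while the enum values and the decision logic are copied from the hunk. It simply prints which groups each kind of reschedule would schedule out.

/* Standalone model of the ctx_resched() decision logic -- illustration only. */
#include <stdbool.h>
#include <stdio.h>

enum event_type_t {
	EVENT_FLEXIBLE = 0x1,
	EVENT_PINNED = 0x2,
	EVENT_TIME = 0x4,
	EVENT_CPU = 0x8,
	EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
};

/* Mirror of the new ctx_resched() control flow: report which groups would
 * be scheduled out instead of actually rescheduling anything. */
static void model_ctx_resched(enum event_type_t event_type, bool have_task_ctx)
{
	enum event_type_t ctx_event_type = event_type & EVENT_ALL;
	bool cpu_event = !!(event_type & EVENT_CPU);

	printf("resched for 0x%x:", (unsigned int)event_type);

	/* Pinned groups outrank flexible ones, so touching pinned groups
	 * forces the lower-priority flexible groups out as well. */
	if (event_type & EVENT_PINNED)
		event_type |= EVENT_FLEXIBLE;

	if (have_task_ctx)
		printf(" task ctx sched out 0x%x;", (unsigned int)(event_type & EVENT_ALL));

	if (cpu_event)
		printf(" cpu ctx sched out 0x%x\n", (unsigned int)ctx_event_type);
	else if (ctx_event_type & EVENT_PINNED)
		printf(" cpu ctx sched out 0x%x (flexible only)\n", (unsigned int)EVENT_FLEXIBLE);
	else
		printf(" cpu ctx left alone\n");
}

int main(void)
{
	model_ctx_resched(EVENT_FLEXIBLE, true);               /* task flexible event */
	model_ctx_resched(EVENT_PINNED, true);                 /* task pinned event   */
	model_ctx_resched(EVENT_CPU | EVENT_FLEXIBLE, false);  /* CPU flexible event  */
	model_ctx_resched(EVENT_CPU | EVENT_PINNED, false);    /* CPU pinned event    */
	return 0;
}
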
@@ -2291,7 +2346,7 @@ static int __perf_install_in_context(void *info)
 	if (reprogram) {
 		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 		add_event_to_ctx(event, ctx);
-		ctx_resched(cpuctx, task_ctx);
+		ctx_resched(cpuctx, task_ctx, get_event_type(event));
 	} else {
 		add_event_to_ctx(event, ctx);
 	}
@@ -2458,7 +2513,7 @@ static void __perf_event_enable(struct perf_event *event,
 	if (ctx->task)
 		WARN_ON_ONCE(task_ctx != ctx);
 
-	ctx_resched(cpuctx, task_ctx);
+	ctx_resched(cpuctx, task_ctx, get_event_type(event));
 }
 
 /*
@@ -2885,7 +2940,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 
 	if (do_switch) {
 		raw_spin_lock(&ctx->lock);
-		task_ctx_sched_out(cpuctx, ctx);
+		task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
 		raw_spin_unlock(&ctx->lock);
 	}
 }
@@ -3442,6 +3497,7 @@ static int event_enable_on_exec(struct perf_event *event,
 static void perf_event_enable_on_exec(int ctxn)
 {
 	struct perf_event_context *ctx, *clone_ctx = NULL;
+	enum event_type_t event_type = 0;
 	struct perf_cpu_context *cpuctx;
 	struct perf_event *event;
 	unsigned long flags;
@@ -3455,15 +3511,17 @@ static void perf_event_enable_on_exec(int ctxn)
 	cpuctx = __get_cpu_context(ctx);
 	perf_ctx_lock(cpuctx, ctx);
 	ctx_sched_out(ctx, cpuctx, EVENT_TIME);
-	list_for_each_entry(event, &ctx->event_list, event_entry)
+	list_for_each_entry(event, &ctx->event_list, event_entry) {
 		enabled |= event_enable_on_exec(event, ctx);
+		event_type |= get_event_type(event);
+	}
 
 	/*
 	 * Unclone and reschedule this context if we enabled any event.
 	 */
 	if (enabled) {
 		clone_ctx = unclone_ctx(ctx);
-		ctx_resched(cpuctx, ctx);
+		ctx_resched(cpuctx, ctx, event_type);
 	}
 
 	perf_ctx_unlock(cpuctx, ctx);
@@ -10224,7 +10282,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 * in.
 	 */
 	raw_spin_lock_irq(&child_ctx->lock);
-	task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
+	task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx, EVENT_ALL);
 
 	/*
 	 * Now that the context is inactive, destroy the task <-> ctx relation