commit 39e3836 (2 parents: 8c76e31 + efe951d)

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
 "Four patches which all address lock inversions and deadlocks in the
  perf core code and the Intel debug store"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86: Fix perf,x86,cpuhp deadlock
  perf/core: Fix ctx::mutex deadlock
  perf/core: Fix another perf,trace,cpuhp lock inversion
  perf/core: Fix lock inversion between perf,trace,cpuhp
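For context, the common failure mode all four patches address is a lock inversion: two code paths acquire the same pair of locks in opposite orders, so each can end up waiting on the other forever. Below is a minimal user-space sketch of that pattern (illustrative only, not kernel code; the lock names are made up). In the kernel the locks involved here are cpu_hotplug_lock, pmus_lock, and ctx->mutex, and lockdep reports the inversion even on runs where the timing never actually deadlocks.

/* abba.c -- sketch only; made-up lock names, user-space pthreads.
 * Build: cc -pthread abba.c */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

static void *path_one(void *arg)            /* takes A, then B */
{
        pthread_mutex_lock(&lock_a);
        pthread_mutex_lock(&lock_b);        /* waits if path_two holds B */
        pthread_mutex_unlock(&lock_b);
        pthread_mutex_unlock(&lock_a);
        return NULL;
}

static void *path_two(void *arg)            /* takes B, then A: the inversion */
{
        pthread_mutex_lock(&lock_b);
        pthread_mutex_lock(&lock_a);        /* waits if path_one holds A */
        pthread_mutex_unlock(&lock_a);
        pthread_mutex_unlock(&lock_b);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        /* Running these concurrently *can* deadlock: t1 holding A and
         * t2 holding B leaves each waiting on the other forever. The
         * fixes below either impose one global order or drop one lock
         * before taking the other. */
        pthread_create(&t1, NULL, path_one, NULL);
        pthread_create(&t2, NULL, path_two, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        puts("timing was lucky this run");
        return 0;
}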

File tree: 2 files changed, 60 insertions(+), 20 deletions(-)

arch/x86/events/intel/ds.c

Lines changed: 18 additions & 15 deletions
@@ -372,10 +372,9 @@ static int alloc_pebs_buffer(int cpu)
 static void release_pebs_buffer(int cpu)
 {
         struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
-        struct debug_store *ds = hwev->ds;
         void *cea;
 
-        if (!ds || !x86_pmu.pebs)
+        if (!x86_pmu.pebs)
                 return;
 
         kfree(per_cpu(insn_buffer, cpu));
@@ -384,7 +383,6 @@ static void release_pebs_buffer(int cpu)
         /* Clear the fixmap */
         cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
         ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
-        ds->pebs_buffer_base = 0;
         dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
         hwev->ds_pebs_vaddr = NULL;
 }
@@ -419,16 +417,14 @@ static int alloc_bts_buffer(int cpu)
 static void release_bts_buffer(int cpu)
 {
         struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
-        struct debug_store *ds = hwev->ds;
         void *cea;
 
-        if (!ds || !x86_pmu.bts)
+        if (!x86_pmu.bts)
                 return;
 
         /* Clear the fixmap */
         cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
         ds_clear_cea(cea, BTS_BUFFER_SIZE);
-        ds->bts_buffer_base = 0;
         dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
         hwev->ds_bts_vaddr = NULL;
 }
@@ -454,16 +450,22 @@ void release_ds_buffers(void)
         if (!x86_pmu.bts && !x86_pmu.pebs)
                 return;
 
-        get_online_cpus();
-        for_each_online_cpu(cpu)
+        for_each_possible_cpu(cpu)
+                release_ds_buffer(cpu);
+
+        for_each_possible_cpu(cpu) {
+                /*
+                 * Again, ignore errors from offline CPUs, they will no longer
+                 * observe cpu_hw_events.ds and not program the DS_AREA when
+                 * they come up.
+                 */
                 fini_debug_store_on_cpu(cpu);
+        }
 
         for_each_possible_cpu(cpu) {
                 release_pebs_buffer(cpu);
                 release_bts_buffer(cpu);
-                release_ds_buffer(cpu);
         }
-        put_online_cpus();
 }
 
 void reserve_ds_buffers(void)
@@ -483,8 +485,6 @@ void reserve_ds_buffers(void)
         if (!x86_pmu.pebs)
                 pebs_err = 1;
 
-        get_online_cpus();
-
         for_each_possible_cpu(cpu) {
                 if (alloc_ds_buffer(cpu)) {
                         bts_err = 1;
@@ -521,11 +521,14 @@ void reserve_ds_buffers(void)
                 if (x86_pmu.pebs && !pebs_err)
                         x86_pmu.pebs_active = 1;
 
-                for_each_online_cpu(cpu)
+                for_each_possible_cpu(cpu) {
+                        /*
+                         * Ignores wrmsr_on_cpu() errors for offline CPUs they
+                         * will get this call through intel_pmu_cpu_starting().
+                         */
                         init_debug_store_on_cpu(cpu);
+                }
         }
 }
 
 /*
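The shape of the fix above: release_ds_buffers() and reserve_ds_buffers() no longer take get_online_cpus() (i.e. cpu_hotplug_lock, which inverts against locks already held on this path). Instead they walk every possible CPU, and a CPU that is offline simply picks its state back up via intel_pmu_cpu_starting() when it comes online. A rough user-space sketch of that allocation pattern, with made-up names (not the kernel's per-CPU API):

/* per_possible.c -- sketch only. State is sized for every CPU that
 * could ever exist, so setup and teardown can walk the whole array
 * without serializing against CPUs coming online or going offline. */
#include <stdlib.h>

#define NR_POSSIBLE_CPUS 64             /* fixed for the system's lifetime */

struct ds_slot {
        void *buffer;                   /* NULL when not allocated */
};

static struct ds_slot slots[NR_POSSIBLE_CPUS];

static int reserve_all(size_t size)
{
        /* Allocate for *possible* CPUs, not just online ones: a CPU
         * that comes up later finds its slot ready and programs its
         * own hardware (intel_pmu_cpu_starting() in the real code). */
        for (int cpu = 0; cpu < NR_POSSIBLE_CPUS; cpu++) {
                slots[cpu].buffer = calloc(1, size);
                if (!slots[cpu].buffer)
                        return -1;
        }
        return 0;
}

static void release_all(void)
{
        /* No hotplug lock needed: the possible set never changes. */
        for (int cpu = 0; cpu < NR_POSSIBLE_CPUS; cpu++) {
                free(slots[cpu].buffer);
                slots[cpu].buffer = NULL;
        }
}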

kernel/events/core.c

Lines changed: 42 additions & 5 deletions
@@ -1231,6 +1231,10 @@ static void put_ctx(struct perf_event_context *ctx)
  * perf_event_context::lock
  * perf_event::mmap_mutex
  * mmap_sem
+ *
+ * cpu_hotplug_lock
+ *   pmus_lock
+ *     cpuctx->mutex / perf_event_context::mutex
  */
 static struct perf_event_context *
 perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
@@ -4196,6 +4200,7 @@ int perf_event_release_kernel(struct perf_event *event)
 {
         struct perf_event_context *ctx = event->ctx;
         struct perf_event *child, *tmp;
+        LIST_HEAD(free_list);
 
         /*
          * If we got here through err_file: fput(event_file); we will not have
@@ -4268,8 +4273,7 @@ int perf_event_release_kernel(struct perf_event *event)
                                       struct perf_event, child_list);
                 if (tmp == child) {
                         perf_remove_from_context(child, DETACH_GROUP);
-                        list_del(&child->child_list);
-                        free_event(child);
+                        list_move(&child->child_list, &free_list);
                         /*
                          * This matches the refcount bump in inherit_event();
                          * this can't be the last reference.
@@ -4284,6 +4288,11 @@ int perf_event_release_kernel(struct perf_event *event)
         }
         mutex_unlock(&event->child_mutex);
 
+        list_for_each_entry_safe(child, tmp, &free_list, child_list) {
+                list_del(&child->child_list);
+                free_event(child);
+        }
+
 no_ctx:
         put_event(event); /* Must be the 'last' reference */
         return 0;
@@ -8516,6 +8525,29 @@ perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
         return ret;
 }
 
+static int
+perf_tracepoint_set_filter(struct perf_event *event, char *filter_str)
+{
+        struct perf_event_context *ctx = event->ctx;
+        int ret;
+
+        /*
+         * Beware, here be dragons!!
+         *
+         * the tracepoint muck will deadlock against ctx->mutex, but the tracepoint
+         * stuff does not actually need it. So temporarily drop ctx->mutex. As per
+         * perf_event_ctx_lock() we already have a reference on ctx.
+         *
+         * This can result in event getting moved to a different ctx, but that
+         * does not affect the tracepoint state.
+         */
+        mutex_unlock(&ctx->mutex);
+        ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
+        mutex_lock(&ctx->mutex);
+
+        return ret;
+}
+
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
 {
         char *filter_str;
@@ -8532,8 +8564,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
 
         if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
             event->attr.type == PERF_TYPE_TRACEPOINT)
-                ret = ftrace_profile_set_filter(event, event->attr.config,
-                                                filter_str);
+                ret = perf_tracepoint_set_filter(event, filter_str);
         else if (has_addr_filter(event))
                 ret = perf_event_set_addr_filter(event, filter_str);
 
@@ -9168,7 +9199,13 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
         if (!try_module_get(pmu->module))
                 return -ENODEV;
 
-        if (event->group_leader != event) {
+        /*
+         * A number of pmu->event_init() methods iterate the sibling_list to,
+         * for example, validate if the group fits on the PMU. Therefore,
+         * if this is a sibling event, acquire the ctx->mutex to protect
+         * the sibling_list.
+         */
+        if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) {
                 /*
                  * This ctx->mutex can nest when we're called through
                  * inheritance. See the perf_event_ctx_lock_nested() comment.
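Two of the core changes above are instances of the same defensive pattern. perf_event_release_kernel() now moves children onto a local free_list under child_mutex and calls free_event() only after the mutex is dropped, and perf_tracepoint_set_filter() temporarily drops ctx->mutex around the ftrace call. A user-space sketch of the first pattern (unlink under the lock, destroy after unlock) with made-up names, plain C rather than the kernel's list.h:

/* deferred_free.c -- sketch only. Build: cc -pthread deferred_free.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
        struct node *next;
        int payload;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *the_list;

static void destroy(struct node *n)
{
        /* In the kernel analogue, free_event() can take locks that must
         * not nest inside child_mutex -- hence the two-phase release. */
        printf("freeing %d\n", n->payload);
        free(n);
}

static void release_all(void)
{
        struct node *free_list = NULL, *n;

        /* Phase 1: unlink everything while holding the lock; nothing
         * in this loop can block on another lock. */
        pthread_mutex_lock(&list_lock);
        while ((n = the_list)) {
                the_list = n->next;
                n->next = free_list;            /* move to a private list */
                free_list = n;
        }
        pthread_mutex_unlock(&list_lock);

        /* Phase 2: destroy with no locks held; no inversion is possible. */
        while ((n = free_list)) {
                free_list = n->next;
                destroy(n);
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));
                n->payload = i;
                n->next = the_list;
                the_list = n;
        }
        release_all();
        return 0;
}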
