
Commit 6c869e7

Author: Ingo Molnar (committed)

Merge branch 'perf/urgent' into perf/core

Conflicts:
        arch/x86/kernel/apic/hw_nmi.c

Merge reason: Resolve conflict, queue up dependent patch.

Signed-off-by: Ingo Molnar <[email protected]>

2 parents e4e91ac + ee6dcfa, commit 6c869e7


12 files changed (+144, -46 lines)


arch/x86/Kconfig

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ config X86
         select HAVE_UNSTABLE_SCHED_CLOCK
         select HAVE_IDE
         select HAVE_OPROFILE
-        select HAVE_PERF_EVENTS if (!M386 && !M486)
+        select HAVE_PERF_EVENTS
         select HAVE_IRQ_WORK
         select HAVE_IOREMAP_PROT
         select HAVE_KPROBES

arch/x86/kernel/apic/hw_nmi.c

Lines changed: 4 additions & 3 deletions
@@ -17,16 +17,17 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
         return (u64)(cpu_khz) * 1000 * 60;
 }
 #endif
 
+
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
 #ifdef arch_trigger_all_cpu_backtrace
 void arch_trigger_all_cpu_backtrace(void)
 {

arch/x86/kernel/cpu/perf_event.c

Lines changed: 20 additions & 0 deletions
@@ -372,6 +372,20 @@ static void release_pmc_hardware(void) {}
 
 #endif
 
+static bool check_hw_exists(void)
+{
+        u64 val, val_new = 0;
+        int ret = 0;
+
+        val = 0xabcdUL;
+        ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+        ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+        if (ret || val != val_new)
+                return false;
+
+        return true;
+}
+
 static void reserve_ds_buffers(void);
 static void release_ds_buffers(void);
 
@@ -1363,6 +1377,12 @@ void __init init_hw_perf_events(void)
 
         pmu_check_apic();
 
+        /* sanity check that the hardware exists or is emulated */
+        if (!check_hw_exists()) {
+                pr_cont("Broken PMU hardware detected, software events only.\n");
+                return;
+        }
+
         pr_cont("%s PMU driver.\n", x86_pmu.name);
 
         if (x86_pmu.quirks)
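
Note on the change above: with check_hw_exists() in place, a machine whose counter MSRs do not respond falls back to software events instead of registering a broken PMU. A rough userspace sketch of what keeps working in that case (hypothetical example, not part of this commit; it only uses the standard perf_event_open() ABI):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;
        long fd;

        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_SOFTWARE;       /* needs no PMU hardware */
        attr.config = PERF_COUNT_SW_TASK_CLOCK;

        /* measure the calling task on any CPU; no group, no flags */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                perror("perf_event_open");
        else
                printf("software event opened, fd = %ld\n", fd);
        return 0;
}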

arch/x86/kernel/entry_64.S

Lines changed: 2 additions & 0 deletions
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
         .endm
 
 /* save partial stack frame */
+        .pushsection .kprobes.text, "ax"
 ENTRY(save_args)
         XCPT_FRAME
         cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
         ret
         CFI_ENDPROC
 END(save_args)
+        .popsection
 
 ENTRY(save_rest)
         PARTIAL_FRAME 1 REST_SKIP+8
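
Note on the change above: the .pushsection/.popsection pair moves save_args into .kprobes.text, the section the kprobes core refuses to place probes in. On the C side the kernel reaches the same section through the __kprobes function annotation; as a hedged approximation (the exact macro definition lives in the kprobes headers, and the function name here is made up):

/* rough approximation of the kernel's __kprobes annotation */
#define __kprobes __attribute__((__section__(".kprobes.text")))

static void __kprobes not_probeable(void)
{
        /* emitted into .kprobes.text, like save_args above */
}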

arch/x86/kernel/hw_breakpoint.c

Lines changed: 4 additions & 0 deletions
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
         dr6_p = (unsigned long *)ERR_PTR(args->err);
         dr6 = *dr6_p;
 
+        /* If it's a single step, TRAP bits are random */
+        if (dr6 & DR_STEP)
+                return NOTIFY_DONE;
+
         /* Do an early return if no trap bits are set in DR6 */
         if ((dr6 & DR_TRAP_BITS) == 0)
                 return NOTIFY_DONE;

include/linux/hw_breakpoint.h

Lines changed: 4 additions & 0 deletions
@@ -33,6 +33,8 @@ enum bp_type_idx {
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
+extern int __init init_hw_breakpoint(void);
+
 static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 {
         memset(attr, 0, sizeof(*attr));
@@ -108,6 +110,8 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
 
 #else /* !CONFIG_HAVE_HW_BREAKPOINT */
 
+static inline int __init init_hw_breakpoint(void) { return 0; }
+
 static inline struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
                             perf_overflow_handler_t triggered,

include/linux/perf_event.h

Lines changed: 16 additions & 14 deletions
@@ -850,6 +850,7 @@ struct perf_event_context {
         int                             nr_active;
         int                             is_active;
         int                             nr_stat;
+        int                             rotate_disable;
         atomic_t                        refcount;
         struct task_struct              *task;
 
@@ -908,20 +909,6 @@ extern int perf_num_counters(void);
 extern const char *perf_pmu_name(void);
 extern void __perf_event_task_sched_in(struct task_struct *task);
 extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
-
-extern atomic_t perf_task_events;
-
-static inline void perf_event_task_sched_in(struct task_struct *task)
-{
-        COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
-}
-
-static inline
-void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
-{
-        COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
-}
-
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
@@ -1030,6 +1017,21 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
                 __perf_sw_event(event_id, nr, nmi, regs, addr);
 }
 
+extern atomic_t perf_task_events;
+
+static inline void perf_event_task_sched_in(struct task_struct *task)
+{
+        COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
+}
+
+static inline
+void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
+{
+        perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
+
+        COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
+}
+
 extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);

kernel/hw_breakpoint.c

Lines changed: 1 addition & 2 deletions
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
         .read   = hw_breakpoint_pmu_read,
 };
 
-static int __init init_hw_breakpoint(void)
+int __init init_hw_breakpoint(void)
 {
         unsigned int **task_bp_pinned;
         int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
 
         return -ENOMEM;
 }
-core_initcall(init_hw_breakpoint);
 
 

kernel/irq_work.c

Lines changed: 3 additions & 1 deletion
@@ -145,7 +145,9 @@ void irq_work_run(void)
                  * Clear the BUSY bit and return to the free state if
                  * no-one else claimed it meanwhile.
                  */
-                cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+                (void)cmpxchg(&entry->next,
+                              next_flags(NULL, IRQ_WORK_BUSY),
+                              NULL);
         }
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
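
Note on the change above: the only functional difference is the (void) cast, presumably there to document that the old value returned by cmpxchg() is deliberately ignored and to quiet a "value computed is not used" warning seen with some architectures' cmpxchg() macros. A hedged, standalone illustration of the same idiom (the my_cmpxchg() wrapper is hypothetical, not kernel code):

#include <stdio.h>

/* hypothetical stand-in for an arch cmpxchg() whose macro casts its result;
 * silently discarding that value can trigger -Wunused-value on some toolchains */
#define my_cmpxchg(ptr, oldval, newval) \
        ((int)__sync_val_compare_and_swap((ptr), (oldval), (newval)))

int main(void)
{
        int v = 1;

        /* the (void) cast documents that the old value is deliberately ignored */
        (void)my_cmpxchg(&v, 1, 2);
        printf("v = %d\n", v);
        return 0;
}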

kernel/perf_event.c

Lines changed: 77 additions & 16 deletions
@@ -31,6 +31,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -1286,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
         int ctxn;
 
-        perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
-
         for_each_task_context_nr(ctxn)
                 perf_event_context_sched_out(task, ctxn, next);
 }
@@ -1621,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
 {
         raw_spin_lock(&ctx->lock);
 
-        /* Rotate the first entry last of non-pinned groups */
-        list_rotate_left(&ctx->flexible_groups);
+        /*
+         * Rotate the first entry last of non-pinned groups. Rotation might be
+         * disabled by the inheritance code.
+         */
+        if (!ctx->rotate_disable)
+                list_rotate_left(&ctx->flexible_groups);
 
         raw_spin_unlock(&ctx->lock);
 }
@@ -2234,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
         raw_spin_unlock_irq(&ctx->lock);
         mutex_unlock(&ctx->mutex);
 
-        mutex_lock(&event->owner->perf_event_mutex);
-        list_del_init(&event->owner_entry);
-        mutex_unlock(&event->owner->perf_event_mutex);
-        put_task_struct(event->owner);
-
         free_event(event);
 
         return 0;
@@ -2251,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 static int perf_release(struct inode *inode, struct file *file)
 {
         struct perf_event *event = file->private_data;
+        struct task_struct *owner;
 
         file->private_data = NULL;
 
+        rcu_read_lock();
+        owner = ACCESS_ONCE(event->owner);
+        /*
+         * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+         * !owner it means the list deletion is complete and we can indeed
+         * free this event, otherwise we need to serialize on
+         * owner->perf_event_mutex.
+         */
+        smp_read_barrier_depends();
+        if (owner) {
+                /*
+                 * Since delayed_put_task_struct() also drops the last
+                 * task reference we can safely take a new reference
+                 * while holding the rcu_read_lock().
+                 */
+                get_task_struct(owner);
+        }
+        rcu_read_unlock();
+
+        if (owner) {
+                mutex_lock(&owner->perf_event_mutex);
+                /*
+                 * We have to re-check the event->owner field, if it is cleared
+                 * we raced with perf_event_exit_task(), acquiring the mutex
+                 * ensured they're done, and we can proceed with freeing the
+                 * event.
+                 */
+                if (event->owner)
+                        list_del_init(&event->owner_entry);
+                mutex_unlock(&owner->perf_event_mutex);
+                put_task_struct(owner);
+        }
+
         return perf_event_release_kernel(event);
 }
 
@@ -5668,7 +5700,7 @@ SYSCALL_DEFINE5(perf_event_open,
         mutex_unlock(&ctx->mutex);
 
         event->owner = current;
-        get_task_struct(current);
+
         mutex_lock(&current->perf_event_mutex);
         list_add_tail(&event->owner_entry, &current->perf_event_list);
         mutex_unlock(&current->perf_event_mutex);
@@ -5736,12 +5768,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
         ++ctx->generation;
         mutex_unlock(&ctx->mutex);
 
-        event->owner = current;
-        get_task_struct(current);
-        mutex_lock(&current->perf_event_mutex);
-        list_add_tail(&event->owner_entry, &current->perf_event_list);
-        mutex_unlock(&current->perf_event_mutex);
-
         return event;
 
 err_free:
@@ -5892,8 +5918,24 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
  */
 void perf_event_exit_task(struct task_struct *child)
 {
+        struct perf_event *event, *tmp;
         int ctxn;
 
+        mutex_lock(&child->perf_event_mutex);
+        list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+                                 owner_entry) {
+                list_del_init(&event->owner_entry);
+
+                /*
+                 * Ensure the list deletion is visible before we clear
+                 * the owner, closes a race against perf_release() where
+                 * we need to serialize on the owner->perf_event_mutex.
+                 */
+                smp_wmb();
+                event->owner = NULL;
+        }
+        mutex_unlock(&child->perf_event_mutex);
+
         for_each_task_context_nr(ctxn)
                 perf_event_exit_task_context(child, ctxn);
 }
@@ -6113,6 +6155,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
         struct perf_event *event;
         struct task_struct *parent = current;
         int inherited_all = 1;
+        unsigned long flags;
         int ret = 0;
 
         child->perf_event_ctxp[ctxn] = NULL;
@@ -6153,13 +6196,26 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
                         break;
         }
 
+        /*
+         * We can't hold ctx->lock when iterating the ->flexible_group list due
+         * to allocations, but we need to prevent rotation because
+         * rotate_ctx() will change the list from interrupt context.
+         */
+        raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+        parent_ctx->rotate_disable = 1;
+        raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
         list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
                 ret = inherit_task_group(event, parent, parent_ctx,
                                          child, ctxn, &inherited_all);
                 if (ret)
                         break;
         }
 
+        raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+        parent_ctx->rotate_disable = 0;
+        raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
         child_ctx = child->perf_event_ctxp[ctxn];
 
         if (child_ctx && inherited_all) {
@@ -6312,11 +6368,16 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 void __init perf_event_init(void)
 {
+        int ret;
+
         perf_event_init_all_cpus();
         init_srcu_struct(&pmus_srcu);
         perf_pmu_register(&perf_swevent);
         perf_pmu_register(&perf_cpu_clock);
         perf_pmu_register(&perf_task_clock);
         perf_tp_register();
         perf_cpu_notifier(perf_cpu_notify);
+
+        ret = init_hw_breakpoint();
+        WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
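
Note on the kernel/perf_event.c changes above: the owner teardown now relies on an ordering contract. perf_event_exit_task() unlinks the event from the owner list first and only then clears event->owner behind smp_wmb(), while perf_release() must re-check under the owner's perf_event_mutex whenever it still observes a non-NULL owner. A very rough sketch of the same contract in plain C11 atomics (hypothetical types and names; release/acquire stand in for the kernel barriers, and the mutex step is only indicated in comments):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct task;                            /* hypothetical owner type */

struct event {
        _Atomic(struct task *) owner;
        bool on_owner_list;             /* protected by the owner's mutex */
};

/* exit side: unlink first, then publish the unlink by clearing owner */
static void owner_exit_side(struct event *ev)
{
        ev->on_owner_list = false;                      /* list_del_init() */
        atomic_store_explicit(&ev->owner, NULL,
                              memory_order_release);    /* smp_wmb() + store */
}

/* close side: a NULL owner means the unlink is already visible and the event
 * can be freed; otherwise the real code locks owner->perf_event_mutex,
 * re-checks event->owner and unlinks there before dropping the task ref */
static bool close_side_may_free(struct event *ev)
{
        struct task *owner = atomic_load_explicit(&ev->owner,
                                                  memory_order_acquire);
        return owner == NULL;
}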
