Skip to content

Commit 0b20e59

Browse files
author
Ingo Molnar
committed
Merge branch 'perf/urgent' into perf/core, to resolve conflict
Conflicts: arch/x86/events/intel/pt.c Signed-off-by: Ingo Molnar <mingo@kernel.org>
2 parents a8944c5 + cf3beb7 commit 0b20e59

File tree

7 files changed

+116
-30
lines changed

7 files changed

+116
-30
lines changed

arch/x86/events/amd/core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ static __initconst const u64 amd_hw_cache_event_ids
115115
/*
116116
* AMD Performance Monitor K7 and later.
117117
*/
118-
static const u64 amd_perfmon_event_map[] =
118+
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
119119
{
120120
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
121121
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,

arch/x86/events/intel/lbr.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ static enum {
6464

6565
#define LBR_PLM (LBR_KERNEL | LBR_USER)
6666

67-
#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
67+
#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */
6868
#define LBR_NOT_SUPP -1 /* LBR filter not supported */
6969
#define LBR_IGN 0 /* ignored */
7070

@@ -621,8 +621,10 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
621621
* The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
622622
* in suppress mode. So LBR_SELECT should be set to
623623
* (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
624+
* But the 10th bit LBR_CALL_STACK does not operate
625+
* in suppress mode.
624626
*/
625-
reg->config = mask ^ x86_pmu.lbr_sel_mask;
627+
reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
626628

627629
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
628630
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&

arch/x86/events/intel/pt.c

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
136136
struct dev_ext_attribute *de_attrs;
137137
struct attribute **attrs;
138138
size_t size;
139+
u64 reg;
139140
int ret;
140141
long i;
141142

143+
if (boot_cpu_has(X86_FEATURE_VMX)) {
144+
/*
145+
* Intel SDM, 36.5 "Tracing post-VMXON" says that
146+
* "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
147+
* post-VMXON.
148+
*/
149+
rdmsrl(MSR_IA32_VMX_MISC, reg);
150+
if (reg & BIT(14))
151+
pt_pmu.vmx = true;
152+
}
153+
142154
attrs = NULL;
143155

144156
for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
269281

270282
reg |= (event->attr.config & PT_CONFIG_MASK);
271283

284+
event->hw.config = reg;
272285
wrmsrl(MSR_IA32_RTIT_CTL, reg);
273286
}
274287

275-
static void pt_config_start(bool start)
288+
static void pt_config_stop(struct perf_event *event)
276289
{
277-
u64 ctl;
290+
u64 ctl = READ_ONCE(event->hw.config);
291+
292+
/* may be already stopped by a PMI */
293+
if (!(ctl & RTIT_CTL_TRACEEN))
294+
return;
278295

279-
rdmsrl(MSR_IA32_RTIT_CTL, ctl);
280-
if (start)
281-
ctl |= RTIT_CTL_TRACEEN;
282-
else
283-
ctl &= ~RTIT_CTL_TRACEEN;
296+
ctl &= ~RTIT_CTL_TRACEEN;
284297
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
285298

299+
WRITE_ONCE(event->hw.config, ctl);
300+
286301
/*
287302
* A wrmsr that disables trace generation serializes other PT
288303
* registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
291306
* The below WMB, separating data store and aux_head store matches
292307
* the consumer's RMB that separates aux_head load and data load.
293308
*/
294-
if (!start)
295-
wmb();
309+
wmb();
296310
}
297311

298312
static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +936,17 @@ void intel_pt_interrupt(void)
922936
if (!ACCESS_ONCE(pt->handle_nmi))
923937
return;
924938

925-
pt_config_start(false);
939+
/*
940+
* If VMX is on and PT does not support it, don't touch anything.
941+
*/
942+
if (READ_ONCE(pt->vmx_on))
943+
return;
926944

927945
if (!event)
928946
return;
929947

948+
pt_config_stop(event);
949+
930950
buf = perf_get_aux(&pt->handle);
931951
if (!buf)
932952
return;
@@ -963,6 +983,35 @@ void intel_pt_interrupt(void)
963983
}
964984
}
965985

986+
void intel_pt_handle_vmx(int on)
987+
{
988+
struct pt *pt = this_cpu_ptr(&pt_ctx);
989+
struct perf_event *event;
990+
unsigned long flags;
991+
992+
/* PT plays nice with VMX, do nothing */
993+
if (pt_pmu.vmx)
994+
return;
995+
996+
/*
997+
* VMXON will clear RTIT_CTL.TraceEn; we need to make
998+
* sure to not try to set it while VMX is on. Disable
999+
* interrupts to avoid racing with pmu callbacks;
1000+
* concurrent PMI should be handled fine.
1001+
*/
1002+
local_irq_save(flags);
1003+
WRITE_ONCE(pt->vmx_on, on);
1004+
1005+
if (on) {
1006+
/* prevent pt_config_stop() from writing RTIT_CTL */
1007+
event = pt->handle.event;
1008+
if (event)
1009+
event->hw.config = 0;
1010+
}
1011+
local_irq_restore(flags);
1012+
}
1013+
EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
1014+
9661015
/*
9671016
* PMU callbacks
9681017
*/
@@ -973,6 +1022,9 @@ static void pt_event_start(struct perf_event *event, int mode)
9731022
struct pt *pt = this_cpu_ptr(&pt_ctx);
9741023
struct pt_buffer *buf;
9751024

1025+
if (READ_ONCE(pt->vmx_on))
1026+
return;
1027+
9761028
buf = perf_aux_output_begin(&pt->handle, event);
9771029
if (!buf)
9781030
goto fail_stop;
@@ -1007,7 +1059,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
10071059
* see comment in intel_pt_interrupt().
10081060
*/
10091061
ACCESS_ONCE(pt->handle_nmi) = 0;
1010-
pt_config_start(false);
1062+
1063+
pt_config_stop(event);
10111064

10121065
if (event->hw.state == PERF_HES_STOPPED)
10131066
return;

arch/x86/events/intel/pt.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ enum pt_capabilities {
6565
struct pt_pmu {
6666
struct pmu pmu;
6767
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
68+
bool vmx;
6869
};
6970

7071
/**
@@ -107,10 +108,12 @@ struct pt_buffer {
107108
* struct pt - per-cpu pt context
108109
* @handle: perf output handle
109110
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
111+
* @vmx_on: 1 if VMX is ON on this cpu
110112
*/
111113
struct pt {
112114
struct perf_output_handle handle;
113115
int handle_nmi;
116+
int vmx_on;
114117
};
115118

116119
#endif /* __INTEL_PT_H__ */

arch/x86/include/asm/perf_event.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
285285
static inline void perf_check_microcode(void) { }
286286
#endif
287287

288+
#ifdef CONFIG_CPU_SUP_INTEL
289+
extern void intel_pt_handle_vmx(int on);
290+
#endif
291+
288292
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
289293
extern void amd_pmu_enable_virt(void);
290294
extern void amd_pmu_disable_virt(void);

arch/x86/kvm/vmx.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3103,6 +3103,8 @@ static __init int vmx_disabled_by_bios(void)
31033103

31043104
static void kvm_cpu_vmxon(u64 addr)
31053105
{
3106+
intel_pt_handle_vmx(1);
3107+
31063108
asm volatile (ASM_VMX_VMXON_RAX
31073109
: : "a"(&addr), "m"(addr)
31083110
: "memory", "cc");
@@ -3172,6 +3174,8 @@ static void vmclear_local_loaded_vmcss(void)
31723174
static void kvm_cpu_vmxoff(void)
31733175
{
31743176
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
3177+
3178+
intel_pt_handle_vmx(0);
31753179
}
31763180

31773181
static void hardware_disable(void)

kernel/events/core.c

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,7 @@ static void put_ctx(struct perf_event_context *ctx)
11061106
* function.
11071107
*
11081108
* Lock order:
1109+
* cred_guard_mutex
11091110
* task_struct::perf_event_mutex
11101111
* perf_event_context::mutex
11111112
* perf_event::child_mutex;
@@ -3449,7 +3450,6 @@ static struct task_struct *
34493450
find_lively_task_by_vpid(pid_t vpid)
34503451
{
34513452
struct task_struct *task;
3452-
int err;
34533453

34543454
rcu_read_lock();
34553455
if (!vpid)
@@ -3463,16 +3463,7 @@ find_lively_task_by_vpid(pid_t vpid)
34633463
if (!task)
34643464
return ERR_PTR(-ESRCH);
34653465

3466-
/* Reuse ptrace permission checks for now. */
3467-
err = -EACCES;
3468-
if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
3469-
goto errout;
3470-
34713466
return task;
3472-
errout:
3473-
put_task_struct(task);
3474-
return ERR_PTR(err);
3475-
34763467
}
34773468

34783469
/*
@@ -8592,14 +8583,32 @@ SYSCALL_DEFINE5(perf_event_open,
85928583

85938584
get_online_cpus();
85948585

8586+
if (task) {
8587+
err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
8588+
if (err)
8589+
goto err_cpus;
8590+
8591+
/*
8592+
* Reuse ptrace permission checks for now.
8593+
*
8594+
* We must hold cred_guard_mutex across this and any potential
8595+
* perf_install_in_context() call for this new event to
8596+
* serialize against exec() altering our credentials (and the
8597+
* perf_event_exit_task() that could imply).
8598+
*/
8599+
err = -EACCES;
8600+
if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
8601+
goto err_cred;
8602+
}
8603+
85958604
if (flags & PERF_FLAG_PID_CGROUP)
85968605
cgroup_fd = pid;
85978606

85988607
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
85998608
NULL, NULL, cgroup_fd);
86008609
if (IS_ERR(event)) {
86018610
err = PTR_ERR(event);
8602-
goto err_cpus;
8611+
goto err_cred;
86038612
}
86048613

86058614
if (is_sampling_event(event)) {
@@ -8658,11 +8667,6 @@ SYSCALL_DEFINE5(perf_event_open,
86588667
goto err_context;
86598668
}
86608669

8661-
if (task) {
8662-
put_task_struct(task);
8663-
task = NULL;
8664-
}
8665-
86668670
/*
86678671
* Look up the group leader (we will attach this event to it):
86688672
*/
@@ -8760,6 +8764,11 @@ SYSCALL_DEFINE5(perf_event_open,
87608764

87618765
WARN_ON_ONCE(ctx->parent_ctx);
87628766

8767+
/*
8768+
* This is the point on no return; we cannot fail hereafter. This is
8769+
* where we start modifying current state.
8770+
*/
8771+
87638772
if (move_group) {
87648773
/*
87658774
* See perf_event_ctx_lock() for comments on the details
@@ -8831,6 +8840,11 @@ SYSCALL_DEFINE5(perf_event_open,
88318840
mutex_unlock(&gctx->mutex);
88328841
mutex_unlock(&ctx->mutex);
88338842

8843+
if (task) {
8844+
mutex_unlock(&task->signal->cred_guard_mutex);
8845+
put_task_struct(task);
8846+
}
8847+
88348848
put_online_cpus();
88358849

88368850
mutex_lock(&current->perf_event_mutex);
@@ -8863,6 +8877,9 @@ SYSCALL_DEFINE5(perf_event_open,
88638877
*/
88648878
if (!event_file)
88658879
free_event(event);
8880+
err_cred:
8881+
if (task)
8882+
mutex_unlock(&task->signal->cred_guard_mutex);
88668883
err_cpus:
88678884
put_online_cpus();
88688885
err_task:
@@ -9147,6 +9164,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
91479164

91489165
/*
91499166
* When a child task exits, feed back event values to parent events.
9167+
*
9168+
* Can be called with cred_guard_mutex held when called from
9169+
* install_exec_creds().
91509170
*/
91519171
void perf_event_exit_task(struct task_struct *child)
91529172
{

0 commit comments

Comments (0)