Commit 74eedeb

Merge tag 'perf-urgent-2021-08-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Thomas Gleixner:
 "A set of perf fixes:

   - Correct the permission checks for perf event which send SIGTRAP to
     a different process and clean up that code to be more readable.

   - Prevent an out of bound MSR access in the x86 perf code which
     happened due to an incomplete limiting to the actually available
     hardware counters.

   - Prevent access to the AMD64_EVENTSEL_HOSTONLY bit when running
     inside a guest.

   - Handle small core counter re-enabling correctly by issuing an ACK
     right before reenabling it to prevent a stale PEBS record being
     kept around"

* tag 'perf-urgent-2021-08-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Apply mid ACK for small core
  perf/x86/amd: Don't touch the AMD64_EVENTSEL_HOSTONLY bit inside the guest
  perf/x86: Fix out of bound MSR access
  perf: Refactor permissions check into perf_check_permission()
  perf: Fix required permissions if sigtrap is requested
2 parents: 6674586 + acade63

4 files changed: 71 additions & 17 deletions

arch/x86/events/core.c

Lines changed: 7 additions & 5 deletions
@@ -2489,13 +2489,15 @@ void perf_clear_dirty_counters(void)
 		return;
 
 	for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
-		/* Metrics and fake events don't have corresponding HW counters. */
-		if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
-			continue;
-		else if (i >= INTEL_PMC_IDX_FIXED)
+		if (i >= INTEL_PMC_IDX_FIXED) {
+			/* Metrics and fake events don't have corresponding HW counters. */
+			if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+				continue;
+
 			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
-		else
+		} else {
 			wrmsrl(x86_pmu_event_addr(i), 0);
+		}
 	}
 
 	bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
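To illustrate the bound check above, here is a small user-space sketch (an illustrative mock, not kernel code; the dirty-bit indices and the num_counters_fixed value are assumed) showing why a dirty index at or above INTEL_PMC_IDX_FIXED that has no backing fixed counter must be skipped, since otherwise MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED) names an MSR that does not exist:

/* User-space mock of the fixed-counter bound check. */
#include <stdio.h>

#define INTEL_PMC_IDX_FIXED		32
#define MSR_ARCH_PERFMON_FIXED_CTR0	0x309

int main(void)
{
	unsigned int num_counters_fixed = 3;		/* assumed per-PMU value */
	unsigned int dirty[] = { 1, 33, 47, 58 };	/* hypothetical dirty indices */

	for (unsigned int k = 0; k < sizeof(dirty) / sizeof(dirty[0]); k++) {
		unsigned int i = dirty[k];

		if (i >= INTEL_PMC_IDX_FIXED) {
			/* Indices with no backing HW counter (metrics, fake events) are skipped. */
			if ((i - INTEL_PMC_IDX_FIXED) >= num_counters_fixed) {
				printf("idx %u: skipped, no HW counter\n", i);
				continue;
			}
			printf("idx %u: clear fixed MSR 0x%x\n",
			       i, MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED));
		} else {
			printf("idx %u: clear GP counter %u\n", i, i);
		}
	}
	return 0;
}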

arch/x86/events/intel/core.c

Lines changed: 15 additions & 8 deletions
@@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
  */
 static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
-	struct cpu_hw_events *cpuc;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
+	bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
 	int loops;
 	u64 status;
 	int handled;
 	int pmu_enabled;
 
-	cpuc = this_cpu_ptr(&cpu_hw_events);
-
 	/*
 	 * Save the PMU state.
 	 * It needs to be restored when leaving the handler.
 	 */
 	pmu_enabled = cpuc->enabled;
 	/*
-	 * No known reason to not always do late ACK,
-	 * but just in case do it opt-in.
+	 * In general, the early ACK is only applied for old platforms.
+	 * For the big core starts from Haswell, the late ACK should be
+	 * applied.
+	 * For the small core after Tremont, we have to do the ACK right
+	 * before re-enabling counters, which is in the middle of the
+	 * NMI handler.
 	 */
-	if (!x86_pmu.late_ack)
+	if (!late_ack && !mid_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	intel_bts_disable_local();
 	cpuc->enabled = 0;
@@ -2958,6 +2962,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 		goto again;
 
 done:
+	if (mid_ack)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
 	cpuc->enabled = pmu_enabled;
 	if (pmu_enabled)
@@ -2969,7 +2975,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	 * have been reset. This avoids spurious NMIs on
 	 * Haswell CPUs.
 	 */
-	if (x86_pmu.late_ack)
+	if (late_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	return handled;
 }
@@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void)
 		static_branch_enable(&perf_is_hybrid);
 		x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS;
 
-		x86_pmu.late_ack = true;
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.pebs_block = true;
@@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
 		pmu->name = "cpu_core";
 		pmu->cpu_type = hybrid_big;
+		pmu->late_ack = true;
 		if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
 			pmu->num_counters = x86_pmu.num_counters + 2;
 			pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
@@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
 		pmu->name = "cpu_atom";
 		pmu->cpu_type = hybrid_small;
+		pmu->mid_ack = true;
 		pmu->num_counters = x86_pmu.num_counters;
 		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
 		pmu->max_pebs_events = x86_pmu.max_pebs_events;

arch/x86/events/perf_event.h

Lines changed: 17 additions & 1 deletion
@@ -656,6 +656,10 @@ struct x86_hybrid_pmu {
 	struct event_constraint		*event_constraints;
 	struct event_constraint		*pebs_constraints;
 	struct extra_reg		*extra_regs;
+
+	unsigned int			late_ack	:1,
+					mid_ack		:1,
+					enabled_ack	:1;
 };
 
 static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
@@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid;
 	__Fp;						\
 }))
 
+#define hybrid_bit(_pmu, _field)			\
+({							\
+	bool __Fp = x86_pmu._field;			\
+							\
+	if (is_hybrid() && (_pmu))			\
+		__Fp = hybrid_pmu(_pmu)->_field;	\
+							\
+	__Fp;						\
+})
+
 enum hybrid_pmu_type {
 	hybrid_big		= 0x40,
 	hybrid_small		= 0x20,
@@ -755,6 +769,7 @@ struct x86_pmu {
 
 	/* PMI handler bits */
 	unsigned int	late_ack		:1,
+			mid_ack			:1,
 			enabled_ack		:1;
 	/*
 	 * sysfs attrs
@@ -1115,9 +1130,10 @@ void x86_pmu_stop(struct perf_event *event, int flags);
 
 static inline void x86_pmu_disable_event(struct perf_event *event)
 {
+	u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
 	struct hw_perf_event *hwc = &event->hw;
 
-	wrmsrl(hwc->config_base, hwc->config);
+	wrmsrl(hwc->config_base, hwc->config & ~disable_mask);
 
 	if (is_counter_pair(hwc))
 		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
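The x86_pmu_disable_event() change above is the part that avoids touching AMD64_EVENTSEL_HOSTONLY inside a guest: the per-CPU perf_ctr_virt_mask already hides that bit on the enable path, and the disable path now applies the same mask. A minimal user-space sketch of the masking follows; the event-select encoding and the mask value are hypothetical examples, not kernel state:

/* Sketch of masking the Host-Only bit out of an event-select value. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define AMD64_EVENTSEL_HOSTONLY	(1ULL << 41)

int main(void)
{
	/*
	 * When the Host-Only bit must not reach the MSR (e.g. running as a
	 * guest), the virt mask contains it; otherwise the mask is 0.
	 */
	uint64_t perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
	/* Hypothetical event-select value with the Host-Only bit set. */
	uint64_t config = 0x5300c0ULL | AMD64_EVENTSEL_HOSTONLY;

	printf("raw config:          %#" PRIx64 "\n", config);
	printf("written to EVENTSEL: %#" PRIx64 "\n", config & ~perf_ctr_virt_mask);
	return 0;
}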

kernel/events/core.c

Lines changed: 32 additions & 3 deletions
@@ -11917,6 +11917,37 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader,
 	return gctx;
 }
 
+static bool
+perf_check_permission(struct perf_event_attr *attr, struct task_struct *task)
+{
+	unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS;
+	bool is_capable = perfmon_capable();
+
+	if (attr->sigtrap) {
+		/*
+		 * perf_event_attr::sigtrap sends signals to the other task.
+		 * Require the current task to also have CAP_KILL.
+		 */
+		rcu_read_lock();
+		is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL);
+		rcu_read_unlock();
+
+		/*
+		 * If the required capabilities aren't available, checks for
+		 * ptrace permissions: upgrade to ATTACH, since sending signals
+		 * can effectively change the target task.
+		 */
+		ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS;
+	}
+
+	/*
+	 * Preserve ptrace permission check for backwards compatibility. The
+	 * ptrace check also includes checks that the current task and other
+	 * task have matching uids, and is therefore not done here explicitly.
+	 */
+	return is_capable || ptrace_may_access(task, ptrace_mode);
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -12163,15 +12194,13 @@ SYSCALL_DEFINE5(perf_event_open,
 			goto err_file;
 
 		/*
-		 * Preserve ptrace permission check for backwards compatibility.
-		 *
 		 * We must hold exec_update_lock across this and any potential
 		 * perf_install_in_context() call for this new event to
 		 * serialize against exec() altering our credentials (and the
		 * perf_event_exit_task() that could imply).
 		 */
 		err = -EACCES;
-		if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+		if (!perf_check_permission(&attr, task))
 			goto err_cred;
 	}
 
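For context, a hedged user-space sketch (not part of the commit; assumes linux/perf_event.h from a 5.13+ kernel that provides the sigtrap and remove_on_exec attr bits) of the call this check now gates: opening a sampling event on another task with perf_event_attr::sigtrap set. With the fix, the open succeeds only if the caller is perfmon_capable() and also holds CAP_KILL in the target's user namespace, or passes a PTRACE_MODE_ATTACH_REALCREDS ptrace check; otherwise it fails with EACCES.

/* Open a sigtrap-delivering sampling event on another task. */
#include <linux/perf_event.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(int argc, char **argv)
{
	struct perf_event_attr attr;
	pid_t target = (argc > 1) ? atoi(argv[1]) : getpid();
	long fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.sample_period = 1000000;
	attr.sigtrap = 1;		/* deliver SIGTRAP to the target on overflow */
	attr.remove_on_exec = 1;	/* required when sigtrap is set */

	fd = perf_event_open(&attr, target, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");	/* EACCES without CAP_KILL or ptrace ATTACH */
	else
		printf("event fd %ld opened on pid %d\n", fd, (int)target);
	return 0;
}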
