Skip to content

Commit c22ac2a

Browse files
liu-song-6 authored and Alexei Starovoitov committed
perf: Enable branch record for software events
The typical way to access branch record (e.g. Intel LBR) is via hardware perf_event. For CPUs with FREEZE_LBRS_ON_PMI support, PMI could capture reliable LBR. On the other hand, LBR could also be useful in non-PMI scenario. For example, in kretprobe or bpf fexit program, LBR could provide a lot of information on what happened with the function. Add API to use branch record for software use. Note that, when the software event triggers, it is necessary to stop the branch record hardware asap. Therefore, static_call is used to remove some branch instructions in this process. Suggested-by: Peter Zijlstra <[email protected]> Signed-off-by: Song Liu <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]> Acked-by: John Fastabend <[email protected]> Acked-by: Andrii Nakryiko <[email protected]> Acked-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 3384c7c commit c22ac2a

File tree

6 files changed

+111
-22
lines changed

6 files changed

+111
-22
lines changed

arch/x86/events/intel/core.c

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2143,19 +2143,19 @@ static __initconst const u64 knl_hw_cache_extra_regs
21432143
* However, there are some cases which may change PEBS status, e.g. PMI
21442144
* throttle. The PEBS_ENABLE should be updated where the status changes.
21452145
*/
2146-
/*
 * Stop all core PMU counters in one MSR write; optionally also stop BTS.
 *
 * @bts: when false, skip the BTS check entirely. The branch-stack
 *       snapshot path passes false so this inlined helper stays as
 *       branch-free as possible while the LBR is being frozen.
 *
 * __always_inline so callers in the snapshot path execute no extra
 * call/ret branches before the branch recorder is stopped.
 */
static __always_inline void __intel_pmu_disable_all(bool bts)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* Clearing GLOBAL_CTRL stops every programmable/fixed counter. */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();
}
21552155

2156-
/*
 * Full PMU quiesce: counters (incl. BTS), then PEBS, then LBR.
 * __always_inline keeps this path cheap for the snapshot machinery.
 */
static __always_inline void intel_pmu_disable_all(void)
{
	__intel_pmu_disable_all(true);
	intel_pmu_pebs_disable_all();
	intel_pmu_lbr_disable_all();
}
@@ -2186,6 +2186,49 @@ static void intel_pmu_enable_all(int added)
21862186
__intel_pmu_enable_all(added, false);
21872187
}
21882188

2189+
/*
 * Common tail of the branch-stack snapshot: read out the now-frozen LBR
 * entries, copy at most @cnt of them into @entries, then re-enable the
 * PMU and restore interrupts.
 *
 * @flags: IRQ flags saved by the caller's local_irq_save(); restored
 *         here, so the caller must NOT restore them itself.
 *
 * noinline: once the LBR is stopped, extra branches no longer pollute
 * the record, so keep this bulk out of the caller's fast path.
 *
 * Returns the number of entries actually copied.
 */
static noinline int
__intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries,
				  unsigned int cnt, unsigned long flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	intel_pmu_lbr_read();
	/* Never copy more than the hardware actually recorded. */
	cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr);

	memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
	intel_pmu_enable_all(0);
	local_irq_restore(flags);
	return cnt;
}
2203+
2204+
/*
 * Snapshot the branch stack for a software event (legacy/model-specific
 * LBR). Every operation between local_irq_save() and __intel_pmu_lbr_disable()
 * uses __always_inline helpers so that as few branch instructions as
 * possible are recorded before the LBR is frozen.
 *
 * Returns the number of branch entries copied into @entries.
 */
static int
intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
{
	unsigned long flags;

	/* must not have branches... */
	local_irq_save(flags);
	__intel_pmu_disable_all(false); /* we don't care about BTS */
	__intel_pmu_pebs_disable_all();
	__intel_pmu_lbr_disable();
	/* ... until here */
	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
}
2217+
2218+
/*
 * Arch-LBR (X86_FEATURE_ARCH_LBR) variant of the branch-stack snapshot:
 * identical structure to intel_pmu_snapshot_branch_stack(), but stops
 * the recorder via MSR_ARCH_LBR_CTL instead of IA32_DEBUGCTL. Kept as a
 * separate function so the static call binds the right one at init and
 * no runtime feature branch is taken inside the critical window.
 *
 * Returns the number of branch entries copied into @entries.
 */
static int
intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
{
	unsigned long flags;

	/* must not have branches... */
	local_irq_save(flags);
	__intel_pmu_disable_all(false); /* we don't care about BTS */
	__intel_pmu_pebs_disable_all();
	__intel_pmu_arch_lbr_disable();
	/* ... until here */
	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
}
2231+
21892232
/*
21902233
* Workaround for:
21912234
* Intel Errata AAK100 (model 26)
@@ -2929,7 +2972,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
29292972
apic_write(APIC_LVTPC, APIC_DM_NMI);
29302973
intel_bts_disable_local();
29312974
cpuc->enabled = 0;
2932-
__intel_pmu_disable_all();
2975+
__intel_pmu_disable_all(true);
29332976
handled = intel_pmu_drain_bts_buffer();
29342977
handled += intel_bts_interrupt();
29352978
status = intel_pmu_get_status();
@@ -6283,9 +6326,21 @@ __init int intel_pmu_init(void)
62836326
x86_pmu.lbr_nr = 0;
62846327
}
62856328

6286-
if (x86_pmu.lbr_nr)
6329+
if (x86_pmu.lbr_nr) {
62876330
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
62886331

6332+
/* only support branch_stack snapshot for perfmon >= v2 */
6333+
if (x86_pmu.disable_all == intel_pmu_disable_all) {
6334+
if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) {
6335+
static_call_update(perf_snapshot_branch_stack,
6336+
intel_pmu_snapshot_arch_branch_stack);
6337+
} else {
6338+
static_call_update(perf_snapshot_branch_stack,
6339+
intel_pmu_snapshot_branch_stack);
6340+
}
6341+
}
6342+
}
6343+
62896344
intel_pmu_check_extra_regs(x86_pmu.extra_regs);
62906345

62916346
/* Support full width counters using alternative MSR range */

arch/x86/events/intel/ds.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
/*
 * Disable PEBS if any PEBS event is active on this CPU; the raw MSR
 * write lives in the shared __intel_pmu_pebs_disable_all() helper so
 * the branch-stack snapshot path can reuse it without this check.
 */
void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		__intel_pmu_pebs_disable_all();
}
13061306

13071307
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)

arch/x86/events/intel/lbr.c

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -228,20 +228,6 @@ static void __intel_pmu_lbr_enable(bool pmi)
228228
wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
229229
}
230230

231-
static void __intel_pmu_lbr_disable(void)
232-
{
233-
u64 debugctl;
234-
235-
if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
236-
wrmsrl(MSR_ARCH_LBR_CTL, 0);
237-
return;
238-
}
239-
240-
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
241-
debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
242-
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
243-
}
244-
245231
void intel_pmu_lbr_reset_32(void)
246232
{
247233
int i;
@@ -779,8 +765,12 @@ void intel_pmu_lbr_disable_all(void)
779765
{
780766
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
781767

782-
if (cpuc->lbr_users && !vlbr_exclude_host())
768+
if (cpuc->lbr_users && !vlbr_exclude_host()) {
769+
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
770+
return __intel_pmu_arch_lbr_disable();
771+
783772
__intel_pmu_lbr_disable();
773+
}
784774
}
785775

786776
void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)

arch/x86/events/perf_event.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,6 +1240,25 @@ static inline bool intel_pmu_has_bts(struct perf_event *event)
12401240
return intel_pmu_has_bts_period(event, hwc->sample_period);
12411241
}
12421242

1243+
/* Unconditionally stop PEBS: single branch-free MSR write, safe in the
 * snapshot fast path where no extra branches may be recorded. */
static __always_inline void __intel_pmu_pebs_disable_all(void)
{
	wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
1247+
1248+
/* Stop the architectural LBR (X86_FEATURE_ARCH_LBR) by clearing its
 * control MSR; caller is responsible for the feature check. */
static __always_inline void __intel_pmu_arch_lbr_disable(void)
{
	wrmsrl(MSR_ARCH_LBR_CTL, 0);
}
1252+
1253+
/*
 * Stop the legacy (model-specific) LBR: clear the LBR and
 * FREEZE_LBRS_ON_PMI bits in IA32_DEBUGCTL while preserving all other
 * DEBUGCTL bits via read-modify-write.
 */
static __always_inline void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
1261+
12431262
int intel_pmu_save_and_restart(struct perf_event *event);
12441263

12451264
struct event_constraint *

include/linux/perf_event.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ struct perf_guest_info_callbacks {
5757
#include <linux/cgroup.h>
5858
#include <linux/refcount.h>
5959
#include <linux/security.h>
60+
#include <linux/static_call.h>
6061
#include <asm/local.h>
6162

6263
struct perf_callchain_entry {
@@ -1612,4 +1613,26 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event,
16121613
extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
16131614
#endif
16141615

1616+
/*
 * Snapshot branch stack on software events.
 *
 * Branch stack can be very useful in understanding software events. For
 * example, when a long function, e.g. sys_perf_event_open, returns an
 * errno, it is not obvious why the function failed. Branch stack could
 * provide very helpful information in this type of scenarios.
 *
 * On software event, it is necessary to stop the hardware branch recorder
 * fast. Otherwise, the hardware register/buffer will be flushed with
 * entries of the triggering event. Therefore, static call is used to
 * stop the hardware recorder.
 */

/*
 * @entries: buffer provided by the caller.
 * @cnt:     number of entries allocated for @entries.
 * Returns the number of entries copied to @entries.
 */
typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
					   unsigned int cnt);
DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
1637+
16151638
#endif /* _LINUX_PERF_EVENT_H */

kernel/events/core.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13435,3 +13435,5 @@ struct cgroup_subsys perf_event_cgrp_subsys = {
1343513435
.threaded = true,
1343613436
};
1343713437
#endif /* CONFIG_CGROUP_PERF */
13438+
13439+
/*
 * Default implementation returns 0 (no entries) until a PMU driver
 * (e.g. Intel LBR in intel_pmu_init()) installs a real snapshot
 * function via static_call_update().
 */
DEFINE_STATIC_CALL_RET0(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);

0 commit comments

Comments
 (0)