
Commit ca5b7c0

sandip4n authored and Peter Zijlstra committed
perf/x86/amd/lbr: Add LbrExtV2 branch record support
If AMD Last Branch Record Extension Version 2 (LbrExtV2) is detected, enable it alongside LBR Freeze on PMI when an event requests a branch stack, i.e. PERF_SAMPLE_BRANCH_STACK.

Each branch record is represented by a pair of registers, LBR From and LBR To. The freeze feature prevents any updates to these registers once a PMC overflows; the contents remain unchanged until the freeze bit is cleared by the PMI handler.

The branch records are read and copied to sample data before unfreezing, and only valid entries are copied. There is no additional register to denote which of the register pairs represents the top of the stack (TOS) since internal register renaming always ensures that the first pair (i.e. index 0) is the one representing the most recent branch, and so on.

The LBR registers are per-thread resources and are cleared explicitly whenever a new task is scheduled in. There are no special implications on the contents of these registers when transitioning to deep C-states.

Signed-off-by: Sandipan Das <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lore.kernel.org/r/d3b8500a3627a0d4d0259b005891ee248f248d91.1660211399.git.sandipan.das@amd.com
1 parent 703fb76 commit ca5b7c0
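Background, not part of the diff below: the LbrExtV2 path is taken only when a sampling event asks for branch records via PERF_SAMPLE_BRANCH_STACK. The following is a minimal userspace sketch of such a request; the event type, sample period, and branch filter are arbitrary illustrative choices, not values taken from this commit.

/*
 * Illustrative sketch only: open a sampling event that requests branch
 * records (PERF_SAMPLE_BRANCH_STACK). Period and filter are placeholders.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	/* Request branch records with each sample */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	/* Monitor the calling thread on any CPU */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* ... mmap the ring buffer, enable the event, consume samples ... */
	close(fd);
	return 0;
}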

File tree

4 files changed: +252, -11 lines changed

arch/x86/events/amd/core.c

Lines changed: 36 additions & 11 deletions
@@ -620,7 +620,7 @@ static inline u64 amd_pmu_get_global_status(void)
 	/* PerfCntrGlobalStatus is read-only */
 	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
 
-	return status & amd_pmu_global_cntr_mask;
+	return status;
 }
 
 static inline void amd_pmu_ack_global_status(u64 status)
@@ -631,8 +631,6 @@ static inline void amd_pmu_ack_global_status(u64 status)
 	 * clears the same bit in PerfCntrGlobalStatus
 	 */
 
-	/* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
-	status &= amd_pmu_global_cntr_mask;
 	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
 }
 
@@ -742,11 +740,17 @@ static void amd_pmu_v2_enable_event(struct perf_event *event)
 	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
-static void amd_pmu_v2_enable_all(int added)
+static __always_inline void amd_pmu_core_enable_all(void)
 {
 	amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
 }
 
+static void amd_pmu_v2_enable_all(int added)
+{
+	amd_pmu_lbr_enable_all();
+	amd_pmu_core_enable_all();
+}
+
 static void amd_pmu_disable_event(struct perf_event *event)
 {
 	x86_pmu_disable_event(event);
@@ -771,10 +775,15 @@ static void amd_pmu_disable_all(void)
 	amd_pmu_check_overflow();
 }
 
-static void amd_pmu_v2_disable_all(void)
+static __always_inline void amd_pmu_core_disable_all(void)
 {
-	/* Disable all PMCs */
 	amd_pmu_set_global_ctl(0);
+}
+
+static void amd_pmu_v2_disable_all(void)
+{
+	amd_pmu_core_disable_all();
+	amd_pmu_lbr_disable_all();
 	amd_pmu_check_overflow();
 }
 
@@ -877,15 +886,21 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 	pmu_enabled = cpuc->enabled;
 	cpuc->enabled = 0;
 
-	/* Stop counting */
-	amd_pmu_v2_disable_all();
+	/* Stop counting but do not disable LBR */
+	amd_pmu_core_disable_all();
 
 	status = amd_pmu_get_global_status();
 
 	/* Check if any overflows are pending */
 	if (!status)
 		goto done;
 
+	/* Read branch records before unfreezing */
+	if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+		amd_pmu_lbr_read();
+		status &= ~GLOBAL_STATUS_LBRS_FROZEN;
+	}
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -905,6 +920,9 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 
+		if (has_branch_stack(event))
+			data.br_stack = &cpuc->lbr_stack;
+
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 
@@ -918,7 +936,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 	 */
 	WARN_ON(status > 0);
 
-	/* Clear overflow bits */
+	/* Clear overflow and freeze bits */
 	amd_pmu_ack_global_status(~status);
 
 	/*
@@ -932,7 +950,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 
 	/* Resume counting only if PMU is active */
 	if (pmu_enabled)
-		amd_pmu_v2_enable_all(0);
+		amd_pmu_core_enable_all();
 
 	return amd_pmu_adjust_nmi_window(handled);
 }
@@ -1375,7 +1393,14 @@ static int __init amd_core_pmu_init(void)
 	}
 
 	/* LBR and BRS are mutually exclusive features */
-	if (amd_pmu_lbr_init() && !amd_brs_init()) {
+	if (!amd_pmu_lbr_init()) {
+		/* LBR requires flushing on context switch */
+		x86_pmu.sched_task = amd_pmu_lbr_sched_task;
+		static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config);
+		static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
+		static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
+		static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);
+	} else if (!amd_brs_init()) {
 		/*
 		 * BRS requires special event constraints and flushing on ctxsw.
 		 */

arch/x86/events/amd/lbr.c

Lines changed: 203 additions & 0 deletions
@@ -4,6 +4,209 @@
 
 #include "../perf_event.h"
 
+struct branch_entry {
+	union {
+		struct {
+			u64	ip:58;
+			u64	ip_sign_ext:5;
+			u64	mispredict:1;
+		} split;
+		u64	full;
+	} from;
+
+	union {
+		struct {
+			u64	ip:58;
+			u64	ip_sign_ext:3;
+			u64	reserved:1;
+			u64	spec:1;
+			u64	valid:1;
+		} split;
+		u64	full;
+	} to;
+};
+
+static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
+{
+	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
+}
+
+static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
+{
+	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
+}
+
+static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
+{
+	u64 val;
+
+	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
+
+	return val;
+}
+
+static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
+{
+	u64 val;
+
+	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
+
+	return val;
+}
+
+static __always_inline u64 sign_ext_branch_ip(u64 ip)
+{
+	u32 shift = 64 - boot_cpu_data.x86_virt_bits;
+
+	return (u64)(((s64)ip << shift) >> shift);
+}
+
+void amd_pmu_lbr_read(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct perf_branch_entry *br = cpuc->lbr_entries;
+	struct branch_entry entry;
+	int out = 0, i;
+
+	if (!cpuc->lbr_users)
+		return;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		entry.from.full = amd_pmu_lbr_get_from(i);
+		entry.to.full = amd_pmu_lbr_get_to(i);
+
+		/* Check if a branch has been logged */
+		if (!entry.to.split.valid)
+			continue;
+
+		perf_clear_branch_entry_bitfields(br + out);
+
+		br[out].from = sign_ext_branch_ip(entry.from.split.ip);
+		br[out].to = sign_ext_branch_ip(entry.to.split.ip);
+		br[out].mispred = entry.from.split.mispredict;
+		br[out].predicted = !br[out].mispred;
+		out++;
+	}
+
+	cpuc->lbr_stack.nr = out;
+
+	/*
+	 * Internal register renaming always ensures that LBR From[0] and
+	 * LBR To[0] always represent the TOS
+	 */
+	cpuc->lbr_stack.hw_idx = 0;
+}
+
+static int amd_pmu_lbr_setup_filter(struct perf_event *event)
+{
+	/* No LBR support */
+	if (!x86_pmu.lbr_nr)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+int amd_pmu_lbr_hw_config(struct perf_event *event)
+{
+	int ret = 0;
+
+	/* LBR is not recommended in counting mode */
+	if (!is_sampling_event(event))
+		return -EINVAL;
+
+	ret = amd_pmu_lbr_setup_filter(event);
+	if (!ret)
+		event->attach_state |= PERF_ATTACH_SCHED_CB;
+
+	return ret;
+}
+
+void amd_pmu_lbr_reset(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int i;
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	/* Reset all branch records individually */
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		amd_pmu_lbr_set_from(i, 0);
+		amd_pmu_lbr_set_to(i, 0);
+	}
+
+	cpuc->last_task_ctx = NULL;
+	cpuc->last_log_id = 0;
+}
+
+void amd_pmu_lbr_add(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	perf_sched_cb_inc(event->ctx->pmu);
+
+	if (!cpuc->lbr_users++ && !event->total_time_running)
+		amd_pmu_lbr_reset();
+}
+
+void amd_pmu_lbr_del(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	cpuc->lbr_users--;
+	WARN_ON_ONCE(cpuc->lbr_users < 0);
+	perf_sched_cb_dec(event->ctx->pmu);
+}
+
+void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/*
+	 * A context switch can flip the address space and LBR entries are
+	 * not tagged with an identifier. Hence, branches cannot be resolved
+	 * from the old address space and the LBR records should be wiped.
+	 */
+	if (cpuc->lbr_users && sched_in)
+		amd_pmu_lbr_reset();
+}
+
+void amd_pmu_lbr_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 dbg_ctl, dbg_extn_cfg;
+
+	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
+		return;
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
+}
+
+void amd_pmu_lbr_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 dbg_ctl, dbg_extn_cfg;
+
+	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
+		return;
+
+	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+
+	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+}
+
 __init int amd_pmu_lbr_init(void)
 {
 	union cpuid_0x80000022_ebx ebx;
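As a side note, the sign extension performed by sign_ext_branch_ip() above can be exercised outside the kernel. The sketch below is illustrative only: it mirrors the shift-up/arithmetic-shift-down expression, but the 48-bit virtual address width and the sample value are assumed for the example, whereas the kernel reads the width from boot_cpu_data.x86_virt_bits.

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Widen an ip taken from the LBR bitfield to a canonical 64-bit address */
static uint64_t sign_ext(uint64_t ip, unsigned int virt_bits)
{
	unsigned int shift = 64 - virt_bits;

	/* Shift the top valid bit up to bit 63, then arithmetic-shift back */
	return (uint64_t)((int64_t)(ip << shift) >> shift);
}

int main(void)
{
	/* Assumed example: 48 valid virtual address bits */
	unsigned int virt_bits = 48;
	/* A value with bit 47 set, as it would appear in the LBR ip field */
	uint64_t raw = 0x0000ffffffff1234ULL;

	/* Prints raw=0xffffffff1234 canonical=0xffffffffffff1234 */
	printf("raw=%#" PRIx64 " canonical=%#" PRIx64 "\n",
	       raw, sign_ext(raw, virt_bits));
	return 0;
}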

arch/x86/events/perf_event.h

Lines changed: 8 additions & 0 deletions
@@ -1233,6 +1233,14 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
 int amd_pmu_init(void);
 
 int amd_pmu_lbr_init(void);
+void amd_pmu_lbr_reset(void);
+void amd_pmu_lbr_read(void);
+void amd_pmu_lbr_add(struct perf_event *event);
+void amd_pmu_lbr_del(struct perf_event *event);
+void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+void amd_pmu_lbr_enable_all(void);
+void amd_pmu_lbr_disable_all(void);
+int amd_pmu_lbr_hw_config(struct perf_event *event);
 
 #ifdef CONFIG_PERF_EVENTS_AMD_BRS
 
arch/x86/include/asm/msr-index.h

Lines changed: 5 additions & 0 deletions
@@ -585,6 +585,9 @@
 #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL		0xc0000301
 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR	0xc0000302
 
+/* AMD Last Branch Record MSRs */
+#define MSR_AMD64_LBR_SELECT			0xc000010e
+
 /* Fam 17h MSRs */
 #define MSR_F17H_IRPERF				0xc00000e9
 
@@ -756,6 +759,8 @@
 #define MSR_AMD_DBG_EXTN_CFG		0xc000010f
 #define MSR_AMD_SAMP_BR_FROM		0xc0010300
 
+#define DBG_EXTN_CFG_LBRV2EN		BIT_ULL(6)
+
 #define MSR_IA32_MPERF			0x000000e7
 #define MSR_IA32_APERF			0x000000e8
 