Skip to content

Commit 1ac7fd8

Browse files
author
Peter Zijlstra (Intel)
committed
perf/x86/intel/lbr: Support LBR format V7
The Goldmont Plus and Tremont have LBR format V7. V7 has LBR_INFO, the same as LBR format V5, but V7 doesn't support TSX. Without this patch, the associated misprediction and cycles information in LBR_INFO may be lost on a Goldmont Plus platform. For Tremont, the patch only impacts the non-PEBS events, because with adaptive PEBS the LBR_INFO is always processed for a PEBS event. Currently, two different ways are used to check the LBR capabilities, which makes the code complex and confusing. For LBR format V4 and earlier, the global static lbr_desc array is used to store the flags for the LBR capabilities of each LBR format. For LBR formats V5 and V6, the current code checks the version number to determine the LBR capabilities. There are common LBR capabilities among the LBR format versions. Several flags for the LBR capabilities are introduced into struct x86_pmu. The flags, which can be shared among LBR formats, are used to check the LBR capabilities. Add intel_pmu_lbr_init() to set the flags accordingly at boot time. Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Signed-off-by: Kan Liang <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Tested-by: Kan Liang <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 0036fb0 commit 1ac7fd8

File tree

3 files changed

+75
-51
lines changed

3 files changed

+75
-51
lines changed

arch/x86/events/intel/core.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6353,6 +6353,8 @@ __init int intel_pmu_init(void)
63536353
}
63546354

63556355
if (x86_pmu.lbr_nr) {
6356+
intel_pmu_lbr_init();
6357+
63566358
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
63576359

63586360
/* only support branch_stack snapshot for perfmon >= v2 */

arch/x86/events/intel/lbr.c

Lines changed: 64 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,6 @@
88

99
#include "../perf_event.h"
1010

11-
static const enum {
12-
LBR_EIP_FLAGS = 1,
13-
LBR_TSX = 2,
14-
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
15-
[LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
16-
[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
17-
};
18-
1911
/*
2012
* Intel LBR_SELECT bits
2113
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
243235
for (i = 0; i < x86_pmu.lbr_nr; i++) {
244236
wrmsrl(x86_pmu.lbr_from + i, 0);
245237
wrmsrl(x86_pmu.lbr_to + i, 0);
246-
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
238+
if (x86_pmu.lbr_has_info)
247239
wrmsrl(x86_pmu.lbr_info + i, 0);
248240
}
249241
}
@@ -305,11 +297,10 @@ enum {
305297
*/
306298
static inline bool lbr_from_signext_quirk_needed(void)
307299
{
308-
int lbr_format = x86_pmu.intel_cap.lbr_format;
309300
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
310301
boot_cpu_has(X86_FEATURE_RTM);
311302

312-
return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
303+
return !tsx_support && x86_pmu.lbr_has_tsx;
313304
}
314305

315306
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
427418

428419
void intel_pmu_lbr_restore(void *ctx)
429420
{
430-
bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
431421
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
432422
struct x86_perf_task_context *task_ctx = ctx;
433-
int i;
434-
unsigned lbr_idx, mask;
423+
bool need_info = x86_pmu.lbr_has_info;
435424
u64 tos = task_ctx->tos;
425+
unsigned lbr_idx, mask;
426+
int i;
436427

437428
mask = x86_pmu.lbr_nr - 1;
438429
for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
444435
lbr_idx = (tos - i) & mask;
445436
wrlbr_from(lbr_idx, 0);
446437
wrlbr_to(lbr_idx, 0);
447-
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
438+
if (need_info)
448439
wrlbr_info(lbr_idx, 0);
449440
}
450441

@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
519510

520511
void intel_pmu_lbr_save(void *ctx)
521512
{
522-
bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
523513
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
524514
struct x86_perf_task_context *task_ctx = ctx;
515+
bool need_info = x86_pmu.lbr_has_info;
525516
unsigned lbr_idx, mask;
526517
u64 tos;
527518
int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
816807
{
817808
bool need_info = false, call_stack = false;
818809
unsigned long mask = x86_pmu.lbr_nr - 1;
819-
int lbr_format = x86_pmu.intel_cap.lbr_format;
820810
u64 tos = intel_pmu_lbr_tos();
821811
int i;
822812
int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
831821
for (i = 0; i < num; i++) {
832822
unsigned long lbr_idx = (tos - i) & mask;
833823
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
834-
int skip = 0;
835824
u16 cycles = 0;
836-
int lbr_flags = lbr_desc[lbr_format];
837825

838826
from = rdlbr_from(lbr_idx, NULL);
839827
to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
845833
if (call_stack && !from)
846834
break;
847835

848-
if (lbr_format == LBR_FORMAT_INFO && need_info) {
849-
u64 info;
850-
851-
info = rdlbr_info(lbr_idx, NULL);
852-
mis = !!(info & LBR_INFO_MISPRED);
853-
pred = !mis;
854-
in_tx = !!(info & LBR_INFO_IN_TX);
855-
abort = !!(info & LBR_INFO_ABORT);
856-
cycles = (info & LBR_INFO_CYCLES);
857-
}
858-
859-
if (lbr_format == LBR_FORMAT_TIME) {
860-
mis = !!(from & LBR_FROM_FLAG_MISPRED);
861-
pred = !mis;
862-
skip = 1;
863-
cycles = ((to >> 48) & LBR_INFO_CYCLES);
864-
865-
to = (u64)((((s64)to) << 16) >> 16);
866-
}
867-
868-
if (lbr_flags & LBR_EIP_FLAGS) {
869-
mis = !!(from & LBR_FROM_FLAG_MISPRED);
870-
pred = !mis;
871-
skip = 1;
872-
}
873-
if (lbr_flags & LBR_TSX) {
874-
in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
875-
abort = !!(from & LBR_FROM_FLAG_ABORT);
876-
skip = 3;
836+
if (x86_pmu.lbr_has_info) {
837+
if (need_info) {
838+
u64 info;
839+
840+
info = rdlbr_info(lbr_idx, NULL);
841+
mis = !!(info & LBR_INFO_MISPRED);
842+
pred = !mis;
843+
cycles = (info & LBR_INFO_CYCLES);
844+
if (x86_pmu.lbr_has_tsx) {
845+
in_tx = !!(info & LBR_INFO_IN_TX);
846+
abort = !!(info & LBR_INFO_ABORT);
847+
}
848+
}
849+
} else {
850+
int skip = 0;
851+
852+
if (x86_pmu.lbr_from_flags) {
853+
mis = !!(from & LBR_FROM_FLAG_MISPRED);
854+
pred = !mis;
855+
skip = 1;
856+
}
857+
if (x86_pmu.lbr_has_tsx) {
858+
in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
859+
abort = !!(from & LBR_FROM_FLAG_ABORT);
860+
skip = 3;
861+
}
862+
from = (u64)((((s64)from) << skip) >> skip);
863+
864+
if (x86_pmu.lbr_to_cycles) {
865+
cycles = ((to >> 48) & LBR_INFO_CYCLES);
866+
to = (u64)((((s64)to) << 16) >> 16);
867+
}
877868
}
878-
from = (u64)((((s64)from) << skip) >> skip);
879869

880870
/*
881871
* Some CPUs report duplicated abort records,
@@ -1120,7 +1110,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
11201110

11211111
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
11221112
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
1123-
(x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
1113+
x86_pmu.lbr_has_info)
11241114
reg->config |= LBR_NO_INFO;
11251115

11261116
return 0;
@@ -1706,6 +1696,30 @@ void intel_pmu_lbr_init_knl(void)
17061696
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
17071697
}
17081698

1699+
void intel_pmu_lbr_init(void)
1700+
{
1701+
switch (x86_pmu.intel_cap.lbr_format) {
1702+
case LBR_FORMAT_EIP_FLAGS2:
1703+
x86_pmu.lbr_has_tsx = 1;
1704+
fallthrough;
1705+
case LBR_FORMAT_EIP_FLAGS:
1706+
x86_pmu.lbr_from_flags = 1;
1707+
break;
1708+
1709+
case LBR_FORMAT_INFO:
1710+
x86_pmu.lbr_has_tsx = 1;
1711+
fallthrough;
1712+
case LBR_FORMAT_INFO2:
1713+
x86_pmu.lbr_has_info = 1;
1714+
break;
1715+
1716+
case LBR_FORMAT_TIME:
1717+
x86_pmu.lbr_from_flags = 1;
1718+
x86_pmu.lbr_to_cycles = 1;
1719+
break;
1720+
}
1721+
}
1722+
17091723
/*
17101724
* LBR state size is variable based on the max number of registers.
17111725
* This calculates the expected state size, which should match

arch/x86/events/perf_event.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,8 @@ enum {
215215
LBR_FORMAT_EIP_FLAGS2 = 0x04,
216216
LBR_FORMAT_INFO = 0x05,
217217
LBR_FORMAT_TIME = 0x06,
218-
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
218+
LBR_FORMAT_INFO2 = 0x07,
219+
LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2,
219220
};
220221

221222
enum {
@@ -840,6 +841,11 @@ struct x86_pmu {
840841
bool lbr_double_abort; /* duplicated lbr aborts */
841842
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
842843

844+
unsigned int lbr_has_info:1;
845+
unsigned int lbr_has_tsx:1;
846+
unsigned int lbr_from_flags:1;
847+
unsigned int lbr_to_cycles:1;
848+
843849
/*
844850
* Intel Architectural LBR CPUID Enumeration
845851
*/
@@ -1392,6 +1398,8 @@ void intel_pmu_lbr_init_skl(void);
13921398

13931399
void intel_pmu_lbr_init_knl(void);
13941400

1401+
void intel_pmu_lbr_init(void);
1402+
13951403
void intel_pmu_arch_lbr_init(void);
13961404

13971405
void intel_pmu_pebs_data_source_nhm(void);

0 commit comments

Comments
 (0)