Commit 86a0446

Andi Kleen authored and Ingo Molnar committed
perf/x86: Revamp PEBS event selection
The basic idea is that it does not make sense to list all PEBS events
individually. The list is very long, sometimes outdated, and the hardware
doesn't need it. If an event does not support PEBS it will just not count;
there is no security issue. We only need to list events that do something
special, like supporting load or store addresses.

This vastly simplifies the PEBS event selection. It also speeds up
scheduling, because the scheduler doesn't have to walk as many constraints.

Bugs fixed:

- We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4),
  except for the special cycle event. This is done using a new constraint
  macro that also matches on the event flags.

- Correct DataLA and load/store/na flags reporting on Haswell
  [requires a follow-on patch].

- We did not allow all PEBS events on Haswell: we were missing some valid
  subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
  MEM_LOAD_UOPS_L3_HIT_RETIRED.*).

This includes the changes proposed by Stephane earlier and obsoletes his
patchkit (except for some changes on pre-Sandy Bridge/Silvermont CPUs).

I only did Sandy Bridge/Silvermont and later so far, mostly because these
are the parts whose hardware behavior I could directly confirm with
hardware architects. Also, I do not believe the older CPUs have any missing
events in their PEBS lists, so there is no pressing need to change them.

I did not implement the flag proposed by Peter to allow setting forbidden
flags. If really needed, this could be implemented on top of this patch.

v2: Fix broken store events on SNB/IVB (Stephane Eranian)
v3: More fixes. Rename some arguments (Stephane Eranian)
v4: List most Haswell events individually again to report the memory
    operation type correctly. Add new flags to describe load/store/na for
    datala. Update description.

Signed-off-by: Andi Kleen <[email protected]>
Reviewed-by: Stephane Eranian <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Maria Dimakopoulou <[email protected]>
Cc: Mark Davies <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Yan, Zheng <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
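For orientation, the "special cycle event" mentioned above appears in the new tables below as the raw config 0x108001c2. A standalone decode (illustrative C, not kernel code; field offsets per the IA32_PERFEVTSELx layout) shows what that value encodes, and why it needs the inv/cmask bits that PEBS otherwise forbids:

#include <stdio.h>

int main(void)
{
	/*
	 * 0x108001c2 = UOPS_RETIRED.ALL (event 0xc2, umask 0x01) with
	 * inv=1 and cmask=16: "cycles in which fewer than 16 uops
	 * retire", the classic cycles:p proxy. inv and cmask are flag
	 * bits PEBS normally forbids, hence the dedicated entry.
	 */
	unsigned long long cfg = 0x108001c2ULL;

	printf("event=%#llx umask=%#llx inv=%llu cmask=%llu\n",
	       cfg & 0xff,           /* event select, bits 0-7   */
	       (cfg >> 8) & 0xff,    /* unit mask, bits 8-15     */
	       (cfg >> 23) & 1,      /* invert counter mask      */
	       (cfg >> 24) & 0xff);  /* counter mask, bits 24-31 */
	return 0;
}

Running it prints event=0xc2 umask=0x1 inv=1 cmask=16.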
1 parent 03de874

File tree: 3 files changed, +85 −78 lines changed

arch/x86/include/asm/perf_event.h

Lines changed: 8 additions & 0 deletions
@@ -51,6 +51,14 @@
 		ARCH_PERFMON_EVENTSEL_EDGE  |	\
 		ARCH_PERFMON_EVENTSEL_INV   |	\
 		ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS			\
+	(ARCH_PERFMON_EVENTSEL_EDGE |		\
+	 ARCH_PERFMON_EVENTSEL_INV |		\
+	 ARCH_PERFMON_EVENTSEL_CMASK |		\
+	 ARCH_PERFMON_EVENTSEL_ANY |		\
+	 ARCH_PERFMON_EVENTSEL_PIN_CONTROL |	\
+	 HSW_IN_TX |				\
+	 HSW_IN_TX_CHECKPOINTED)
 
 #define AMD64_RAW_EVENT_MASK		\
 	(X86_RAW_EVENT_MASK          |	\
 	 AMD64_EVENTSEL_EVENT)
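The constituent bits already exist in this header; a quick standalone check (bit positions copied from the header's IA32_PERFEVTSELx definitions, with the two HSW_IN_TX bits sitting above bit 31) confirms the composite mask value:

#include <stdio.h>

#define ARCH_PERFMON_EVENTSEL_EDGE		(1ULL << 18)
#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL	(1ULL << 19)
#define ARCH_PERFMON_EVENTSEL_ANY		(1ULL << 21)
#define ARCH_PERFMON_EVENTSEL_INV		(1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK		0xFF000000ULL
#define HSW_IN_TX				(1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED			(1ULL << 33)

int main(void)
{
	/* Same OR as X86_ALL_EVENT_FLAGS in the hunk above. */
	unsigned long long all = ARCH_PERFMON_EVENTSEL_EDGE |
				 ARCH_PERFMON_EVENTSEL_INV |
				 ARCH_PERFMON_EVENTSEL_CMASK |
				 ARCH_PERFMON_EVENTSEL_ANY |
				 ARCH_PERFMON_EVENTSEL_PIN_CONTROL |
				 HSW_IN_TX | HSW_IN_TX_CHECKPOINTED;

	printf("X86_ALL_EVENT_FLAGS = %#llx\n", all);	/* 0x3ffac0000 */
	return 0;
}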

arch/x86/kernel/cpu/perf_event.h

Lines changed: 42 additions & 6 deletions
@@ -67,8 +67,10 @@ struct event_constraint {
  */
 #define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 #define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style st data sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style datala, store */
 #define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -252,18 +254,52 @@ struct cpu_hw_events {
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define INTEL_PLD_CONSTRAINT(c, n)	\
-	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
 #define INTEL_PST_CONSTRAINT(c, n)	\
-	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n)	\
-	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(code, n)	\
+	EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
+
+/* Check flags and event code, and set the HSW store flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
+	__EVENT_CONSTRAINT(code, n, \
+			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
+/* Check flags and event code, and set the HSW load flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
+	__EVENT_CONSTRAINT(code, n, \
+			  ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+/* Check flags and event code/umask, and set the HSW store flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
+	__EVENT_CONSTRAINT(code, n, \
+			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
 
+/* Check flags and event code/umask, and set the HSW load flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
+	__EVENT_CONSTRAINT(code, n, \
+			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+/* Check flags and event code/umask, and set the HSW N/A flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
+	__EVENT_CONSTRAINT(code, n, \
+			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
+
+
 /*
  * We define the end marker as having a weight of -1
  * to enable blacklisting of events using a counter bitmask
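A minimal model of how these macros interact with constraint matching (simplified standalone C; the struct and the matches() helper are stand-ins for the kernel's event_constraint handling, while the numeric values come from the hunks above):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the kernel's struct event_constraint. */
struct constraint {
	uint64_t code;		/* bits that must match              */
	uint64_t cmask;		/* which bits of the config to test  */
	unsigned int flags;	/* copied into event->hw.flags later */
};

#define ALL_FLAGS	0x3ffac0000ULL	/* X86_ALL_EVENT_FLAGS        */
#define EV_UMASK	0xffffULL	/* INTEL_ARCH_EVENT_MASK      */
#define PEBS_LD_HSW	0x10		/* PERF_X86_EVENT_PEBS_LD_HSW */

/* Roughly what INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf) builds. */
static const struct constraint all_loads = {
	.code = 0x81d0, .cmask = EV_UMASK | ALL_FLAGS, .flags = PEBS_LD_HSW,
};

/* "Allow all events as PEBS with no flags": code 0, flags-only cmask. */
static const struct constraint catch_all = { .code = 0, .cmask = ALL_FLAGS };

/* A constraint matches when the selected config bits equal its code. */
static int matches(const struct constraint *c, uint64_t config)
{
	return (config & c->cmask) == c->code;
}

int main(void)
{
	uint64_t any = 1ULL << 21;	/* ARCH_PERFMON_EVENTSEL_ANY */

	printf("%d\n", matches(&all_loads, 0x81d0));		/* 1 */
	printf("%d\n", matches(&catch_all, 0x01c4));		/* 1: no flags */
	printf("%d\n", matches(&catch_all, 0x01c4 | any));	/* 0: rejected */
	return 0;
}

Because the flag bits sit inside cmask, any forbidden flag breaks the match against the catch-all entry, which is how the SDM 18.9.4 restriction described in the changelog falls out of ordinary constraint matching.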

arch/x86/kernel/cpu/perf_event_intel_ds.c

Lines changed: 35 additions & 72 deletions
@@ -569,28 +569,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_slm_pebs_event_constraints[] = {
-	INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
-	INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
-	INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
-	INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
-	INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
-	INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
-	INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
-	INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
-	INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
-	INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
-	INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
-	INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
-	INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
-	INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
-	INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
-	INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
-	INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
-	INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
-	INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
-	INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
-	INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
-	INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
 	EVENT_CONSTRAINT_END
 };
 
@@ -626,68 +608,44 @@ struct event_constraint intel_westmere_pebs_event_constraints[] = {
 
 struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
-	INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
-	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
-	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
-	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
+	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 	EVENT_CONSTRAINT_END
 };
 
 struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
-	INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
-	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
-	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
 	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
-	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-	INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 	EVENT_CONSTRAINT_END
 };
 
 struct event_constraint intel_hsw_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
-	INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
-	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
-	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
-	INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
-	INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
-	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.* */
-	/* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
-	/* MEM_UOPS_RETIRED.STLB_MISS_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
-	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
-	/* MEM_UOPS_RETIRED.SPLIT_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
-	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
-	INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
-	INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
-	INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
-	/* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
-	INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
-	/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
-	INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
-	/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
-	INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
-	/* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
-	INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
-	INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
-	INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
-
+	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
+	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
 	EVENT_CONSTRAINT_END
 };
 
@@ -880,7 +838,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 
 	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
 	fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
-				 PERF_X86_EVENT_PEBS_ST_HSW);
+				 PERF_X86_EVENT_PEBS_ST_HSW |
+				 PERF_X86_EVENT_PEBS_LD_HSW |
+				 PERF_X86_EVENT_PEBS_NA_HSW);
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
@@ -903,7 +863,10 @@
 	if (sample_type & PERF_SAMPLE_DATA_SRC) {
 		if (fll)
 			data.data_src.val = load_latency_data(pebs->dse);
-		else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+		else if (event->hw.flags &
+			 (PERF_X86_EVENT_PEBS_ST_HSW|
+			  PERF_X86_EVENT_PEBS_LD_HSW|
+			  PERF_X86_EVENT_PEBS_NA_HSW))
 			data.data_src.val =
 				precise_store_data_hsw(event, pebs->dse);
 		else