Skip to content

Commit dd0b06b

Browse files
kliang2 authored and Ingo Molnar committed
perf/x86/intel: Add Goldmont Plus CPU PMU support
Add perf core PMU support for Intel Goldmont Plus CPU cores: - The init code is based on Goldmont. - There is a new cache event list, based on the Goldmont cache event list. - All four general-purpose performance counters support PEBS. - The first general-purpose performance counter is for reduced skid PEBS mechanism. Using :ppp to indicate the event which want to do reduced skid PEBS. - Goldmont Plus has 4-wide pipeline for Topdown Signed-off-by: Kan Liang <[email protected]> Cc: Alexander Shishkin <[email protected]> Cc: Arnaldo Carvalho de Melo <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Stephane Eranian <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Vince Weaver <[email protected]> Cc: [email protected] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Ingo Molnar <[email protected]>
1 parent 5c10b04 commit dd0b06b

File tree

3 files changed

+166
-0
lines changed

3 files changed

+166
-0
lines changed

arch/x86/events/intel/core.c

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1708,6 +1708,120 @@ static __initconst const u64 glm_hw_cache_extra_regs
17081708
},
17091709
};
17101710

1711+
static __initconst const u64 glp_hw_cache_event_ids
1712+
[PERF_COUNT_HW_CACHE_MAX]
1713+
[PERF_COUNT_HW_CACHE_OP_MAX]
1714+
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1715+
[C(L1D)] = {
1716+
[C(OP_READ)] = {
1717+
[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
1718+
[C(RESULT_MISS)] = 0x0,
1719+
},
1720+
[C(OP_WRITE)] = {
1721+
[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
1722+
[C(RESULT_MISS)] = 0x0,
1723+
},
1724+
[C(OP_PREFETCH)] = {
1725+
[C(RESULT_ACCESS)] = 0x0,
1726+
[C(RESULT_MISS)] = 0x0,
1727+
},
1728+
},
1729+
[C(L1I)] = {
1730+
[C(OP_READ)] = {
1731+
[C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */
1732+
[C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */
1733+
},
1734+
[C(OP_WRITE)] = {
1735+
[C(RESULT_ACCESS)] = -1,
1736+
[C(RESULT_MISS)] = -1,
1737+
},
1738+
[C(OP_PREFETCH)] = {
1739+
[C(RESULT_ACCESS)] = 0x0,
1740+
[C(RESULT_MISS)] = 0x0,
1741+
},
1742+
},
1743+
[C(LL)] = {
1744+
[C(OP_READ)] = {
1745+
[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
1746+
[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
1747+
},
1748+
[C(OP_WRITE)] = {
1749+
[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
1750+
[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
1751+
},
1752+
[C(OP_PREFETCH)] = {
1753+
[C(RESULT_ACCESS)] = 0x0,
1754+
[C(RESULT_MISS)] = 0x0,
1755+
},
1756+
},
1757+
[C(DTLB)] = {
1758+
[C(OP_READ)] = {
1759+
[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
1760+
[C(RESULT_MISS)] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
1761+
},
1762+
[C(OP_WRITE)] = {
1763+
[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
1764+
[C(RESULT_MISS)] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */
1765+
},
1766+
[C(OP_PREFETCH)] = {
1767+
[C(RESULT_ACCESS)] = 0x0,
1768+
[C(RESULT_MISS)] = 0x0,
1769+
},
1770+
},
1771+
[C(ITLB)] = {
1772+
[C(OP_READ)] = {
1773+
[C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */
1774+
[C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */
1775+
},
1776+
[C(OP_WRITE)] = {
1777+
[C(RESULT_ACCESS)] = -1,
1778+
[C(RESULT_MISS)] = -1,
1779+
},
1780+
[C(OP_PREFETCH)] = {
1781+
[C(RESULT_ACCESS)] = -1,
1782+
[C(RESULT_MISS)] = -1,
1783+
},
1784+
},
1785+
[C(BPU)] = {
1786+
[C(OP_READ)] = {
1787+
[C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
1788+
[C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
1789+
},
1790+
[C(OP_WRITE)] = {
1791+
[C(RESULT_ACCESS)] = -1,
1792+
[C(RESULT_MISS)] = -1,
1793+
},
1794+
[C(OP_PREFETCH)] = {
1795+
[C(RESULT_ACCESS)] = -1,
1796+
[C(RESULT_MISS)] = -1,
1797+
},
1798+
},
1799+
};
1800+
1801+
static __initconst const u64 glp_hw_cache_extra_regs
1802+
[PERF_COUNT_HW_CACHE_MAX]
1803+
[PERF_COUNT_HW_CACHE_OP_MAX]
1804+
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1805+
[C(LL)] = {
1806+
[C(OP_READ)] = {
1807+
[C(RESULT_ACCESS)] = GLM_DEMAND_READ|
1808+
GLM_LLC_ACCESS,
1809+
[C(RESULT_MISS)] = GLM_DEMAND_READ|
1810+
GLM_LLC_MISS,
1811+
},
1812+
[C(OP_WRITE)] = {
1813+
[C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
1814+
GLM_LLC_ACCESS,
1815+
[C(RESULT_MISS)] = GLM_DEMAND_WRITE|
1816+
GLM_LLC_MISS,
1817+
},
1818+
[C(OP_PREFETCH)] = {
1819+
[C(RESULT_ACCESS)] = 0x0,
1820+
[C(RESULT_MISS)] = 0x0,
1821+
},
1822+
},
1823+
};
1824+
17111825
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
17121826
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
17131827
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -3016,6 +3130,9 @@ static int hsw_hw_config(struct perf_event *event)
30163130
return 0;
30173131
}
30183132

3133+
/*
 * Constraint restricting an event to general-purpose counter 0 only
 * (counter mask 0x1) — used for the reduced-skid PEBS mechanism on
 * Goldmont Plus, which is available only on PMC0.
 */
static struct event_constraint counter0_constraint =
			INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
3135+
30193136
static struct event_constraint counter2_constraint =
30203137
EVENT_CONSTRAINT(0, 0x4, 0);
30213138

@@ -3037,6 +3154,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
30373154
return c;
30383155
}
30393156

3157+
static struct event_constraint *
3158+
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3159+
struct perf_event *event)
3160+
{
3161+
struct event_constraint *c;
3162+
3163+
/* :ppp means to do reduced skid PEBS which is PMC0 only. */
3164+
if (event->attr.precise_ip == 3)
3165+
return &counter0_constraint;
3166+
3167+
c = intel_get_event_constraints(cpuc, idx, event);
3168+
3169+
return c;
3170+
}
3171+
30403172
/*
30413173
* Broadwell:
30423174
*
@@ -3838,6 +3970,32 @@ __init int intel_pmu_init(void)
38383970
pr_cont("Goldmont events, ");
38393971
break;
38403972

3973+
case INTEL_FAM6_ATOM_GEMINI_LAKE:
3974+
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
3975+
sizeof(hw_cache_event_ids));
3976+
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
3977+
sizeof(hw_cache_extra_regs));
3978+
3979+
intel_pmu_lbr_init_skl();
3980+
3981+
x86_pmu.event_constraints = intel_slm_event_constraints;
3982+
x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
3983+
x86_pmu.extra_regs = intel_glm_extra_regs;
3984+
/*
3985+
* It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
3986+
* for precise cycles.
3987+
*/
3988+
x86_pmu.pebs_aliases = NULL;
3989+
x86_pmu.pebs_prec_dist = true;
3990+
x86_pmu.lbr_pt_coexist = true;
3991+
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
3992+
x86_pmu.get_event_constraints = glp_get_event_constraints;
3993+
x86_pmu.cpu_events = glm_events_attrs;
3994+
/* Goldmont Plus has 4-wide pipeline */
3995+
event_attr_td_total_slots_scale_glm.event_str = "4";
3996+
pr_cont("Goldmont plus events, ");
3997+
break;
3998+
38413999
case INTEL_FAM6_WESTMERE:
38424000
case INTEL_FAM6_WESTMERE_EP:
38434001
case INTEL_FAM6_WESTMERE_EX:

arch/x86/events/intel/ds.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,12 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
651651
EVENT_CONSTRAINT_END
652652
};
653653

654+
/*
 * PEBS constraints for Goldmont Plus: all four general-purpose
 * counters support PEBS, so any event may be sampled with PEBS on
 * any of them (counter mask 0xf).
 */
struct event_constraint intel_glp_pebs_event_constraints[] = {
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};
659+
654660
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
655661
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
656662
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */

arch/x86/events/perf_event.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,8 @@ extern struct event_constraint intel_slm_pebs_event_constraints[];
879879

880880
extern struct event_constraint intel_glm_pebs_event_constraints[];
881881

882+
extern struct event_constraint intel_glp_pebs_event_constraints[];
883+
882884
extern struct event_constraint intel_nehalem_pebs_event_constraints[];
883885

884886
extern struct event_constraint intel_westmere_pebs_event_constraints[];

0 commit comments

Comments
 (0)