Commit 63e6053

Merge tag 'perf-core-2022-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar:

 - Fix Intel Alder Lake PEBS memory access latency & data source
   profiling info bugs.

 - Use Intel large-PEBS hardware feature in more circumstances, to
   reduce PMI overhead & reduce sampling data.

 - Extend the lost-sample profiling output with the PERF_FORMAT_LOST
   ABI variant, which tells tooling the exact number of samples lost.

 - Add new IBS register bits definitions.

 - AMD uncore events: Add PerfMonV2 DF (Data Fabric) enhancements.

* tag 'perf-core-2022-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/ibs: Add new IBS register bits into header
  perf/x86/intel: Fix PEBS data source encoding for ADL
  perf/x86/intel: Fix PEBS memory access info encoding for ADL
  perf/core: Add a new read format to get a number of lost samples
  perf/x86/amd/uncore: Add PerfMonV2 RDPMC assignments
  perf/x86/amd/uncore: Add PerfMonV2 DF event format
  perf/x86/amd/uncore: Detect available DF counters
  perf/x86/amd/uncore: Use attr_update for format attributes
  perf/x86/amd/uncore: Use dynamic events array
  x86/events/intel/ds: Enable large PEBS for PERF_SAMPLE_WEIGHT_TYPE
2 parents 22a39c3 + 326ecc1 commit 63e6053
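
As background for the PERF_FORMAT_LOST item above, here is a minimal
userspace sketch of the new read format. It is an illustration only, not
part of this merge; it assumes a uapi perf_event.h carrying the new flag
(the fallback define mirrors the ABI value the merge adds) and requests
PERF_FORMAT_ID | PERF_FORMAT_LOST, so read() returns value, id and lost,
in that order.

/* Hypothetical example: read the lost-sample count of a counting event. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef PERF_FORMAT_LOST
#define PERF_FORMAT_LOST (1ULL << 4)    /* value added by this merge */
#endif

int main(void)
{
        struct perf_event_attr attr;
        struct { __u64 value, id, lost; } rf;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_SOFTWARE;
        attr.config = PERF_COUNT_SW_CPU_CLOCK;
        attr.read_format = PERF_FORMAT_ID | PERF_FORMAT_LOST;

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;

        usleep(10000);  /* let the counter tick */
        if (read(fd, &rf, sizeof(rf)) == sizeof(rf))
                printf("value=%llu id=%llu lost=%llu\n",
                       (unsigned long long)rf.value,
                       (unsigned long long)rf.id,
                       (unsigned long long)rf.lost);

        close(fd);
        return 0;
}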

File tree

10 files changed: +280, -84 lines


arch/x86/events/amd/uncore.c

Lines changed: 120 additions & 26 deletions
@@ -21,7 +21,6 @@
 #define NUM_COUNTERS_NB 4
 #define NUM_COUNTERS_L2 4
 #define NUM_COUNTERS_L3 6
-#define MAX_COUNTERS 6
 
 #define RDPMC_BASE_NB 6
 #define RDPMC_BASE_LLC 10
@@ -31,6 +30,7 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "amd_uncore: " fmt
 
+static int pmu_version;
 static int num_counters_llc;
 static int num_counters_nb;
 static bool l3_mask;
@@ -46,7 +46,7 @@ struct amd_uncore {
         u32 msr_base;
         cpumask_t *active_mask;
         struct pmu *pmu;
-        struct perf_event *events[MAX_COUNTERS];
+        struct perf_event **events;
         struct hlist_node node;
 };
 
@@ -158,6 +158,16 @@ static int amd_uncore_add(struct perf_event *event, int flags)
         hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 
+        /*
+         * The first four DF counters are accessible via RDPMC index 6 to 9
+         * followed by the L3 counters from index 10 to 15. For processors
+         * with more than four DF counters, the DF RDPMC assignments become
+         * discontiguous as the additional counters are accessible starting
+         * from index 16.
+         */
+        if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB)
+                hwc->event_base_rdpmc += NUM_COUNTERS_L3;
+
         if (flags & PERF_EF_START)
                 amd_uncore_start(event, PERF_EF_RELOAD);
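
(Aside: the helper below is hypothetical, not in the commit, and merely
restates the RDPMC arithmetic that the comment and code above establish.)

/*
 * Hypothetical helper: DF counters 0-3 map to RDPMC 6-9, the six L3
 * counters occupy RDPMC 10-15, and further DF counters continue at 16.
 */
static unsigned int df_rdpmc_index(unsigned int idx)
{
        unsigned int base = RDPMC_BASE_NB;      /* 6 */

        if (idx >= NUM_COUNTERS_NB)             /* idx >= 4 */
                base += NUM_COUNTERS_L3;        /* skip the L3 range 10-15 */

        return base + idx;                      /* 6..9, then 16 onward */
}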

@@ -209,10 +219,14 @@ static int amd_uncore_event_init(struct perf_event *event)
 {
         struct amd_uncore *uncore;
         struct hw_perf_event *hwc = &event->hw;
+        u64 event_mask = AMD64_RAW_EVENT_MASK_NB;
 
         if (event->attr.type != event->pmu->type)
                 return -ENOENT;
 
+        if (pmu_version >= 2 && is_nb_event(event))
+                event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB;
+
         /*
          * NB and Last level cache counters (MSRs) are shared across all cores
          * that share the same NB / Last level cache. On family 16h and below,
@@ -221,7 +235,7 @@
          * out. So we do not support sampling and per-thread events via
          * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
          */
-        hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+        hwc->config = event->attr.config & event_mask;
         hwc->idx = -1;
 
         if (event->cpu < 0)
@@ -247,6 +261,19 @@ static int amd_uncore_event_init(struct perf_event *event)
         return 0;
 }
 
+static umode_t
+amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+        return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
+               attr->mode : 0;
+}
+
+static umode_t
+amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+        return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
+}
+
 static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
                                             struct device_attribute *attr,
                                             char *buf)
@@ -287,8 +314,10 @@ static struct device_attribute format_attr_##_var = \
 
 DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
 DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
+DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */
 DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */
 DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
 DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
 DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
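
(Aside: the PerfMonV2 DF fields are split across non-contiguous config
bits. The packing helper below is hypothetical, not part of the commit;
it simply makes the event14v2/umask12 layouts declared above concrete,
using the kernel's u64 type.)

static inline u64 df_v2_raw_config(u64 event_select, u64 unit_mask)
{
        return  (event_select & 0xffULL)           |    /* config:0-7   */
                ((event_select & 0x3f00ULL) << 24) |    /* config:32-37 */
                ((unit_mask & 0xffULL) << 8)       |    /* config:8-15  */
                ((unit_mask & 0xf00ULL) << 16);         /* config:24-27 */
}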
@@ -297,20 +326,33 @@ DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */
 DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
 DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
 
+/* Common DF and NB attributes */
 static struct attribute *amd_uncore_df_format_attr[] = {
-        &format_attr_event12.attr, /* event14 if F17h+ */
-        &format_attr_umask.attr,
+        &format_attr_event12.attr, /* event */
+        &format_attr_umask8.attr,  /* umask */
         NULL,
 };
 
+/* Common L2 and L3 attributes */
 static struct attribute *amd_uncore_l3_format_attr[] = {
-        &format_attr_event12.attr, /* event8 if F17h+ */
-        &format_attr_umask.attr,
-        NULL, /* slicemask if F17h, coreid if F19h */
-        NULL, /* threadmask8 if F17h, enallslices if F19h */
-        NULL, /* enallcores if F19h */
-        NULL, /* sliceid if F19h */
-        NULL, /* threadmask2 if F19h */
+        &format_attr_event12.attr, /* event */
+        &format_attr_umask8.attr,  /* umask */
+        NULL,                      /* threadmask */
+        NULL,
+};
+
+/* F17h unique L3 attributes */
+static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
+        &format_attr_slicemask.attr, /* slicemask */
+        NULL,
+};
+
+/* F19h unique L3 attributes */
+static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
+        &format_attr_coreid.attr,      /* coreid */
+        &format_attr_enallslices.attr, /* enallslices */
+        &format_attr_enallcores.attr,  /* enallcores */
+        &format_attr_sliceid.attr,     /* sliceid */
         NULL,
 };

@@ -324,6 +366,18 @@ static struct attribute_group amd_uncore_l3_format_group = {
         .attrs = amd_uncore_l3_format_attr,
 };
 
+static struct attribute_group amd_f17h_uncore_l3_format_group = {
+        .name = "format",
+        .attrs = amd_f17h_uncore_l3_format_attr,
+        .is_visible = amd_f17h_uncore_is_visible,
+};
+
+static struct attribute_group amd_f19h_uncore_l3_format_group = {
+        .name = "format",
+        .attrs = amd_f19h_uncore_l3_format_attr,
+        .is_visible = amd_f19h_uncore_is_visible,
+};
+
 static const struct attribute_group *amd_uncore_df_attr_groups[] = {
         &amd_uncore_attr_group,
         &amd_uncore_df_format_group,
@@ -336,6 +390,12 @@ static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
         NULL,
 };
 
+static const struct attribute_group *amd_uncore_l3_attr_update[] = {
+        &amd_f17h_uncore_l3_format_group,
+        &amd_f19h_uncore_l3_format_group,
+        NULL,
+};
+
 static struct pmu amd_nb_pmu = {
         .task_ctx_nr = perf_invalid_context,
         .attr_groups = amd_uncore_df_attr_groups,
@@ -353,6 +413,7 @@
 static struct pmu amd_llc_pmu = {
         .task_ctx_nr = perf_invalid_context,
         .attr_groups = amd_uncore_l3_attr_groups,
+        .attr_update = amd_uncore_l3_attr_update,
         .name = "amd_l2",
         .event_init = amd_uncore_event_init,
         .add = amd_uncore_add,
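
(Aside: groups listed in a PMU's .attr_update are merged into its sysfs
directory at registration time, and each group's .is_visible callback is
consulted per attribute; that is how the family-specific format files
above appear only on matching CPUs. A minimal sketch of the pattern, with
hypothetical names:)

static bool my_feature_present;
static struct attribute *my_format_attrs[] = { NULL };

static umode_t my_is_visible(struct kobject *kobj,
                             struct attribute *attr, int i)
{
        /* Returning attr->mode exposes the file; returning 0 hides it. */
        return my_feature_present ? attr->mode : 0;
}

static struct attribute_group my_format_group = {
        .name = "format",               /* merged into the existing dir */
        .attrs = my_format_attrs,
        .is_visible = my_is_visible,
};

static const struct attribute_group *my_attr_update[] = {
        &my_format_group,
        NULL,
};

/* Set some_pmu.attr_update = my_attr_update before perf_pmu_register(). */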
@@ -370,11 +431,19 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
                         cpu_to_node(cpu));
 }
 
+static inline struct perf_event **
+amd_uncore_events_alloc(unsigned int num, unsigned int cpu)
+{
+        return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL,
+                            cpu_to_node(cpu));
+}
+
 static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 {
-        struct amd_uncore *uncore_nb = NULL, *uncore_llc;
+        struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL;
 
         if (amd_uncore_nb) {
+                *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
                 uncore_nb = amd_uncore_alloc(cpu);
                 if (!uncore_nb)
                         goto fail;
@@ -384,11 +453,15 @@
                 uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
                 uncore_nb->active_mask = &amd_nb_active_mask;
                 uncore_nb->pmu = &amd_nb_pmu;
+                uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu);
+                if (!uncore_nb->events)
+                        goto fail;
                 uncore_nb->id = -1;
                 *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
         }
 
         if (amd_uncore_llc) {
+                *per_cpu_ptr(amd_uncore_llc, cpu) = NULL;
                 uncore_llc = amd_uncore_alloc(cpu);
                 if (!uncore_llc)
                         goto fail;
@@ -398,16 +471,26 @@
                 uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
                 uncore_llc->active_mask = &amd_llc_active_mask;
                 uncore_llc->pmu = &amd_llc_pmu;
+                uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu);
+                if (!uncore_llc->events)
+                        goto fail;
                 uncore_llc->id = -1;
                 *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
         }
 
         return 0;
 
 fail:
-        if (amd_uncore_nb)
-                *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
-        kfree(uncore_nb);
+        if (uncore_nb) {
+                kfree(uncore_nb->events);
+                kfree(uncore_nb);
+        }
+
+        if (uncore_llc) {
+                kfree(uncore_llc->events);
+                kfree(uncore_llc);
+        }
+
         return -ENOMEM;
 }

@@ -540,8 +623,11 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
         if (cpu == uncore->cpu)
                 cpumask_clear_cpu(cpu, uncore->active_mask);
 
-        if (!--uncore->refcnt)
+        if (!--uncore->refcnt) {
+                kfree(uncore->events);
                 kfree(uncore);
+        }
+
         *per_cpu_ptr(uncores, cpu) = NULL;
 }

@@ -560,6 +646,7 @@ static int __init amd_uncore_init(void)
 {
         struct attribute **df_attr = amd_uncore_df_format_attr;
         struct attribute **l3_attr = amd_uncore_l3_format_attr;
+        union cpuid_0x80000022_ebx ebx;
         int ret = -ENODEV;
 
         if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
@@ -569,6 +656,9 @@
         if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
                 return -ENODEV;
 
+        if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
+                pmu_version = 2;
+
         num_counters_nb = NUM_COUNTERS_NB;
         num_counters_llc = NUM_COUNTERS_L2;
         if (boot_cpu_data.x86 >= 0x17) {
@@ -585,8 +675,12 @@
         }
 
         if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
-                if (boot_cpu_data.x86 >= 0x17)
+                if (pmu_version >= 2) {
+                        *df_attr++ = &format_attr_event14v2.attr;
+                        *df_attr++ = &format_attr_umask12.attr;
+                } else if (boot_cpu_data.x86 >= 0x17) {
                         *df_attr = &format_attr_event14.attr;
+                }
 
                 amd_uncore_nb = alloc_percpu(struct amd_uncore *);
                 if (!amd_uncore_nb) {
@@ -597,6 +691,11 @@
                 if (ret)
                         goto fail_nb;
 
+                if (pmu_version >= 2) {
+                        ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+                        num_counters_nb = ebx.split.num_df_pmc;
+                }
+
                 pr_info("%d %s %s counters detected\n", num_counters_nb,
                         boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
                         amd_nb_pmu.name);
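
(Aside: a hypothetical userspace cross-check of the same CPUID leaf. The
bit position of num_df_pmc is taken from the kernel's
union cpuid_0x80000022_ebx layout, EBX bits 15:10 of leaf 0x80000022,
and should be verified against the AMD APM.)

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 0x80000022 is EXT_PERFMON_DEBUG_FEATURES in the kernel. */
        if (!__get_cpuid_count(0x80000022, 0, &eax, &ebx, &ecx, &edx))
                return 1;

        printf("DF counters: %u\n", (ebx >> 10) & 0x3f); /* num_df_pmc */
        return 0;
}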
@@ -607,16 +706,11 @@
         if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
                 if (boot_cpu_data.x86 >= 0x19) {
                         *l3_attr++ = &format_attr_event8.attr;
-                        *l3_attr++ = &format_attr_umask.attr;
-                        *l3_attr++ = &format_attr_coreid.attr;
-                        *l3_attr++ = &format_attr_enallslices.attr;
-                        *l3_attr++ = &format_attr_enallcores.attr;
-                        *l3_attr++ = &format_attr_sliceid.attr;
+                        *l3_attr++ = &format_attr_umask8.attr;
                         *l3_attr++ = &format_attr_threadmask2.attr;
                 } else if (boot_cpu_data.x86 >= 0x17) {
                         *l3_attr++ = &format_attr_event8.attr;
-                        *l3_attr++ = &format_attr_umask.attr;
-                        *l3_attr++ = &format_attr_slicemask.attr;
+                        *l3_attr++ = &format_attr_umask8.attr;
                         *l3_attr++ = &format_attr_threadmask8.attr;
                 }

arch/x86/events/intel/core.c

Lines changed: 4 additions & 3 deletions
@@ -4141,6 +4141,8 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 {
         struct event_constraint *c;
 
+        c = intel_get_event_constraints(cpuc, idx, event);
+
         /*
          * :ppp means to do reduced skid PEBS,
          * which is available on PMC0 and fixed counter 0.
@@ -4153,8 +4155,6 @@
                 return &counter0_constraint;
         }
 
-        c = intel_get_event_constraints(cpuc, idx, event);
-
         return c;
 }
 
@@ -6241,7 +6241,8 @@ __init int intel_pmu_init(void)
                 x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
                 x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
                 x86_pmu.lbr_pt_coexist = true;
-                intel_pmu_pebs_data_source_skl(false);
+                intel_pmu_pebs_data_source_adl();
+                x86_pmu.pebs_latency_data = adl_latency_data_small;
                 x86_pmu.num_topdown_events = 8;
                 x86_pmu.update_topdown_event = adl_update_topdown_event;
                 x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
