@@ -21,7 +21,6 @@
 #define NUM_COUNTERS_NB		4
 #define NUM_COUNTERS_L2		4
 #define NUM_COUNTERS_L3		6
-#define MAX_COUNTERS		6
 
 #define RDPMC_BASE_NB		6
 #define RDPMC_BASE_LLC		10
@@ -31,6 +30,7 @@
 #undef pr_fmt
 #define pr_fmt(fmt)	"amd_uncore: " fmt
 
+static int pmu_version;
 static int num_counters_llc;
 static int num_counters_nb;
 static bool l3_mask;
@@ -46,7 +46,7 @@ struct amd_uncore {
 	u32 msr_base;
 	cpumask_t *active_mask;
 	struct pmu *pmu;
-	struct perf_event *events[MAX_COUNTERS];
+	struct perf_event **events;
 	struct hlist_node node;
 };
 
@@ -158,6 +158,16 @@ static int amd_uncore_add(struct perf_event *event, int flags)
 	hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 
+	/*
+	 * The first four DF counters are accessible via RDPMC index 6 to 9
+	 * followed by the L3 counters from index 10 to 15. For processors
+	 * with more than four DF counters, the DF RDPMC assignments become
+	 * discontiguous as the additional counters are accessible starting
+	 * from index 16.
+	 */
+	if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB)
+		hwc->event_base_rdpmc += NUM_COUNTERS_L3;
+
 	if (flags & PERF_EF_START)
 		amd_uncore_start(event, PERF_EF_RELOAD);
 
@@ -209,10 +219,14 @@ static int amd_uncore_event_init(struct perf_event *event)
 {
 	struct amd_uncore *uncore;
 	struct hw_perf_event *hwc = &event->hw;
+	u64 event_mask = AMD64_RAW_EVENT_MASK_NB;
 
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
+	if (pmu_version >= 2 && is_nb_event(event))
+		event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB;
+
 	/*
 	 * NB and Last level cache counters (MSRs) are shared across all cores
 	 * that share the same NB / Last level cache. On family 16h and below,
@@ -221,7 +235,7 @@ static int amd_uncore_event_init(struct perf_event *event)
 	 * out. So we do not support sampling and per-thread events via
 	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
 	 */
-	hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+	hwc->config = event->attr.config & event_mask;
 	hwc->idx = -1;
 
 	if (event->cpu < 0)
@@ -247,6 +261,19 @@ static int amd_uncore_event_init(struct perf_event *event)
 	return 0;
 }
 
+static umode_t
+amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
+	       attr->mode : 0;
+}
+
+static umode_t
+amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
+}
+
 static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
 					    struct device_attribute *attr,
 					    char *buf)
@@ -287,8 +314,10 @@ static struct device_attribute format_attr_##_var = \
 
 DEFINE_UNCORE_FORMAT_ATTR(event12,	event,		"config:0-7,32-35");
 DEFINE_UNCORE_FORMAT_ATTR(event14,	event,		"config:0-7,32-35,59-60"); /* F17h+ DF */
+DEFINE_UNCORE_FORMAT_ATTR(event14v2,	event,		"config:0-7,32-37");	  /* PerfMonV2 DF */
 DEFINE_UNCORE_FORMAT_ATTR(event8,	event,		"config:0-7");		  /* F17h+ L3 */
-DEFINE_UNCORE_FORMAT_ATTR(umask,	umask,		"config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask8,	umask,		"config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask12,	umask,		"config:8-15,24-27");	  /* PerfMonV2 DF */
 DEFINE_UNCORE_FORMAT_ATTR(coreid,	coreid,		"config:42-44");	  /* F19h L3 */
 DEFINE_UNCORE_FORMAT_ATTR(slicemask,	slicemask,	"config:48-51");	  /* F17h L3 */
 DEFINE_UNCORE_FORMAT_ATTR(threadmask8,	threadmask,	"config:56-63");	  /* F17h L3 */
@@ -297,20 +326,33 @@ DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3
 DEFINE_UNCORE_FORMAT_ATTR(enallcores,	enallcores,	"config:47");		  /* F19h L3 */
 DEFINE_UNCORE_FORMAT_ATTR(sliceid,	sliceid,	"config:48-50");	  /* F19h L3 */
 
+/* Common DF and NB attributes */
 static struct attribute *amd_uncore_df_format_attr[] = {
-	&format_attr_event12.attr, /* event14 if F17h+ */
-	&format_attr_umask.attr,
+	&format_attr_event12.attr,	/* event */
+	&format_attr_umask8.attr,	/* umask */
 	NULL,
 };
 
+/* Common L2 and L3 attributes */
 static struct attribute *amd_uncore_l3_format_attr[] = {
-	&format_attr_event12.attr, /* event8 if F17h+ */
-	&format_attr_umask.attr,
-	NULL, /* slicemask if F17h, coreid if F19h */
-	NULL, /* threadmask8 if F17h, enallslices if F19h */
-	NULL, /* enallcores if F19h */
-	NULL, /* sliceid if F19h */
-	NULL, /* threadmask2 if F19h */
+	&format_attr_event12.attr,	/* event */
+	&format_attr_umask8.attr,	/* umask */
+	NULL,				/* threadmask */
+	NULL,
+};
+
+/* F17h unique L3 attributes */
+static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
+	&format_attr_slicemask.attr,	/* slicemask */
+	NULL,
+};
+
+/* F19h unique L3 attributes */
+static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
+	&format_attr_coreid.attr,	/* coreid */
+	&format_attr_enallslices.attr,	/* enallslices */
+	&format_attr_enallcores.attr,	/* enallcores */
+	&format_attr_sliceid.attr,	/* sliceid */
 	NULL,
 };
@@ -324,6 +366,18 @@ static struct attribute_group amd_uncore_l3_format_group = {
 	.attrs = amd_uncore_l3_format_attr,
 };
 
+static struct attribute_group amd_f17h_uncore_l3_format_group = {
+	.name = "format",
+	.attrs = amd_f17h_uncore_l3_format_attr,
+	.is_visible = amd_f17h_uncore_is_visible,
+};
+
+static struct attribute_group amd_f19h_uncore_l3_format_group = {
+	.name = "format",
+	.attrs = amd_f19h_uncore_l3_format_attr,
+	.is_visible = amd_f19h_uncore_is_visible,
+};
+
 static const struct attribute_group *amd_uncore_df_attr_groups[] = {
 	&amd_uncore_attr_group,
 	&amd_uncore_df_format_group,
@@ -336,6 +390,12 @@ static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
 	NULL,
 };
 
+static const struct attribute_group *amd_uncore_l3_attr_update[] = {
+	&amd_f17h_uncore_l3_format_group,
+	&amd_f19h_uncore_l3_format_group,
+	NULL,
+};
+
 static struct pmu amd_nb_pmu = {
 	.task_ctx_nr = perf_invalid_context,
 	.attr_groups = amd_uncore_df_attr_groups,
@@ -353,6 +413,7 @@ static struct pmu amd_nb_pmu = {
 static struct pmu amd_llc_pmu = {
 	.task_ctx_nr = perf_invalid_context,
 	.attr_groups = amd_uncore_l3_attr_groups,
+	.attr_update = amd_uncore_l3_attr_update,
 	.name = "amd_l2",
 	.event_init = amd_uncore_event_init,
 	.add = amd_uncore_add,
@@ -370,11 +431,19 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
 			cpu_to_node(cpu));
 }
 
+static inline struct perf_event **
+amd_uncore_events_alloc(unsigned int num, unsigned int cpu)
+{
+	return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL,
+			    cpu_to_node(cpu));
+}
+
 static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 {
-	struct amd_uncore *uncore_nb = NULL, *uncore_llc;
+	struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL;
 
 	if (amd_uncore_nb) {
+		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
 		uncore_nb = amd_uncore_alloc(cpu);
 		if (!uncore_nb)
 			goto fail;
@@ -384,11 +453,15 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
 		uncore_nb->active_mask = &amd_nb_active_mask;
 		uncore_nb->pmu = &amd_nb_pmu;
+		uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu);
+		if (!uncore_nb->events)
+			goto fail;
 		uncore_nb->id = -1;
 		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
 	}
 
 	if (amd_uncore_llc) {
+		*per_cpu_ptr(amd_uncore_llc, cpu) = NULL;
 		uncore_llc = amd_uncore_alloc(cpu);
 		if (!uncore_llc)
 			goto fail;
@@ -398,16 +471,26 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 		uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
 		uncore_llc->active_mask = &amd_llc_active_mask;
 		uncore_llc->pmu = &amd_llc_pmu;
+		uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu);
+		if (!uncore_llc->events)
+			goto fail;
 		uncore_llc->id = -1;
 		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
 	}
 
 	return 0;
 
 fail:
-	if (amd_uncore_nb)
-		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
-	kfree(uncore_nb);
+	if (uncore_nb) {
+		kfree(uncore_nb->events);
+		kfree(uncore_nb);
+	}
+
+	if (uncore_llc) {
+		kfree(uncore_llc->events);
+		kfree(uncore_llc);
+	}
+
 	return -ENOMEM;
 }
 
@@ -540,8 +623,11 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
 	if (cpu == uncore->cpu)
 		cpumask_clear_cpu(cpu, uncore->active_mask);
 
-	if (!--uncore->refcnt)
+	if (!--uncore->refcnt) {
+		kfree(uncore->events);
 		kfree(uncore);
+	}
+
 	*per_cpu_ptr(uncores, cpu) = NULL;
 }
 
@@ -560,6 +646,7 @@ static int __init amd_uncore_init(void)
 {
 	struct attribute **df_attr = amd_uncore_df_format_attr;
 	struct attribute **l3_attr = amd_uncore_l3_format_attr;
+	union cpuid_0x80000022_ebx ebx;
 	int ret = -ENODEV;
 
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
@@ -569,6 +656,9 @@ static int __init amd_uncore_init(void)
 	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
 		return -ENODEV;
 
+	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
+		pmu_version = 2;
+
 	num_counters_nb = NUM_COUNTERS_NB;
 	num_counters_llc = NUM_COUNTERS_L2;
 	if (boot_cpu_data.x86 >= 0x17) {
@@ -585,8 +675,12 @@
 	}
 
 	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
-		if (boot_cpu_data.x86 >= 0x17)
+		if (pmu_version >= 2) {
+			*df_attr++ = &format_attr_event14v2.attr;
+			*df_attr++ = &format_attr_umask12.attr;
+		} else if (boot_cpu_data.x86 >= 0x17) {
 			*df_attr = &format_attr_event14.attr;
+		}
 
 		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
 		if (!amd_uncore_nb) {
@@ -597,6 +691,11 @@
 		if (ret)
 			goto fail_nb;
 
+		if (pmu_version >= 2) {
+			ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+			num_counters_nb = ebx.split.num_df_pmc;
+		}
+
 		pr_info("%d %s %s counters detected\n", num_counters_nb,
 			boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
 			amd_nb_pmu.name);
@@ -607,16 +706,11 @@
 	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
 		if (boot_cpu_data.x86 >= 0x19) {
 			*l3_attr++ = &format_attr_event8.attr;
-			*l3_attr++ = &format_attr_umask.attr;
-			*l3_attr++ = &format_attr_coreid.attr;
-			*l3_attr++ = &format_attr_enallslices.attr;
-			*l3_attr++ = &format_attr_enallcores.attr;
-			*l3_attr++ = &format_attr_sliceid.attr;
+			*l3_attr++ = &format_attr_umask8.attr;
 			*l3_attr++ = &format_attr_threadmask2.attr;
 		} else if (boot_cpu_data.x86 >= 0x17) {
 			*l3_attr++ = &format_attr_event8.attr;
-			*l3_attr++ = &format_attr_umask.attr;
-			*l3_attr++ = &format_attr_slicemask.attr;
+			*l3_attr++ = &format_attr_umask8.attr;
 			*l3_attr++ = &format_attr_threadmask8.attr;
 		}
 
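For reference, below is a minimal standalone sketch (not part of the patch) of the RDPMC index layout described by the new comment in amd_uncore_add(). The helper df_rdpmc_index() is hypothetical and exists only to illustrate the arithmetic: DF counters 0-3 map to RDPMC indices 6-9, the L3 counters occupy 10-15, and any additional DF counters continue at index 16.

```c
/* Illustrative only; mirrors the adjustment made in amd_uncore_add(). */
#include <stdio.h>

#define NUM_COUNTERS_NB  4   /* legacy DF counter count */
#define NUM_COUNTERS_L3  6
#define RDPMC_BASE_NB    6
#define RDPMC_BASE_LLC   10

/* Hypothetical helper: map a DF counter index to its RDPMC index. */
static unsigned int df_rdpmc_index(unsigned int idx)
{
	unsigned int rdpmc = RDPMC_BASE_NB + idx;

	/* DF counters past the first four skip over the six L3 indices. */
	if (idx >= NUM_COUNTERS_NB)
		rdpmc += NUM_COUNTERS_L3;

	return rdpmc;
}

int main(void)
{
	unsigned int idx;

	for (idx = 0; idx < 8; idx++)
		printf("DF counter %u -> RDPMC index %u\n", idx, df_rdpmc_index(idx));

	/* Expected: counters 0-3 -> 6-9, counters 4-7 -> 16-19 (10-15 belong to L3). */
	return 0;
}
```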