Commit b381e63

Ingo Molnar authored and committed

Merge branch 'perf/core' into perf/timer, before applying new changes

Signed-off-by: Ingo Molnar <[email protected]>

2 parents: 4e6d7c2 + ccd41c8

236 files changed: +7925 / -2408 lines


arch/arm/kernel/hw_breakpoint.c

Lines changed: 1 addition & 1 deletion
@@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
          * Per-cpu breakpoints are not supported by our stepping
          * mechanism.
          */
-        if (!bp->hw.bp_target)
+        if (!bp->hw.target)
                 return -EINVAL;

         /*

arch/arm64/kernel/hw_breakpoint.c

Lines changed: 1 addition & 1 deletion
@@ -527,7 +527,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
          * Disallow per-task kernel breakpoints since these would
          * complicate the stepping code.
          */
-        if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target)
+        if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
                 return -EINVAL;

         return 0;
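
Note: both breakpoint hunks above only rename a field — the task a hardware breakpoint is pinned to moves from the arch-specific bp->hw.bp_target to the generic bp->hw.target. A minimal sketch of what the checks express; the helper name is invented, not part of the commit:

#include <linux/perf_event.h>

/* a NULL hw.target means the breakpoint is CPU-pinned rather than task-bound */
static bool example_bp_is_per_cpu(struct perf_event *bp)
{
        return bp->hw.target == NULL;
}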

arch/powerpc/perf/core-book3s.c

Lines changed: 9 additions & 4 deletions
@@ -124,7 +124,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)

 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-static void power_pmu_flush_branch_stack(void) {}
+static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
 static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
@@ -350,6 +350,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
                 cpuhw->bhrb_context = event->ctx;
         }
         cpuhw->bhrb_users++;
+        perf_sched_cb_inc(event->ctx->pmu);
 }

 static void power_pmu_bhrb_disable(struct perf_event *event)
@@ -361,6 +362,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)

         cpuhw->bhrb_users--;
         WARN_ON_ONCE(cpuhw->bhrb_users < 0);
+        perf_sched_cb_dec(event->ctx->pmu);

         if (!cpuhw->disabled && !cpuhw->bhrb_users) {
                 /* BHRB cannot be turned off when other
@@ -375,9 +377,12 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
 /* Called from ctxsw to prevent one process's branch entries to
  * mingle with the other process's entries during context switch.
  */
-static void power_pmu_flush_branch_stack(void)
+static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-        if (ppmu->bhrb_nr)
+        if (!ppmu->bhrb_nr)
+                return;
+
+        if (sched_in)
                 power_pmu_bhrb_reset();
 }
 /* Calculate the to address for a branch */
@@ -1901,7 +1906,7 @@ static struct pmu power_pmu = {
         .cancel_txn = power_pmu_cancel_txn,
         .commit_txn = power_pmu_commit_txn,
         .event_idx = power_pmu_event_idx,
-        .flush_branch_stack = power_pmu_flush_branch_stack,
+        .sched_task = power_pmu_sched_task,
 };

 /*
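
Note: the powerpc changes swap the old flush_branch_stack() hook for the new sched_task() callback and pair it with perf_sched_cb_inc()/perf_sched_cb_dec(), so the core only invokes the callback while BHRB users exist. A minimal sketch of that pattern for a hypothetical driver; every example_* name is invented and not part of this commit:

#include <linux/perf_event.h>

/* hypothetical hardware scrub, standing in for power_pmu_bhrb_reset() */
static void example_reset_branch_buffer(void) { }

static void example_sched_task(struct perf_event_context *ctx, bool sched_in)
{
        /* only scrub branch state when the new task is scheduled in */
        if (sched_in)
                example_reset_branch_buffer();
}

static void example_event_enable(struct perf_event *event)
{
        /* start receiving ->sched_task() callbacks for this PMU */
        perf_sched_cb_inc(event->ctx->pmu);
}

static void example_event_disable(struct perf_event *event)
{
        /* stop the callbacks once the last user goes away */
        perf_sched_cb_dec(event->ctx->pmu);
}

static struct pmu example_pmu = {
        /* ...mandatory callbacks omitted for brevity... */
        .sched_task = example_sched_task,
};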

arch/x86/include/asm/cpufeature.h

Lines changed: 8 additions & 1 deletion
@@ -12,7 +12,7 @@
 #include <asm/disabled-features.h>
 #endif

-#define NCAPINTS 11 /* N 32-bit words worth of info */
+#define NCAPINTS 13 /* N 32-bit words worth of info */
 #define NBUGINTS 1 /* N 32-bit bug flags */

 /*
@@ -226,6 +226,7 @@
 #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
 #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
 #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
 #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
 #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
@@ -242,6 +243,12 @@
 #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
 #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */

+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+
 /*
  * BUG word(s)
  */
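
Note: each feature macro above encodes a word index into c->x86_capability[] plus a bit number, so (11*32+ 1) means bit 1 of capability word 11. A hedged sketch of how such a bit is consumed — this mirrors, but is not, the kernel's cpu_has() (struct cpuinfo_x86 comes from <asm/processor.h>):

/* feature / 32 selects the capability word, feature % 32 the bit inside it */
static inline int example_cpu_has(const struct cpuinfo_x86 *c, int feature)
{
        return (c->x86_capability[feature / 32] >> (feature % 32)) & 1;
}

/* example_cpu_has(c, X86_FEATURE_CQM_LLC) tests word 11, bit 1 (CPUID.0xF:0 EDX) */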

arch/x86/include/asm/processor.h

Lines changed: 3 additions & 0 deletions
@@ -109,6 +109,9 @@ struct cpuinfo_x86 {
         /* in KB - valid for CPUS which support this call: */
         int x86_cache_size;
         int x86_cache_alignment; /* In bytes */
+        /* Cache QoS architectural values: */
+        int x86_cache_max_rmid; /* max index */
+        int x86_cache_occ_scale; /* scale to bytes */
         int x86_power;
         unsigned long loops_per_jiffy;
         /* cpuid returned max cores value: */
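
Note: the two new fields hold the raw CPUID.0xF values — x86_cache_max_rmid is the highest usable Resource Monitoring ID and x86_cache_occ_scale converts a raw occupancy counter reading into bytes. A tiny hedged sketch of that conversion; the helper name is invented:

/* raw LLC occupancy counts are reported in units of occ_scale bytes */
static inline u64 example_llc_occupancy_bytes(u64 raw_count, int occ_scale)
{
        return raw_count * (u64)occ_scale;
}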

arch/x86/kernel/cpu/Makefile

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
 endif
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o

 obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
                                           perf_event_intel_uncore_snb.o \

arch/x86/kernel/cpu/common.c

Lines changed: 39 additions & 0 deletions
@@ -646,6 +646,30 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
                 c->x86_capability[10] = eax;
         }

+        /* Additional Intel-defined flags: level 0x0000000F */
+        if (c->cpuid_level >= 0x0000000F) {
+                u32 eax, ebx, ecx, edx;
+
+                /* QoS sub-leaf, EAX=0Fh, ECX=0 */
+                cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
+                c->x86_capability[11] = edx;
+                if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
+                        /* will be overridden if occupancy monitoring exists */
+                        c->x86_cache_max_rmid = ebx;
+
+                        /* QoS sub-leaf, EAX=0Fh, ECX=1 */
+                        cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
+                        c->x86_capability[12] = edx;
+                        if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
+                                c->x86_cache_max_rmid = ecx;
+                                c->x86_cache_occ_scale = ebx;
+                        }
+                } else {
+                        c->x86_cache_max_rmid = -1;
+                        c->x86_cache_occ_scale = -1;
+                }
+        }
+
         /* AMD-defined flags: level 0x80000001 */
         xlvl = cpuid_eax(0x80000000);
         c->extended_cpuid_level = xlvl;
@@ -834,6 +858,20 @@ static void generic_identify(struct cpuinfo_x86 *c)
         detect_nopl(c);
 }

+static void x86_init_cache_qos(struct cpuinfo_x86 *c)
+{
+        /*
+         * The heavy lifting of max_rmid and cache_occ_scale are handled
+         * in get_cpu_cap().  Here we just set the max_rmid for the boot_cpu
+         * in case CQM bits really aren't there in this CPU.
+         */
+        if (c != &boot_cpu_data) {
+                boot_cpu_data.x86_cache_max_rmid =
+                        min(boot_cpu_data.x86_cache_max_rmid,
+                            c->x86_cache_max_rmid);
+        }
+}
+
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
@@ -923,6 +961,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)

         init_hypervisor(c);
         x86_init_rdrand(c);
+        x86_init_cache_qos(c);

         /*
          * Clear/Set all flags overriden by options, need do it
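
Note: the enumeration get_cpu_cap() performs above can be reproduced from user space, which is a handy way to sanity-check a machine. A hedged sketch (not part of the commit) using the GCC/Clang <cpuid.h> helpers; the bit positions follow the word-11/word-12 definitions added in cpufeature.h:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx, max_leaf;

        __cpuid(0, max_leaf, ebx, ecx, edx);
        if (max_leaf < 0x0000000F)
                return 0;                       /* no QoS monitoring leaf */

        __cpuid_count(0x0000000F, 0, eax, ebx, ecx, edx);
        if (!(edx & (1u << 1)))                 /* EDX bit 1: LLC QoS supported */
                return 0;
        printf("max RMID (0xF:0 EBX): %u\n", ebx);

        __cpuid_count(0x0000000F, 1, eax, ebx, ecx, edx);
        if (edx & (1u << 0))                    /* EDX bit 0: LLC occupancy monitoring */
                printf("occupancy scale %u bytes, max RMID %u\n", ebx, ecx);

        return 0;
}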

arch/x86/kernel/cpu/perf_event.c

Lines changed: 46 additions & 36 deletions
@@ -399,39 +399,41 @@ int x86_pmu_hw_config(struct perf_event *event)

                 if (event->attr.precise_ip > precise)
                         return -EOPNOTSUPP;
-                /*
-                 * check that PEBS LBR correction does not conflict with
-                 * whatever the user is asking with attr->branch_sample_type
-                 */
-                if (event->attr.precise_ip > 1 &&
-                    x86_pmu.intel_cap.pebs_format < 2) {
-                        u64 *br_type = &event->attr.branch_sample_type;
-
-                        if (has_branch_stack(event)) {
-                                if (!precise_br_compat(event))
-                                        return -EOPNOTSUPP;
-
-                                /* branch_sample_type is compatible */
-
-                        } else {
-                                /*
-                                 * user did not specify branch_sample_type
-                                 *
-                                 * For PEBS fixups, we capture all
-                                 * the branches at the priv level of the
-                                 * event.
-                                 */
-                                *br_type = PERF_SAMPLE_BRANCH_ANY;
-
-                                if (!event->attr.exclude_user)
-                                        *br_type |= PERF_SAMPLE_BRANCH_USER;
-
-                                if (!event->attr.exclude_kernel)
-                                        *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
-                        }
+        }
+        /*
+         * check that PEBS LBR correction does not conflict with
+         * whatever the user is asking with attr->branch_sample_type
+         */
+        if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
+                u64 *br_type = &event->attr.branch_sample_type;
+
+                if (has_branch_stack(event)) {
+                        if (!precise_br_compat(event))
+                                return -EOPNOTSUPP;
+
+                        /* branch_sample_type is compatible */
+
+                } else {
+                        /*
+                         * user did not specify branch_sample_type
+                         *
+                         * For PEBS fixups, we capture all
+                         * the branches at the priv level of the
+                         * event.
+                         */
+                        *br_type = PERF_SAMPLE_BRANCH_ANY;
+
+                        if (!event->attr.exclude_user)
+                                *br_type |= PERF_SAMPLE_BRANCH_USER;
+
+                        if (!event->attr.exclude_kernel)
+                                *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
                 }
         }

+        if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+                event->attach_state |= PERF_ATTACH_TASK_DATA;
+
         /*
          * Generate PMC IRQs:
          * (keep 'enabled' bit clear for now)
@@ -449,6 +451,12 @@ int x86_pmu_hw_config(struct perf_event *event)
         if (event->attr.type == PERF_TYPE_RAW)
                 event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;

+        if (event->attr.sample_period && x86_pmu.limit_period) {
+                if (x86_pmu.limit_period(event, event->attr.sample_period) >
+                                event->attr.sample_period)
+                        return -EINVAL;
+        }
+
         return x86_setup_perfctr(event);
 }

@@ -986,6 +994,9 @@ int x86_perf_event_set_period(struct perf_event *event)
         if (left > x86_pmu.max_period)
                 left = x86_pmu.max_period;

+        if (x86_pmu.limit_period)
+                left = x86_pmu.limit_period(event, left);
+
         per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

         /*
@@ -1033,7 +1044,6 @@ static int x86_pmu_add(struct perf_event *event, int flags)

         hwc = &event->hw;

-        perf_pmu_disable(event->pmu);
         n0 = cpuc->n_events;
         ret = n = collect_events(cpuc, event, false);
         if (ret < 0)
@@ -1071,7 +1081,6 @@ static int x86_pmu_add(struct perf_event *event, int flags)

         ret = 0;
 out:
-        perf_pmu_enable(event->pmu);
         return ret;
 }

@@ -1914,10 +1923,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
         NULL,
 };

-static void x86_pmu_flush_branch_stack(void)
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-        if (x86_pmu.flush_branch_stack)
-                x86_pmu.flush_branch_stack();
+        if (x86_pmu.sched_task)
+                x86_pmu.sched_task(ctx, sched_in);
 }

 void perf_check_microcode(void)
@@ -1949,7 +1958,8 @@ static struct pmu pmu = {
         .commit_txn = x86_pmu_commit_txn,

         .event_idx = x86_pmu_event_idx,
-        .flush_branch_stack = x86_pmu_flush_branch_stack,
+        .sched_task = x86_pmu_sched_task,
+        .task_ctx_size = sizeof(struct x86_perf_task_context),
 };

 void arch_perf_update_userpage(struct perf_event *event,
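
Note: the new limit_period hook wired up above has a simple contract: given a requested sample period it returns the period the hardware can actually honour, which may only be equal or larger. x86_pmu_hw_config() rejects an event whose user-specified period would have to be raised, and x86_perf_event_set_period() clamps the running period on every reload. A hedged sketch of such a callback; the constant and name are invented, not a real erratum workaround:

#define EXAMPLE_MIN_PERIOD 128U                 /* hypothetical hardware lower bound */

static unsigned int example_limit_period(struct perf_event *event, unsigned int left)
{
        if (left < EXAMPLE_MIN_PERIOD)
                left = EXAMPLE_MIN_PERIOD;      /* raise to the hardware minimum */

        return left;
}

/* a CPU-model init path would then set: x86_pmu.limit_period = example_limit_period; */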

arch/x86/kernel/cpu/perf_event.h

Lines changed: 20 additions & 1 deletion
@@ -451,6 +451,7 @@ struct x86_pmu {
         struct x86_pmu_quirk *quirks;
         int perfctr_second_write;
         bool late_ack;
+        unsigned (*limit_period)(struct perf_event *event, unsigned l);

         /*
          * sysfs attrs
@@ -472,7 +473,8 @@ struct x86_pmu {
         void (*cpu_dead)(int cpu);

         void (*check_microcode)(void);
-        void (*flush_branch_stack)(void);
+        void (*sched_task)(struct perf_event_context *ctx,
+                           bool sched_in);

         /*
          * Intel Arch Perfmon v2+
@@ -515,6 +517,13 @@ struct x86_pmu {
         struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
 };

+struct x86_perf_task_context {
+        u64 lbr_from[MAX_LBR_ENTRIES];
+        u64 lbr_to[MAX_LBR_ENTRIES];
+        int lbr_callstack_users;
+        int lbr_stack_state;
+};
+
 #define x86_add_quirk(func_) \
 do { \
         static struct x86_pmu_quirk __quirk __initdata = { \
@@ -546,6 +555,12 @@ static struct perf_pmu_events_attr event_attr_##v = { \

 extern struct x86_pmu x86_pmu __read_mostly;

+static inline bool x86_pmu_has_lbr_callstack(void)
+{
+        return x86_pmu.lbr_sel_map &&
+                x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
+}
+
 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

 int x86_perf_event_set_period(struct perf_event *event);
@@ -727,6 +742,8 @@ void intel_pmu_pebs_disable_all(void);

 void intel_ds_init(void);

+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+
 void intel_pmu_lbr_reset(void);

 void intel_pmu_lbr_enable(struct perf_event *event);
@@ -747,6 +764,8 @@ void intel_pmu_lbr_init_atom(void);

 void intel_pmu_lbr_init_snb(void);

+void intel_pmu_lbr_init_hsw(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);

 int p4_pmu_init(void);
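
Note: task_ctx_size in struct pmu (set in perf_event.c above) tells the core how much per-task storage to allocate for an event context, and the new x86_perf_task_context is what that buffer holds: a saved copy of the LBR stack plus bookkeeping. A hedged sketch of the shape of a sched_task handler using it, assuming the core exposes the allocated buffer as ctx->task_ctx_data (as the task_ctx_size plumbing suggests); the save/restore helpers are invented, and the real logic lives in perf_event_intel_lbr.c:

/* hypothetical helpers: copy the LBR MSRs to/from the per-task buffer */
static void example_save_lbrs(struct x86_perf_task_context *tc) { }
static void example_restore_lbrs(struct x86_perf_task_context *tc) { }

static void example_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
        struct x86_perf_task_context *task_ctx = ctx->task_ctx_data;

        if (!task_ctx)
                return;         /* no per-task data allocated for this context */

        if (sched_in)
                example_restore_lbrs(task_ctx);
        else
                example_save_lbrs(task_ctx);
}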
