
Commit 09e61b4

Peter Zijlstra authored and Ingo Molnar committed
perf/x86/intel: Rework the large PEBS setup code
In order to allow optimizing perf_pmu_sched_task() we must ensure that
perf_sched_cb_{inc,dec}() are no longer called from NMI context; this
means that pmu::{start,stop}() can no longer use them.

Prepare for this by reworking the whole large PEBS setup code.

The current code relied on the cpuc->pebs_enabled state; however, since
that reflects the current active state as per pmu::{start,stop}(), we
can no longer rely on it.

Introduce two counters: cpuc->n_pebs and cpuc->n_large_pebs, which count
the total number of PEBS events and the number of PEBS events that have
FREERUNNING set, respectively. With these we can tell whether the
current setup requires a single-record interrupt threshold or can use a
larger buffer.

This also improves the code in that it re-enables the large threshold
once the PEBS event that required the single-record threshold gets
removed.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vince Weaver <[email protected]>
Cc: [email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 3f005e7 commit 09e61b4
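The decision the new counters encode can be restated compactly. A minimal sketch follows; it is not part of the patch, the helper name can_use_large_pebs is illustrative only, while cpuc->n_pebs, cpuc->n_large_pebs and PERF_X86_EVENT_FREERUNNING are the names used in the diff below:

/*
 * Sketch only: a large (multi-record) PEBS interrupt threshold is possible
 * only when every scheduled PEBS event has FREERUNNING set; a single event
 * without it forces the single-record threshold for all of them.
 */
static inline bool can_use_large_pebs(struct cpu_hw_events *cpuc)
{
	return cpuc->n_pebs && cpuc->n_pebs == cpuc->n_large_pebs;
}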

3 files changed: +73, -35 lines


arch/x86/events/intel/ds.c

Lines changed: 67 additions & 35 deletions
@@ -806,22 +806,67 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
+{
+	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+	struct debug_store *ds = cpuc->ds;
+	u64 threshold;
+
+	if (cpuc->n_pebs == cpuc->n_large_pebs) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
+
+	ds->pebs_interrupt_threshold = threshold;
+}
+
+static void
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+{
+	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+		if (!needed_cb)
+			perf_sched_cb_inc(pmu);
+		else
+			perf_sched_cb_dec(pmu);
+
+		pebs_update_threshold(cpuc);
+	}
+}
+
+static void intel_pmu_pebs_add(struct perf_event *event)
 {
-	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs++;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs++;
+
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
-	bool first_pebs;
-	u64 threshold;
+
+	intel_pmu_pebs_add(event);
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +875,34 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 		cpuc->pebs_enabled |= 1ULL << 63;
 
 	/*
-	 * When the event is constrained enough we can use a larger
-	 * threshold and run the event with less frequent PMI.
+	 * Use auto-reload if possible to save a MSR write in the PMI.
+	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 	 */
-	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-		threshold = ds->pebs_absolute_maximum -
-			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-		if (first_pebs)
-			perf_sched_cb_inc(event->ctx->pmu);
-	} else {
-		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-		/*
-		 * If not all events can use larger buffer,
-		 * roll back to threshold = 1
-		 */
-		if (!first_pebs &&
-		    (ds->pebs_interrupt_threshold > threshold))
-			perf_sched_cb_dec(event->ctx->pmu);
-	}
-
-	/* Use auto-reload if possible to save a MSR write in the PMI */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	}
+}
+
+static void intel_pmu_pebs_del(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs--;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs--;
 
-	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-		ds->pebs_interrupt_threshold = threshold;
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	struct debug_store *ds = cpuc->ds;
-	bool large_pebs = ds->pebs_interrupt_threshold >
-		ds->pebs_buffer_base + x86_pmu.pebs_record_size;
 
-	if (large_pebs)
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
 		intel_pmu_drain_pebs_buffer();
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -879,13 +912,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
-	if (large_pebs && !pebs_is_enabled(cpuc))
-		perf_sched_cb_dec(event->ctx->pmu);
-
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+
+	intel_pmu_pebs_del(event);
 }
 
 void intel_pmu_pebs_enable_all(void)

arch/x86/events/perf_event.h

Lines changed: 2 additions & 0 deletions
@@ -194,6 +194,8 @@ struct cpu_hw_events {
 	 */
 	struct debug_store	*ds;
 	u64			pebs_enabled;
+	int			n_pebs;
+	int			n_large_pebs;
 
 	/*
 	 * Intel LBR bits

kernel/events/core.c

Lines changed: 4 additions & 0 deletions
@@ -2818,6 +2818,10 @@ void perf_sched_cb_inc(struct pmu *pmu)
 /*
  * This function provides the context switch callback to the lower code
  * layer. It is invoked ONLY when the context switch callback is enabled.
+ *
+ * This callback is relevant even to per-cpu events; for example multi event
+ * PEBS requires this to provide PID/TID information. This requires we flush
+ * all queued PEBS records before we context switch to a new task.
  */
 static void perf_pmu_sched_task(struct task_struct *prev,
 				struct task_struct *next,
