Skip to content

Commit b752ea0

Browse files
Kan LiangPeter Zijlstra
authored andcommitted
perf/x86/intel/ds: Flush PEBS DS when changing PEBS_DATA_CFG
Several similar kernel warnings can be triggered, [56605.607840] CPU0 PEBS record size 0, expected 32, config 0 cpuc->record_size=208 when the below commands are running in parallel for a while on SPR. while true; do perf record --no-buildid -a --intr-regs=AX \ -e cpu/event=0xd0,umask=0x81/pp \ -c 10003 -o /dev/null ./triad; done & while true; do perf record -o /tmp/out -W -d \ -e '{ld_blocks.store_forward:period=1000000, \ MEM_TRANS_RETIRED.LOAD_LATENCY:u:precise=2:ldlat=4}' \ -c 1037 ./triad; done The triad program is just the generation of loads/stores. The warnings are triggered when an unexpected PEBS record (with a different config and size) is found. A system-wide PEBS event with the large PEBS config may be enabled during a context switch. Some PEBS records for the system-wide PEBS may be generated while the old task is sched out but the new one hasn't been sched in yet. When the new task is sched in, the cpuc->pebs_record_size may be updated for the per-task PEBS events. So the existing system-wide PEBS records have a different size from the later PEBS records. The PEBS buffer should be flushed right before the hardware is reprogrammed. The new size and threshold should be updated after the old buffer has been flushed. Reported-by: Stephane Eranian <[email protected]> Suggested-by: Peter Zijlstra (Intel) <[email protected]> Signed-off-by: Kan Liang <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 90befef commit b752ea0

File tree

2 files changed

+35
-24
lines changed

2 files changed

+35
-24
lines changed

arch/x86/events/intel/ds.c

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,20 +1229,22 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
12291229
struct perf_event *event, bool add)
12301230
{
12311231
struct pmu *pmu = event->pmu;
1232+
12321233
/*
12331234
* Make sure we get updated with the first PEBS
12341235
* event. It will trigger also during removal, but
12351236
* that does not hurt:
12361237
*/
1237-
bool update = cpuc->n_pebs == 1;
1238+
if (cpuc->n_pebs == 1)
1239+
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
12381240

12391241
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
12401242
if (!needed_cb)
12411243
perf_sched_cb_inc(pmu);
12421244
else
12431245
perf_sched_cb_dec(pmu);
12441246

1245-
update = true;
1247+
cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
12461248
}
12471249

12481250
/*
@@ -1252,24 +1254,13 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
12521254
if (x86_pmu.intel_cap.pebs_baseline && add) {
12531255
u64 pebs_data_cfg;
12541256

1255-
/* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
1256-
if (cpuc->n_pebs == 1) {
1257-
cpuc->pebs_data_cfg = 0;
1258-
cpuc->pebs_record_size = sizeof(struct pebs_basic);
1259-
}
1260-
12611257
pebs_data_cfg = pebs_update_adaptive_cfg(event);
1262-
1263-
/* Update pebs_record_size if new event requires more data. */
1264-
if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
1265-
cpuc->pebs_data_cfg |= pebs_data_cfg;
1266-
adaptive_pebs_record_size_update();
1267-
update = true;
1268-
}
1258+
/*
1259+
* Be sure to update the thresholds when we change the record.
1260+
*/
1261+
if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
1262+
cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
12691263
}
1270-
1271-
if (update)
1272-
pebs_update_threshold(cpuc);
12731264
}
12741265

12751266
void intel_pmu_pebs_add(struct perf_event *event)
@@ -1326,9 +1317,17 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
13261317
wrmsrl(base + idx, value);
13271318
}
13281319

1320+
static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
1321+
{
1322+
if (cpuc->n_pebs == cpuc->n_large_pebs &&
1323+
cpuc->n_pebs != cpuc->n_pebs_via_pt)
1324+
intel_pmu_drain_pebs_buffer();
1325+
}
1326+
13291327
void intel_pmu_pebs_enable(struct perf_event *event)
13301328
{
13311329
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1330+
u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
13321331
struct hw_perf_event *hwc = &event->hw;
13331332
struct debug_store *ds = cpuc->ds;
13341333
unsigned int idx = hwc->idx;
@@ -1344,11 +1343,22 @@ void intel_pmu_pebs_enable(struct perf_event *event)
13441343

13451344
if (x86_pmu.intel_cap.pebs_baseline) {
13461345
hwc->config |= ICL_EVENTSEL_ADAPTIVE;
1347-
if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1348-
wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
1349-
cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
1346+
if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1347+
/*
1348+
* drain_pebs() assumes uniform record size;
1349+
* hence we need to drain when changing said
1350+
* size.
1351+
*/
1352+
intel_pmu_drain_large_pebs(cpuc);
1353+
adaptive_pebs_record_size_update();
1354+
wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
1355+
cpuc->active_pebs_data_cfg = pebs_data_cfg;
13501356
}
13511357
}
1358+
if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
1359+
cpuc->pebs_data_cfg = pebs_data_cfg;
1360+
pebs_update_threshold(cpuc);
1361+
}
13521362

13531363
if (idx >= INTEL_PMC_IDX_FIXED) {
13541364
if (x86_pmu.intel_cap.pebs_format < 5)
@@ -1391,9 +1401,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
13911401
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
13921402
struct hw_perf_event *hwc = &event->hw;
13931403

1394-
if (cpuc->n_pebs == cpuc->n_large_pebs &&
1395-
cpuc->n_pebs != cpuc->n_pebs_via_pt)
1396-
intel_pmu_drain_pebs_buffer();
1404+
intel_pmu_drain_large_pebs(cpuc);
13971405

13981406
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
13991407

arch/x86/include/asm/perf_event.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@
121121
#define PEBS_DATACFG_LBRS BIT_ULL(3)
122122
#define PEBS_DATACFG_LBR_SHIFT 24
123123

124+
/* Steal the highest bit of pebs_data_cfg for SW usage */
125+
#define PEBS_UPDATE_DS_SW BIT_ULL(63)
126+
124127
/*
125128
* Intel "Architectural Performance Monitoring" CPUID
126129
* detection/enumeration details:

0 commit comments

Comments
 (0)