Skip to content

Commit 5006921

Browse files
author
Ingo Molnar
committed
Merge branch 'perf/urgent' into perf/core, to pick up fixes
Signed-off-by: Ingo Molnar <[email protected]>
2 parents 14520d6 + 8ef9b84 commit 5006921

File tree

6 files changed

+168
-51
lines changed

6 files changed

+168
-51
lines changed

arch/x86/events/amd/uncore.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929

3030
#define COUNTER_SHIFT 16
3131

32+
static HLIST_HEAD(uncore_unused_list);
33+
3234
struct amd_uncore {
3335
int id;
3436
int refcnt;
@@ -39,7 +41,7 @@ struct amd_uncore {
3941
cpumask_t *active_mask;
4042
struct pmu *pmu;
4143
struct perf_event *events[MAX_COUNTERS];
42-
struct amd_uncore *free_when_cpu_online;
44+
struct hlist_node node;
4345
};
4446

4547
static struct amd_uncore * __percpu *amd_uncore_nb;
@@ -306,6 +308,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
306308
uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
307309
uncore_nb->active_mask = &amd_nb_active_mask;
308310
uncore_nb->pmu = &amd_nb_pmu;
311+
uncore_nb->id = -1;
309312
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
310313
}
311314

@@ -319,6 +322,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
319322
uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
320323
uncore_l2->active_mask = &amd_l2_active_mask;
321324
uncore_l2->pmu = &amd_l2_pmu;
325+
uncore_l2->id = -1;
322326
*per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
323327
}
324328

@@ -348,7 +352,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this,
348352
continue;
349353

350354
if (this->id == that->id) {
351-
that->free_when_cpu_online = this;
355+
hlist_add_head(&this->node, &uncore_unused_list);
352356
this = that;
353357
break;
354358
}
@@ -388,13 +392,23 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
388392
return 0;
389393
}
390394

395+
static void uncore_clean_online(void)
396+
{
397+
struct amd_uncore *uncore;
398+
struct hlist_node *n;
399+
400+
hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
401+
hlist_del(&uncore->node);
402+
kfree(uncore);
403+
}
404+
}
405+
391406
static void uncore_online(unsigned int cpu,
392407
struct amd_uncore * __percpu *uncores)
393408
{
394409
struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
395410

396-
kfree(uncore->free_when_cpu_online);
397-
uncore->free_when_cpu_online = NULL;
411+
uncore_clean_online();
398412

399413
if (cpu == uncore->cpu)
400414
cpumask_set_cpu(cpu, uncore->active_mask);

arch/x86/events/intel/bts.c

Lines changed: 94 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,17 @@
3131
struct bts_ctx {
3232
struct perf_output_handle handle;
3333
struct debug_store ds_back;
34-
int started;
34+
int state;
35+
};
36+
37+
/* BTS context states: */
38+
enum {
39+
/* no ongoing AUX transactions */
40+
BTS_STATE_STOPPED = 0,
41+
/* AUX transaction is on, BTS tracing is disabled */
42+
BTS_STATE_INACTIVE,
43+
/* AUX transaction is on, BTS tracing is running */
44+
BTS_STATE_ACTIVE,
3545
};
3646

3747
static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
@@ -204,6 +214,15 @@ static void bts_update(struct bts_ctx *bts)
204214
static int
205215
bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
206216

217+
/*
218+
* Ordering PMU callbacks wrt themselves and the PMI is done by means
219+
* of bts::state, which:
220+
* - is set when bts::handle::event is valid, that is, between
221+
* perf_aux_output_begin() and perf_aux_output_end();
222+
* - is zero otherwise;
223+
* - is ordered against bts::handle::event with a compiler barrier.
224+
*/
225+
207226
static void __bts_event_start(struct perf_event *event)
208227
{
209228
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
@@ -221,10 +240,13 @@ static void __bts_event_start(struct perf_event *event)
221240

222241
/*
223242
* local barrier to make sure that ds configuration made it
224-
* before we enable BTS
243+
* before we enable BTS and bts::state goes ACTIVE
225244
*/
226245
wmb();
227246

247+
/* INACTIVE/STOPPED -> ACTIVE */
248+
WRITE_ONCE(bts->state, BTS_STATE_ACTIVE);
249+
228250
intel_pmu_enable_bts(config);
229251

230252
}
@@ -251,9 +273,6 @@ static void bts_event_start(struct perf_event *event, int flags)
251273

252274
__bts_event_start(event);
253275

254-
/* PMI handler: this counter is running and likely generating PMIs */
255-
ACCESS_ONCE(bts->started) = 1;
256-
257276
return;
258277

259278
fail_end_stop:
@@ -263,30 +282,34 @@ static void bts_event_start(struct perf_event *event, int flags)
263282
event->hw.state = PERF_HES_STOPPED;
264283
}
265284

266-
static void __bts_event_stop(struct perf_event *event)
285+
static void __bts_event_stop(struct perf_event *event, int state)
267286
{
287+
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
288+
289+
/* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
290+
WRITE_ONCE(bts->state, state);
291+
268292
/*
269293
* No extra synchronization is mandated by the documentation to have
270294
* BTS data stores globally visible.
271295
*/
272296
intel_pmu_disable_bts();
273-
274-
if (event->hw.state & PERF_HES_STOPPED)
275-
return;
276-
277-
ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
278297
}
279298

280299
static void bts_event_stop(struct perf_event *event, int flags)
281300
{
282301
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
283302
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
284-
struct bts_buffer *buf = perf_get_aux(&bts->handle);
303+
struct bts_buffer *buf = NULL;
304+
int state = READ_ONCE(bts->state);
285305

286-
/* PMI handler: don't restart this counter */
287-
ACCESS_ONCE(bts->started) = 0;
306+
if (state == BTS_STATE_ACTIVE)
307+
__bts_event_stop(event, BTS_STATE_STOPPED);
288308

289-
__bts_event_stop(event);
309+
if (state != BTS_STATE_STOPPED)
310+
buf = perf_get_aux(&bts->handle);
311+
312+
event->hw.state |= PERF_HES_STOPPED;
290313

291314
if (flags & PERF_EF_UPDATE) {
292315
bts_update(bts);
@@ -296,6 +319,7 @@ static void bts_event_stop(struct perf_event *event, int flags)
296319
bts->handle.head =
297320
local_xchg(&buf->data_size,
298321
buf->nr_pages << PAGE_SHIFT);
322+
299323
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
300324
!!local_xchg(&buf->lost, 0));
301325
}
@@ -310,17 +334,36 @@ static void bts_event_stop(struct perf_event *event, int flags)
310334
void intel_bts_enable_local(void)
311335
{
312336
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
337+
int state = READ_ONCE(bts->state);
338+
339+
/*
340+
* Here we transition from INACTIVE to ACTIVE;
341+
* if we instead are STOPPED from the interrupt handler,
342+
* stay that way. Can't be ACTIVE here though.
343+
*/
344+
if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE))
345+
return;
346+
347+
if (state == BTS_STATE_STOPPED)
348+
return;
313349

314-
if (bts->handle.event && bts->started)
350+
if (bts->handle.event)
315351
__bts_event_start(bts->handle.event);
316352
}
317353

318354
void intel_bts_disable_local(void)
319355
{
320356
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
321357

358+
/*
359+
* Here we transition from ACTIVE to INACTIVE;
360+
* do nothing for STOPPED or INACTIVE.
361+
*/
362+
if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE)
363+
return;
364+
322365
if (bts->handle.event)
323-
__bts_event_stop(bts->handle.event);
366+
__bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE);
324367
}
325368

326369
static int
@@ -335,8 +378,6 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
335378
return 0;
336379

337380
head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
338-
if (WARN_ON_ONCE(head != local_read(&buf->head)))
339-
return -EINVAL;
340381

341382
phys = &buf->buf[buf->cur_buf];
342383
space = phys->offset + phys->displacement + phys->size - head;
@@ -403,41 +444,65 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
403444

404445
int intel_bts_interrupt(void)
405446
{
447+
struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
406448
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
407449
struct perf_event *event = bts->handle.event;
408450
struct bts_buffer *buf;
409451
s64 old_head;
410-
int err;
452+
int err = -ENOSPC, handled = 0;
411453

412-
if (!event || !bts->started)
413-
return 0;
454+
/*
455+
* The only surefire way of knowing if this NMI is ours is by checking
456+
* the write ptr against the PMI threshold.
457+
*/
458+
if (ds->bts_index >= ds->bts_interrupt_threshold)
459+
handled = 1;
460+
461+
/*
462+
* this is wrapped in intel_bts_enable_local/intel_bts_disable_local,
463+
* so we can only be INACTIVE or STOPPED
464+
*/
465+
if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
466+
return handled;
414467

415468
buf = perf_get_aux(&bts->handle);
469+
if (!buf)
470+
return handled;
471+
416472
/*
417473
* Skip snapshot counters: they don't use the interrupt, but
418474
* there's no other way of telling, because the pointer will
419475
* keep moving
420476
*/
421-
if (!buf || buf->snapshot)
477+
if (buf->snapshot)
422478
return 0;
423479

424480
old_head = local_read(&buf->head);
425481
bts_update(bts);
426482

427483
/* no new data */
428484
if (old_head == local_read(&buf->head))
429-
return 0;
485+
return handled;
430486

431487
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
432488
!!local_xchg(&buf->lost, 0));
433489

434490
buf = perf_aux_output_begin(&bts->handle, event);
435-
if (!buf)
436-
return 1;
491+
if (buf)
492+
err = bts_buffer_reset(buf, &bts->handle);
493+
494+
if (err) {
495+
WRITE_ONCE(bts->state, BTS_STATE_STOPPED);
437496

438-
err = bts_buffer_reset(buf, &bts->handle);
439-
if (err)
440-
perf_aux_output_end(&bts->handle, 0, false);
497+
if (buf) {
498+
/*
499+
* BTS_STATE_STOPPED should be visible before
500+
* cleared handle::event
501+
*/
502+
barrier();
503+
perf_aux_output_end(&bts->handle, 0, false);
504+
}
505+
}
441506

442507
return 1;
443508
}

arch/x86/events/intel/cqm.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,11 @@ static void __intel_cqm_event_count(void *info);
458458
static void init_mbm_sample(u32 rmid, u32 evt_type);
459459
static void __intel_mbm_event_count(void *info);
460460

461+
static bool is_cqm_event(int e)
462+
{
463+
return (e == QOS_L3_OCCUP_EVENT_ID);
464+
}
465+
461466
static bool is_mbm_event(int e)
462467
{
463468
return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
@@ -1366,6 +1371,10 @@ static int intel_cqm_event_init(struct perf_event *event)
13661371
(event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
13671372
return -EINVAL;
13681373

1374+
if ((is_cqm_event(event->attr.config) && !cqm_enabled) ||
1375+
(is_mbm_event(event->attr.config) && !mbm_enabled))
1376+
return -EINVAL;
1377+
13691378
/* unsupported modes and filters */
13701379
if (event->attr.exclude_user ||
13711380
event->attr.exclude_kernel ||

arch/x86/events/intel/ds.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,18 +1312,18 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
13121312
struct pebs_record_nhm *p = at;
13131313
u64 pebs_status;
13141314

1315-
/* PEBS v3 has accurate status bits */
1315+
pebs_status = p->status & cpuc->pebs_enabled;
1316+
pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
1317+
1318+
/* PEBS v3 has more accurate status bits */
13161319
if (x86_pmu.intel_cap.pebs_format >= 3) {
1317-
for_each_set_bit(bit, (unsigned long *)&p->status,
1318-
MAX_PEBS_EVENTS)
1320+
for_each_set_bit(bit, (unsigned long *)&pebs_status,
1321+
x86_pmu.max_pebs_events)
13191322
counts[bit]++;
13201323

13211324
continue;
13221325
}
13231326

1324-
pebs_status = p->status & cpuc->pebs_enabled;
1325-
pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
1326-
13271327
/*
13281328
* On some CPUs the PEBS status can be zero when PEBS is
13291329
* racing with clearing of GLOBAL_STATUS.
@@ -1371,8 +1371,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
13711371
continue;
13721372

13731373
event = cpuc->events[bit];
1374-
WARN_ON_ONCE(!event);
1375-
WARN_ON_ONCE(!event->attr.precise_ip);
1374+
if (WARN_ON_ONCE(!event))
1375+
continue;
1376+
1377+
if (WARN_ON_ONCE(!event->attr.precise_ip))
1378+
continue;
13761379

13771380
/* log dropped samples number */
13781381
if (error[bit])

0 commit comments

Comments (0)