
Commit eadf48c

virtuoso authored and Ingo Molnar committed
perf/x86/intel/pt: Add support for address range filtering in PT
Newer versions of Intel PT support address ranges, which can be used to define IP address range-based filters or TraceSTOP regions. The number of ranges is enumerated via CPUID (a sketch of this enumeration follows below). This patch implements PMU callbacks and related low-level code to allow filter validation, configuration and programming into the hardware.

Signed-off-by: Alexander Shishkin <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Mathieu Poirier <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vince Weaver <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/1461771888-10409-7-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 375637b commit eadf48c
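
As an aside on the enumeration mentioned above: a minimal user-space sketch of reading the range count, assuming CPUID leaf 0x14, sub-leaf 1 reports the number of configurable address ranges in EAX[2:0] per the Intel SDM (this is what PT_CAP_num_address_ranges wraps on the kernel side):

#include <cpuid.h>
#include <stdio.h>

/* Number of configurable Intel PT address ranges; returns 0 when
 * leaf 0x14 is not available. Assumes CPUID.(EAX=0x14, ECX=1):EAX[2:0]. */
static unsigned int pt_num_address_ranges(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid_max(0, NULL) < 0x14)
		return 0;

	__cpuid_count(0x14, 1, eax, ebx, ecx, edx);
	return eax & 0x7;
}

int main(void)
{
	printf("PT address ranges: %u\n", pt_num_address_ranges());
	return 0;
}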

2 files changed: +194, −11 lines


arch/x86/events/intel/pt.c

Lines changed: 168 additions & 11 deletions
@@ -265,6 +265,75 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+	unsigned long	msr_a;
+	unsigned long	msr_b;
+	unsigned int	reg_off;
+} pt_address_ranges[] = {
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR0_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR0_B,
+		.reg_off = RTIT_CTL_ADDR0_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR1_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR1_B,
+		.reg_off = RTIT_CTL_ADDR1_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR2_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR2_B,
+		.reg_off = RTIT_CTL_ADDR2_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR3_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR3_B,
+		.reg_off = RTIT_CTL_ADDR3_OFFSET,
+	}
+};
+
+static u64 pt_config_filters(struct perf_event *event)
+{
+	struct pt_filters *filters = event->hw.addr_filters;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	unsigned int range = 0;
+	u64 rtit_ctl = 0;
+
+	if (!filters)
+		return 0;
+
+	perf_event_addr_filters_sync(event);
+
+	for (range = 0; range < filters->nr_filters; range++) {
+		struct pt_filter *filter = &filters->filter[range];
+
+		/*
+		 * Note, if the range has zero start/end addresses due
+		 * to its dynamic object not being loaded yet, we just
+		 * go ahead and program zeroed range, which will simply
+		 * produce no data. Note^2: if executable code at 0x0
+		 * is a concern, we can set up an "invalid" configuration
+		 * such as msr_b < msr_a.
+		 */
+
+		/* avoid redundant msr writes */
+		if (pt->filters.filter[range].msr_a != filter->msr_a) {
+			wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+			pt->filters.filter[range].msr_a = filter->msr_a;
+		}
+
+		if (pt->filters.filter[range].msr_b != filter->msr_b) {
+			wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+			pt->filters.filter[range].msr_b = filter->msr_b;
+		}
+
+		rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+	}
+
+	return rtit_ctl;
+}
+
 static void pt_config(struct perf_event *event)
 {
 	u64 reg;
@@ -274,7 +343,8 @@ static void pt_config(struct perf_event *event)
 		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
 	}
 
-	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+	reg = pt_config_filters(event);
+	reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
 
 	if (!event->attr.exclude_kernel)
 		reg |= RTIT_CTL_OS;
@@ -921,6 +991,82 @@ static void pt_buffer_free_aux(void *data)
 	kfree(buf);
 }
 
+static int pt_addr_filters_init(struct perf_event *event)
+{
+	struct pt_filters *filters;
+	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+	if (!pt_cap_get(PT_CAP_num_address_ranges))
+		return 0;
+
+	filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
+	if (!filters)
+		return -ENOMEM;
+
+	if (event->parent)
+		memcpy(filters, event->parent->hw.addr_filters,
+		       sizeof(*filters));
+
+	event->hw.addr_filters = filters;
+
+	return 0;
+}
+
+static void pt_addr_filters_fini(struct perf_event *event)
+{
+	kfree(event->hw.addr_filters);
+	event->hw.addr_filters = NULL;
+}
+
+static int pt_event_addr_filters_validate(struct list_head *filters)
+{
+	struct perf_addr_filter *filter;
+	int range = 0;
+
+	list_for_each_entry(filter, filters, entry) {
+		/* PT doesn't support single address triggers */
+		if (!filter->range)
+			return -EOPNOTSUPP;
+
+		if (!filter->inode && !kernel_ip(filter->offset))
+			return -EINVAL;
+
+		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static void pt_event_addr_filters_sync(struct perf_event *event)
+{
+	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+	unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+	struct pt_filters *filters = event->hw.addr_filters;
+	struct perf_addr_filter *filter;
+	int range = 0;
+
+	if (!filters)
+		return;
+
+	list_for_each_entry(filter, &head->list, entry) {
+		if (filter->inode && !offs[range]) {
+			msr_a = msr_b = 0;
+		} else {
+			/* apply the offset */
+			msr_a = filter->offset + offs[range];
+			msr_b = filter->size + msr_a;
+		}
+
+		filters->filter[range].msr_a  = msr_a;
+		filters->filter[range].msr_b  = msr_b;
+		filters->filter[range].config = filter->filter ? 1 : 2;
+		range++;
+	}
+
+	filters->nr_filters = range;
+}
+
 /**
  * intel_pt_interrupt() - PT PMI handler
  */
@@ -1128,6 +1274,7 @@ static void pt_event_read(struct perf_event *event)
 
 static void pt_event_destroy(struct perf_event *event)
 {
+	pt_addr_filters_fini(event);
 	x86_del_exclusive(x86_lbr_exclusive_pt);
 }
 
@@ -1142,6 +1289,11 @@ static int pt_event_init(struct perf_event *event)
 	if (x86_add_exclusive(x86_lbr_exclusive_pt))
 		return -EBUSY;
 
+	if (pt_addr_filters_init(event)) {
+		x86_del_exclusive(x86_lbr_exclusive_pt);
+		return -ENOMEM;
+	}
+
 	event->destroy = pt_event_destroy;
 
 	return 0;
@@ -1195,16 +1347,21 @@ static __init int pt_init(void)
 		PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
 
 	pt_pmu.pmu.capabilities	|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
-	pt_pmu.pmu.attr_groups	= pt_attr_groups;
-	pt_pmu.pmu.task_ctx_nr	= perf_sw_context;
-	pt_pmu.pmu.event_init	= pt_event_init;
-	pt_pmu.pmu.add		= pt_event_add;
-	pt_pmu.pmu.del		= pt_event_del;
-	pt_pmu.pmu.start	= pt_event_start;
-	pt_pmu.pmu.stop		= pt_event_stop;
-	pt_pmu.pmu.read		= pt_event_read;
-	pt_pmu.pmu.setup_aux	= pt_buffer_setup_aux;
-	pt_pmu.pmu.free_aux	= pt_buffer_free_aux;
+	pt_pmu.pmu.attr_groups		 = pt_attr_groups;
+	pt_pmu.pmu.task_ctx_nr		 = perf_sw_context;
+	pt_pmu.pmu.event_init		 = pt_event_init;
+	pt_pmu.pmu.add			 = pt_event_add;
+	pt_pmu.pmu.del			 = pt_event_del;
+	pt_pmu.pmu.start		 = pt_event_start;
+	pt_pmu.pmu.stop			 = pt_event_stop;
+	pt_pmu.pmu.read			 = pt_event_read;
+	pt_pmu.pmu.setup_aux		 = pt_buffer_setup_aux;
+	pt_pmu.pmu.free_aux		 = pt_buffer_free_aux;
+	pt_pmu.pmu.addr_filters_sync	 = pt_event_addr_filters_sync;
+	pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
+	pt_pmu.pmu.nr_addr_filters	 =
+		pt_cap_get(PT_CAP_num_address_ranges);
 
 	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
 
 	return ret;
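
Taken together, pt_config_filters() above folds each filter's 4-bit config value (1 for a trace filter, 2 for TraceStop, as set by pt_event_addr_filters_sync()) into RTIT_CTL via RTIT_CTL_ADDRn_OFFSET. A standalone sketch of that bit assembly, with the offsets hard-coded as an assumption matching the kernel's RTIT_CTL_ADDRn_OFFSET definitions (ADDR0 at bit 32, 4 bits per range):

#include <stdint.h>
#include <stdio.h>

/* Assumed to mirror the kernel's RTIT_CTL_ADDRn_OFFSET values:
 * each range owns a 4-bit config field starting at bit 32. */
#define RTIT_CTL_ADDR_OFFSET(n)	(32 + 4 * (n))

#define PT_CFG_FILTER		1	/* trace only inside the range */
#define PT_CFG_TRACESTOP	2	/* stop tracing when the range is hit */

int main(void)
{
	uint64_t rtit_ctl = 0;

	/* Range 0 as a filter, range 1 as TraceStop -- the same OR-ing
	 * pt_config_filters() does before adding TraceEn and friends. */
	rtit_ctl |= (uint64_t)PT_CFG_FILTER    << RTIT_CTL_ADDR_OFFSET(0);
	rtit_ctl |= (uint64_t)PT_CFG_TRACESTOP << RTIT_CTL_ADDR_OFFSET(1);

	printf("RTIT_CTL filter bits: %#llx\n",
	       (unsigned long long)rtit_ctl);	/* prints 0x2100000000 */
	return 0;
}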

arch/x86/events/intel/pt.h

Lines changed: 26 additions & 0 deletions
@@ -140,14 +140,40 @@ struct pt_buffer {
 	struct topa_entry	*topa_index[0];
 };
 
+#define PT_FILTERS_NUM	4
+
+/**
+ * struct pt_filter - IP range filter configuration
+ * @msr_a:	range start, goes to RTIT_ADDRn_A
+ * @msr_b:	range end, goes to RTIT_ADDRn_B
+ * @config:	4-bit field in RTIT_CTL
+ */
+struct pt_filter {
+	unsigned long	msr_a;
+	unsigned long	msr_b;
+	unsigned long	config;
+};
+
+/**
+ * struct pt_filters - IP range filtering context
+ * @filter:	filters defined for this context
+ * @nr_filters:	number of defined filters in the @filter array
+ */
+struct pt_filters {
+	struct pt_filter	filter[PT_FILTERS_NUM];
+	unsigned int		nr_filters;
+};
+
 /**
  * struct pt - per-cpu pt context
  * @handle:	perf output handle
+ * @filters:	last configured filters
  * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
  * @vmx_on:	1 if VMX is ON on this cpu
  */
 struct pt {
 	struct perf_output_handle handle;
+	struct pt_filters	filters;
 	int			handle_nmi;
 	int			vmx_on;
 };
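
For completeness, these callbacks are driven from user space through the generic perf address-filter interface introduced alongside this patch. A hedged sketch using PERF_EVENT_IOC_SET_FILTER; the PMU type value and the "filter <start>/<size>@<object>" string syntax are assumptions that depend on the companion perf-core patches and the running system:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	/* Hypothetical: the intel_pt PMU type should really be read from
	 * /sys/bus/event_source/devices/intel_pt/type at runtime. */
	attr.type = 8;

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* Assumed filter syntax: trace only a 4 KiB range starting at
	 * file offset 0x1000 of the named object. */
	if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER,
		  "filter 0x1000/0x1000@/usr/bin/ls") < 0)
		perror("PERF_EVENT_IOC_SET_FILTER");

	close(fd);
	return 0;
}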
