Skip to content

Commit 65d0cf0

Browse files
Boris Ostrovsky authored and David Vrabel committed
xen/PMU: Initialization code for Xen PMU
Map shared data structure that will hold CPU registers, VPMU context, V/PCPU IDs of the CPU interrupted by PMU interrupt. Hypervisor fills this information in its handler and passes it to the guest for further processing. Set up PMU VIRQ. Now that perf infrastructure will assume that PMU is available on a PV guest we need to be careful and make sure that accesses via RDPMC instruction don't cause fatal traps by the hypervisor. Provide a nop RDPMC handler. For the same reason avoid issuing a warning on a write to APIC's LVTPC. Both of these will be made functional in later patches. Signed-off-by: Boris Ostrovsky <[email protected]> Reviewed-by: David Vrabel <[email protected]> Signed-off-by: David Vrabel <[email protected]>
1 parent 5f14154 commit 65d0cf0

File tree

10 files changed

+398
-9
lines changed

10 files changed

+398
-9
lines changed

arch/x86/include/asm/xen/interface.h

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,129 @@ struct vcpu_guest_context {
250250
#endif
251251
};
252252
DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
253+
254+
/*
 * AMD PMU context.
 */
struct xen_pmu_amd_ctxt {
	/*
	 * Offsets, relative to xen_pmu_arch.c.amd, of the counter MSRs and
	 * of the control MSRs.
	 * PV(H) guests must treat both fields as read-only.
	 */
	uint32_t counters;
	uint32_t ctrls;

	/* Counter MSRs (trailing array; its length is not fixed here) */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
	uint64_t regs[];
#elif defined(__GNUC__)
	uint64_t regs[0];
#endif
};
270+
271+
/* Intel PMU registers and structures */

/* One counter value together with its control value */
struct xen_pmu_cntr_pair {
	uint64_t counter;
	uint64_t control;
};
276+
277+
/*
 * Intel PMU context.
 */
struct xen_pmu_intel_ctxt {
	/*
	 * Offsets, relative to xen_pmu_arch.c.intel, of the fixed counter
	 * MSRs and of the architectural counter MSRs.
	 * PV(H) guests must treat both fields as read-only.
	 */
	uint32_t fixed_counters;
	uint32_t arch_counters;

	/* PMU registers */
	uint64_t global_ctrl;
	uint64_t global_ovf_ctrl;
	uint64_t global_status;
	uint64_t fixed_ctrl;
	uint64_t ds_area;
	uint64_t pebs_enable;
	uint64_t debugctl;

	/*
	 * Fixed and architectural counter MSRs (trailing array; its length
	 * is not fixed here)
	 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
	uint64_t regs[];
#elif defined(__GNUC__)
	uint64_t regs[0];
#endif
};
302+
303+
/*
 * Registers of the sampled domain.
 * The trailing pad[] rounds the structure up to a multiple of 8 bytes.
 */
struct xen_pmu_regs {
	uint64_t ip;
	uint64_t sp;
	uint64_t flags;
	uint16_t cs;
	uint16_t ss;
	uint8_t  cpl;
	uint8_t  pad[3];
};
313+
314+
/*
 * PMU flags (bits of xen_pmu_arch.pmu_flags)
 */
/* PMU MSRs are cached in the context */
#define PMU_CACHED         (1 << 0)
/* Sample is from user (set) or kernel (clear) mode */
#define PMU_SAMPLE_USER    (1 << 1)
/* Sample is from realmode */
#define PMU_SAMPLE_REAL    (1 << 2)
/* Sample from a PV guest */
#define PMU_SAMPLE_PV      (1 << 3)
319+
320+
/*
321+
* Architecture-specific information describing state of the processor at
322+
* the time of PMU interrupt.
323+
* Fields of this structure marked as RW for guest should only be written by
324+
* the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
325+
* hypervisor during PMU interrupt). Hypervisor will read updated data in
326+
* XENPMU_flush hypercall and clear PMU_CACHED bit.
327+
*/
328+
struct xen_pmu_arch {
329+
union {
330+
/*
331+
* Processor's registers at the time of interrupt.
332+
* WO for hypervisor, RO for guests.
333+
*/
334+
struct xen_pmu_regs regs;
335+
/*
336+
* Padding for adding new registers to xen_pmu_regs in
337+
* the future
338+
*/
339+
#define XENPMU_REGS_PAD_SZ 64
340+
uint8_t pad[XENPMU_REGS_PAD_SZ];
341+
} r;
342+
343+
/* WO for hypervisor, RO for guest */
344+
uint64_t pmu_flags;
345+
346+
/*
347+
* APIC LVTPC register.
348+
* RW for both hypervisor and guest.
349+
* Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
350+
* during XENPMU_flush or XENPMU_lvtpc_set.
351+
*/
352+
union {
353+
uint32_t lapic_lvtpc;
354+
uint64_t pad;
355+
} l;
356+
357+
/*
358+
* Vendor-specific PMU registers.
359+
* RW for both hypervisor and guest (see exceptions above).
360+
* Guest's updates to this field are verified and then loaded by the
361+
* hypervisor into hardware during XENPMU_flush
362+
*/
363+
union {
364+
struct xen_pmu_amd_ctxt amd;
365+
struct xen_pmu_intel_ctxt intel;
366+
367+
/*
368+
* Padding for contexts (fixed parts only, does not include
369+
* MSR banks that are specified by offsets)
370+
*/
371+
#define XENPMU_CTXT_PAD_SZ 128
372+
uint8_t pad[XENPMU_CTXT_PAD_SZ];
373+
} c;
374+
};
375+
253376
#endif /* !__ASSEMBLY__ */
254377

255378
/*

arch/x86/xen/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ CFLAGS_mmu.o := $(nostackp)
1313
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
1414
time.o xen-asm.o xen-asm_$(BITS).o \
1515
grant-table.o suspend.o platform-pci-unplug.o \
16-
p2m.o apic.o
16+
p2m.o apic.o pmu.o
1717

1818
obj-$(CONFIG_EVENT_TRACING) += trace.o
1919

arch/x86/xen/apic.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ static u32 xen_apic_read(u32 reg)
7272

7373
/*
 * APIC register write handler.
 *
 * A write to APIC_LVTPC is dropped without any diagnostics.  A write to any
 * other register triggers a warning that reports the register and the value.
 */
static void xen_apic_write(u32 reg, u32 val)
{
	if (reg != APIC_LVTPC) {
		/* Warn to see if there's any stray references */
		WARN(1,"register: %x, value: %x\n", reg, val);
	}
}

arch/x86/xen/enlighten.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
#include "mmu.h"
8585
#include "smp.h"
8686
#include "multicalls.h"
87+
#include "pmu.h"
8788

8889
EXPORT_SYMBOL_GPL(hypercall_page);
8990

@@ -1082,6 +1083,11 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
10821083
return ret;
10831084
}
10841085

1086+
/*
 * Placeholder performance-counter read: @counter is ignored and the
 * reported value is always zero.
 */
unsigned long long xen_read_pmc(int counter)
{
	const unsigned long long value = 0;

	return value;
}
1090+
10851091
void xen_setup_shared_info(void)
10861092
{
10871093
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1216,7 +1222,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
12161222
.write_msr = xen_write_msr_safe,
12171223

12181224
.read_tsc = native_read_tsc,
1219-
.read_pmc = native_read_pmc,
1225+
.read_pmc = xen_read_pmc,
12201226

12211227
.read_tscp = native_read_tscp,
12221228

@@ -1267,6 +1273,10 @@ static const struct pv_apic_ops xen_apic_ops __initconst = {
12671273
static void xen_reboot(int reason)
12681274
{
12691275
struct sched_shutdown r = { .reason = reason };
1276+
int cpu;
1277+
1278+
for_each_online_cpu(cpu)
1279+
xen_pmu_finish(cpu);
12701280

12711281
if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
12721282
BUG();

arch/x86/xen/pmu.c

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
#include <linux/types.h>
2+
#include <linux/interrupt.h>
3+
4+
#include <asm/xen/hypercall.h>
5+
#include <xen/page.h>
6+
#include <xen/interface/xen.h>
7+
#include <xen/interface/vcpu.h>
8+
#include <xen/interface/xenpmu.h>
9+
10+
#include "xen-ops.h"
11+
#include "pmu.h"
12+
13+
/* x86_pmu.handle_irq definition */
14+
#include "../kernel/cpu/perf_event.h"
15+
16+
17+
/* Shared page between hypervisor and domain */
18+
static DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
19+
#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id())
20+
21+
/* perf callbacks */
22+
static int xen_is_in_guest(void)
23+
{
24+
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
25+
26+
if (!xenpmu_data) {
27+
pr_warn_once("%s: pmudata not initialized\n", __func__);
28+
return 0;
29+
}
30+
31+
if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
32+
return 0;
33+
34+
return 1;
35+
}
36+
37+
static int xen_is_user_mode(void)
38+
{
39+
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
40+
41+
if (!xenpmu_data) {
42+
pr_warn_once("%s: pmudata not initialized\n", __func__);
43+
return 0;
44+
}
45+
46+
if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
47+
return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
48+
else
49+
return !!(xenpmu_data->pmu.r.regs.cpl & 3);
50+
}
51+
52+
static unsigned long xen_get_guest_ip(void)
53+
{
54+
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
55+
56+
if (!xenpmu_data) {
57+
pr_warn_once("%s: pmudata not initialized\n", __func__);
58+
return 0;
59+
}
60+
61+
return xenpmu_data->pmu.r.regs.ip;
62+
}
63+
64+
static struct perf_guest_info_callbacks xen_guest_cbs = {
65+
.is_in_guest = xen_is_in_guest,
66+
.is_user_mode = xen_is_user_mode,
67+
.get_guest_ip = xen_get_guest_ip,
68+
};
69+
70+
/* Convert registers from Xen's format to Linux' */
71+
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
72+
struct pt_regs *regs, uint64_t pmu_flags)
73+
{
74+
regs->ip = xen_regs->ip;
75+
regs->cs = xen_regs->cs;
76+
regs->sp = xen_regs->sp;
77+
78+
if (pmu_flags & PMU_SAMPLE_PV) {
79+
if (pmu_flags & PMU_SAMPLE_USER)
80+
regs->cs |= 3;
81+
else
82+
regs->cs &= ~3;
83+
} else {
84+
if (xen_regs->cpl)
85+
regs->cs |= 3;
86+
else
87+
regs->cs &= ~3;
88+
}
89+
}
90+
91+
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
92+
{
93+
int ret = IRQ_NONE;
94+
struct pt_regs regs;
95+
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
96+
97+
if (!xenpmu_data) {
98+
pr_warn_once("%s: pmudata not initialized\n", __func__);
99+
return ret;
100+
}
101+
102+
xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
103+
xenpmu_data->pmu.pmu_flags);
104+
if (x86_pmu.handle_irq(&regs))
105+
ret = IRQ_HANDLED;
106+
107+
return ret;
108+
}
109+
110+
bool is_xen_pmu(int cpu)
111+
{
112+
return (per_cpu(xenpmu_shared, cpu) != NULL);
113+
}
114+
115+
void xen_pmu_init(int cpu)
116+
{
117+
int err;
118+
struct xen_pmu_params xp;
119+
unsigned long pfn;
120+
struct xen_pmu_data *xenpmu_data;
121+
122+
BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);
123+
124+
if (xen_hvm_domain())
125+
return;
126+
127+
xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
128+
if (!xenpmu_data) {
129+
pr_err("VPMU init: No memory\n");
130+
return;
131+
}
132+
pfn = virt_to_pfn(xenpmu_data);
133+
134+
xp.val = pfn_to_mfn(pfn);
135+
xp.vcpu = cpu;
136+
xp.version.maj = XENPMU_VER_MAJ;
137+
xp.version.min = XENPMU_VER_MIN;
138+
err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
139+
if (err)
140+
goto fail;
141+
142+
per_cpu(xenpmu_shared, cpu) = xenpmu_data;
143+
144+
if (cpu == 0)
145+
perf_register_guest_info_callbacks(&xen_guest_cbs);
146+
147+
return;
148+
149+
fail:
150+
pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n",
151+
cpu, err);
152+
free_pages((unsigned long)xenpmu_data, 0);
153+
}
154+
155+
void xen_pmu_finish(int cpu)
156+
{
157+
struct xen_pmu_params xp;
158+
159+
if (xen_hvm_domain())
160+
return;
161+
162+
xp.vcpu = cpu;
163+
xp.version.maj = XENPMU_VER_MAJ;
164+
xp.version.min = XENPMU_VER_MIN;
165+
166+
(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
167+
168+
free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0);
169+
per_cpu(xenpmu_shared, cpu) = NULL;
170+
}

arch/x86/xen/pmu.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#ifndef __XEN_PMU_H
#define __XEN_PMU_H

#include <xen/interface/xenpmu.h>

/* Handler for the PMU interrupt */
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id);

/* Set up the PMU page shared with the hypervisor for @cpu */
void xen_pmu_init(int cpu);

/* Tear down the PMU page shared with the hypervisor for @cpu */
void xen_pmu_finish(int cpu);

/* True when a shared PMU page is recorded for @cpu */
bool is_xen_pmu(int cpu);

#endif /* __XEN_PMU_H */

0 commit comments

Comments
 (0)