Skip to content

Commit bf8c55d

Browse files
chao-pbonzini
authored and committed
KVM: x86: Implement Intel PT MSRs read/write emulation
This patch implements read/write emulation of the Intel Processor Trace (Intel PT) MSRs. Reads and writes of the Intel PT MSRs need to be emulated when those MSRs are intercepted in the guest and during live migration. Signed-off-by: Chao Peng <[email protected]> Signed-off-by: Luwei Kang <[email protected]> Signed-off-by: Paolo Bonzini <[email protected]>
1 parent 6c0f0bb commit bf8c55d

File tree

2 files changed

+216
-1
lines changed

2 files changed

+216
-1
lines changed

arch/x86/kvm/vmx/vmx.c

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,14 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
140140

141141
#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
142142

143+
/*
 * Every IA32_RTIT_STATUS bit that is not one of the status fields listed
 * here.  vmx_set_msr() rejects a write to the MSR if any of these bits is
 * set in the written value.
 */
#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
	RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
	RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
	RTIT_STATUS_BYTECNT))

/*
 * Bits that must be clear in a value written to IA32_RTIT_OUTPUT_BASE:
 * everything at or above the bit position returned by
 * cpuid_query_maxphyaddr(vcpu), plus the low seven bits (0x7f).
 *
 * NOTE: this expands to an expression that references a variable named
 * 'vcpu', so it can only be used where such a variable is in scope.
 *
 * The shift is done on 1ULL rather than 1UL so that it is 64 bits wide
 * even where unsigned long is 32 bits; shifting a 32-bit value by 32 or
 * more is undefined behaviour and the MSR value being masked is a u64.
 */
#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
	(~((1ULL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
150+
143151
/*
144152
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
145153
* ple_gap: upper bound on the amount of time between two successive
@@ -1354,6 +1362,79 @@ void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
13541362
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
13551363
}
13561364

1365+
/*
 * vmx_rtit_ctl_check - validate a value about to be written to IA32_RTIT_CTL.
 * @vcpu: vCPU whose Intel PT descriptor (vmx->pt_desc) the value is checked
 *        against.
 * @data: the value being written.
 *
 * Return: 0 if the value is acceptable, 1 if the write must be refused.
 * vmx_set_msr() returns 1 for the MSR write when this function returns
 * non-zero; each case below corresponds to a #GP condition.
 */
static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long value;

	/*
	 * Any MSR write that attempts to change bits marked reserved will
	 * cause a #GP fault.
	 */
	if (data & vmx->pt_desc.ctl_bitmask)
		return 1;

	/*
	 * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will
	 * result in a #GP unless the same write also clears TraceEn.
	 *
	 * So only refuse the write when tracing is currently enabled, the
	 * new value keeps TraceEn set, and the value actually changes.  A
	 * write that clears TraceEn may change other bits at the same time.
	 */
	if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
	    (data & RTIT_CTL_TRACEEN) &&
	    data != vmx->pt_desc.guest.ctl)
		return 1;

	/*
	 * A WRMSR to IA32_RTIT_CTL that sets TraceEn but clears both ToPA
	 * and FabricEn causes a #GP if
	 * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0
	 */
	if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
	    !(data & RTIT_CTL_FABRIC_EN) &&
	    !intel_pt_validate_cap(vmx->pt_desc.caps,
				   PT_CAP_single_range_output))
		return 1;

	/*
	 * MTCFreq, CycThresh and PSBFreq encoding checks: any MSR write that
	 * uses an encoding marked reserved will cause a #GP fault.  Each
	 * capability value is used as a bitmap of the permitted encodings.
	 */
	value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
	    !test_bit((data & RTIT_CTL_MTC_RANGE) >>
		      RTIT_CTL_MTC_RANGE_OFFSET, &value))
		return 1;
	value = intel_pt_validate_cap(vmx->pt_desc.caps,
				      PT_CAP_cycle_thresholds);
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
	    !test_bit((data & RTIT_CTL_CYC_THRESH) >>
		      RTIT_CTL_CYC_THRESH_OFFSET, &value))
		return 1;
	value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
	if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
	    !test_bit((data & RTIT_CTL_PSB_FREQ) >>
		      RTIT_CTL_PSB_FREQ_OFFSET, &value))
		return 1;

	/*
	 * A non-zero ADDRn_CFG for an address range beyond
	 * pt_desc.addr_range, or any ADDRn_CFG encoding greater than 2,
	 * will cause a #GP fault.
	 */
	value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
	if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2))
		return 1;
	value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
	if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2))
		return 1;
	value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
	if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2))
		return 1;
	value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
	if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2))
		return 1;

	return 0;
}
1436+
1437+
13571438
static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
13581439
{
13591440
unsigned long rip;
@@ -1555,6 +1636,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
15551636
{
15561637
struct vcpu_vmx *vmx = to_vmx(vcpu);
15571638
struct shared_msr_entry *msr;
1639+
u32 index;
15581640

15591641
switch (msr_info->index) {
15601642
#ifdef CONFIG_X86_64
@@ -1619,6 +1701,52 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
16191701
return 1;
16201702
msr_info->data = vcpu->arch.ia32_xss;
16211703
break;
1704+
case MSR_IA32_RTIT_CTL:
1705+
if (pt_mode != PT_MODE_HOST_GUEST)
1706+
return 1;
1707+
msr_info->data = vmx->pt_desc.guest.ctl;
1708+
break;
1709+
case MSR_IA32_RTIT_STATUS:
1710+
if (pt_mode != PT_MODE_HOST_GUEST)
1711+
return 1;
1712+
msr_info->data = vmx->pt_desc.guest.status;
1713+
break;
1714+
case MSR_IA32_RTIT_CR3_MATCH:
1715+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1716+
!intel_pt_validate_cap(vmx->pt_desc.caps,
1717+
PT_CAP_cr3_filtering))
1718+
return 1;
1719+
msr_info->data = vmx->pt_desc.guest.cr3_match;
1720+
break;
1721+
case MSR_IA32_RTIT_OUTPUT_BASE:
1722+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1723+
(!intel_pt_validate_cap(vmx->pt_desc.caps,
1724+
PT_CAP_topa_output) &&
1725+
!intel_pt_validate_cap(vmx->pt_desc.caps,
1726+
PT_CAP_single_range_output)))
1727+
return 1;
1728+
msr_info->data = vmx->pt_desc.guest.output_base;
1729+
break;
1730+
case MSR_IA32_RTIT_OUTPUT_MASK:
1731+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1732+
(!intel_pt_validate_cap(vmx->pt_desc.caps,
1733+
PT_CAP_topa_output) &&
1734+
!intel_pt_validate_cap(vmx->pt_desc.caps,
1735+
PT_CAP_single_range_output)))
1736+
return 1;
1737+
msr_info->data = vmx->pt_desc.guest.output_mask;
1738+
break;
1739+
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
1740+
index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
1741+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1742+
(index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
1743+
PT_CAP_num_address_ranges)))
1744+
return 1;
1745+
if (index % 2)
1746+
msr_info->data = vmx->pt_desc.guest.addr_b[index / 2];
1747+
else
1748+
msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
1749+
break;
16221750
case MSR_TSC_AUX:
16231751
if (!msr_info->host_initiated &&
16241752
!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
@@ -1648,6 +1776,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
16481776
int ret = 0;
16491777
u32 msr_index = msr_info->index;
16501778
u64 data = msr_info->data;
1779+
u32 index;
16511780

16521781
switch (msr_index) {
16531782
case MSR_EFER:
@@ -1799,6 +1928,61 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
17991928
else
18001929
clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
18011930
break;
1931+
case MSR_IA32_RTIT_CTL:
1932+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1933+
vmx_rtit_ctl_check(vcpu, data))
1934+
return 1;
1935+
vmcs_write64(GUEST_IA32_RTIT_CTL, data);
1936+
vmx->pt_desc.guest.ctl = data;
1937+
break;
1938+
case MSR_IA32_RTIT_STATUS:
1939+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1940+
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
1941+
(data & MSR_IA32_RTIT_STATUS_MASK))
1942+
return 1;
1943+
vmx->pt_desc.guest.status = data;
1944+
break;
1945+
case MSR_IA32_RTIT_CR3_MATCH:
1946+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1947+
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
1948+
!intel_pt_validate_cap(vmx->pt_desc.caps,
1949+
PT_CAP_cr3_filtering))
1950+
return 1;
1951+
vmx->pt_desc.guest.cr3_match = data;
1952+
break;
1953+
case MSR_IA32_RTIT_OUTPUT_BASE:
1954+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1955+
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
1956+
(!intel_pt_validate_cap(vmx->pt_desc.caps,
1957+
PT_CAP_topa_output) &&
1958+
!intel_pt_validate_cap(vmx->pt_desc.caps,
1959+
PT_CAP_single_range_output)) ||
1960+
(data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
1961+
return 1;
1962+
vmx->pt_desc.guest.output_base = data;
1963+
break;
1964+
case MSR_IA32_RTIT_OUTPUT_MASK:
1965+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1966+
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
1967+
(!intel_pt_validate_cap(vmx->pt_desc.caps,
1968+
PT_CAP_topa_output) &&
1969+
!intel_pt_validate_cap(vmx->pt_desc.caps,
1970+
PT_CAP_single_range_output)))
1971+
return 1;
1972+
vmx->pt_desc.guest.output_mask = data;
1973+
break;
1974+
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
1975+
index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
1976+
if ((pt_mode != PT_MODE_HOST_GUEST) ||
1977+
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
1978+
(index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
1979+
PT_CAP_num_address_ranges)))
1980+
return 1;
1981+
if (index % 2)
1982+
vmx->pt_desc.guest.addr_b[index / 2] = data;
1983+
else
1984+
vmx->pt_desc.guest.addr_a[index / 2] = data;
1985+
break;
18021986
case MSR_TSC_AUX:
18031987
if (!msr_info->host_initiated &&
18041988
!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))

arch/x86/kvm/x86.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
#include <asm/irq_remapping.h>
7070
#include <asm/mshyperv.h>
7171
#include <asm/hypervisor.h>
72+
#include <asm/intel_pt.h>
7273

7374
#define CREATE_TRACE_POINTS
7475
#include "trace.h"
@@ -1124,7 +1125,13 @@ static u32 msrs_to_save[] = {
11241125
#endif
11251126
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
11261127
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1127-
MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
1128+
MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES,
1129+
MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
1130+
MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
1131+
MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
1132+
MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
1133+
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
1134+
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
11281135
};
11291136

11301137
static unsigned num_msrs_to_save;
@@ -4884,6 +4891,30 @@ static void kvm_init_msr_list(void)
48844891
if (!kvm_x86_ops->rdtscp_supported())
48854892
continue;
48864893
break;
4894+
case MSR_IA32_RTIT_CTL:
4895+
case MSR_IA32_RTIT_STATUS:
4896+
if (!kvm_x86_ops->pt_supported())
4897+
continue;
4898+
break;
4899+
case MSR_IA32_RTIT_CR3_MATCH:
4900+
if (!kvm_x86_ops->pt_supported() ||
4901+
!intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
4902+
continue;
4903+
break;
4904+
case MSR_IA32_RTIT_OUTPUT_BASE:
4905+
case MSR_IA32_RTIT_OUTPUT_MASK:
4906+
if (!kvm_x86_ops->pt_supported() ||
4907+
(!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
4908+
!intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
4909+
continue;
4910+
break;
4911+
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
4912+
if (!kvm_x86_ops->pt_supported() ||
4913+
msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
4914+
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
4915+
continue;
4916+
break;
4917+
}
48874918
default:
48884919
break;
48894920
}

0 commit comments

Comments
 (0)