Skip to content

Commit ee0bfa0

Browse files
mdroth authored and
suryasaimadhu committed
x86/compressed/64: Add support for SEV-SNP CPUID table in #VC handlers
CPUID instructions generate a #VC exception for SEV-ES/SEV-SNP guests, which the early handlers are currently set up to handle. In the case of SEV-SNP, guests can use a configurable location in guest memory that has been pre-populated with a firmware-validated CPUID table to look up the relevant CPUID values rather than requesting them from the hypervisor via a VMGEXIT. Add the various hooks in the #VC handlers to allow CPUID instructions to be handled via the table. The code to actually configure/enable the table will be added in a subsequent commit. Signed-off-by: Michael Roth <[email protected]> Signed-off-by: Brijesh Singh <[email protected]> Signed-off-by: Borislav Petkov <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 801baa6 commit ee0bfa0

File tree

2 files changed

+326
-0
lines changed

2 files changed

+326
-0
lines changed

arch/x86/include/asm/sev-common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ struct snp_psc_desc {
152152
#define GHCB_TERM_PSC 1 /* Page State Change failure */
153153
#define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */
154154
#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */
155+
#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */
156+
#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */
155157

156158
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
157159

arch/x86/kernel/sev-shared.c

Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,36 @@ struct cpuid_leaf {
2424
u32 edx;
2525
};
2626

27+
/*
28+
* Individual entries of the SNP CPUID table, as defined by the SNP
29+
* Firmware ABI, Revision 0.9, Section 7.1, Table 14.
30+
*/
31+
struct snp_cpuid_fn {
32+
u32 eax_in;
33+
u32 ecx_in;
34+
u64 xcr0_in;
35+
u64 xss_in;
36+
u32 eax;
37+
u32 ebx;
38+
u32 ecx;
39+
u32 edx;
40+
u64 __reserved;
41+
} __packed;
42+
43+
/*
44+
* SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
45+
* Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
46+
* of 64 entries per CPUID table.
47+
*/
48+
#define SNP_CPUID_COUNT_MAX 64
49+
50+
struct snp_cpuid_table {
51+
u32 count;
52+
u32 __reserved1;
53+
u64 __reserved2;
54+
struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
55+
} __packed;
56+
2757
/*
2858
* Since feature negotiation related variables are set early in the boot
2959
* process they must reside in the .data section so as not to be zeroed
@@ -33,6 +63,19 @@ struct cpuid_leaf {
3363
*/
3464
static u16 ghcb_version __ro_after_init;
3565

66+
/* Copy of the SNP firmware's CPUID page. */
67+
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;
68+
69+
/*
70+
* These will be initialized based on CPUID table so that non-present
71+
* all-zero leaves (for sparse tables) can be differentiated from
72+
* invalid/out-of-range leaves. This is needed since all-zero leaves
73+
* still need to be post-processed.
74+
*/
75+
static u32 cpuid_std_range_max __ro_after_init;
76+
static u32 cpuid_hyp_range_max __ro_after_init;
77+
static u32 cpuid_ext_range_max __ro_after_init;
78+
3679
static bool __init sev_es_check_cpu_features(void)
3780
{
3881
if (!has_cpuflag(X86_FEATURE_RDRAND)) {
@@ -242,6 +285,252 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf)
242285
return ret;
243286
}
244287

288+
/*
289+
* This may be called early while still running on the initial identity
290+
* mapping. Use RIP-relative addressing to obtain the correct address
291+
* while running with the initial identity mapping as well as the
292+
* switch-over to kernel virtual addresses later.
293+
*/
294+
static const struct snp_cpuid_table *snp_cpuid_get_table(void)
295+
{
296+
void *ptr;
297+
298+
asm ("lea cpuid_table_copy(%%rip), %0"
299+
: "=r" (ptr)
300+
: "p" (&cpuid_table_copy));
301+
302+
return ptr;
303+
}
304+
305+
/*
306+
* The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
307+
* XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
308+
* and 1 based on the corresponding features enabled by a particular
309+
* combination of XCR0 and XSS registers so that a guest can look up the
310+
* version corresponding to the features currently enabled in its XCR0/XSS
311+
* registers. The only values that differ between these versions/table
312+
* entries is the enabled XSAVE area size advertised via EBX.
313+
*
314+
* While hypervisors may choose to make use of this support, it is more
315+
* robust/secure for a guest to simply find the entry corresponding to the
316+
* base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
317+
* XSAVE area size using subfunctions 2 through 64, as documented in APM
318+
* Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
319+
*
320+
* Since base/legacy XSAVE area size is documented as 0x240, use that value
321+
* directly rather than relying on the base size in the CPUID table.
322+
*
323+
* Return: XSAVE area size on success, 0 otherwise.
324+
*/
325+
static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
326+
{
327+
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
328+
u64 xfeatures_found = 0;
329+
u32 xsave_size = 0x240;
330+
int i;
331+
332+
for (i = 0; i < cpuid_table->count; i++) {
333+
const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
334+
335+
if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
336+
continue;
337+
if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
338+
continue;
339+
if (xfeatures_found & (BIT_ULL(e->ecx_in)))
340+
continue;
341+
342+
xfeatures_found |= (BIT_ULL(e->ecx_in));
343+
344+
if (compacted)
345+
xsave_size += e->eax;
346+
else
347+
xsave_size = max(xsave_size, e->eax + e->ebx);
348+
}
349+
350+
/*
351+
* Either the guest set unsupported XCR0/XSS bits, or the corresponding
352+
* entries in the CPUID table were not present. This is not a valid
353+
* state to be in.
354+
*/
355+
if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
356+
return 0;
357+
358+
return xsave_size;
359+
}
360+
361+
static bool
362+
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
363+
{
364+
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
365+
int i;
366+
367+
for (i = 0; i < cpuid_table->count; i++) {
368+
const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
369+
370+
if (e->eax_in != leaf->fn)
371+
continue;
372+
373+
if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
374+
continue;
375+
376+
/*
377+
* For 0xD subfunctions 0 and 1, only use the entry corresponding
378+
* to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
379+
* See the comments above snp_cpuid_calc_xsave_size() for more
380+
* details.
381+
*/
382+
if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
383+
if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
384+
continue;
385+
386+
leaf->eax = e->eax;
387+
leaf->ebx = e->ebx;
388+
leaf->ecx = e->ecx;
389+
leaf->edx = e->edx;
390+
391+
return true;
392+
}
393+
394+
return false;
395+
}
396+
397+
static void snp_cpuid_hv(struct cpuid_leaf *leaf)
398+
{
399+
if (sev_cpuid_hv(leaf))
400+
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
401+
}
402+
403+
static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
404+
{
405+
struct cpuid_leaf leaf_hv = *leaf;
406+
407+
switch (leaf->fn) {
408+
case 0x1:
409+
snp_cpuid_hv(&leaf_hv);
410+
411+
/* initial APIC ID */
412+
leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
413+
/* APIC enabled bit */
414+
leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));
415+
416+
/* OSXSAVE enabled bit */
417+
if (native_read_cr4() & X86_CR4_OSXSAVE)
418+
leaf->ecx |= BIT(27);
419+
break;
420+
case 0x7:
421+
/* OSPKE enabled bit */
422+
leaf->ecx &= ~BIT(4);
423+
if (native_read_cr4() & X86_CR4_PKE)
424+
leaf->ecx |= BIT(4);
425+
break;
426+
case 0xB:
427+
leaf_hv.subfn = 0;
428+
snp_cpuid_hv(&leaf_hv);
429+
430+
/* extended APIC ID */
431+
leaf->edx = leaf_hv.edx;
432+
break;
433+
case 0xD: {
434+
bool compacted = false;
435+
u64 xcr0 = 1, xss = 0;
436+
u32 xsave_size;
437+
438+
if (leaf->subfn != 0 && leaf->subfn != 1)
439+
return 0;
440+
441+
if (native_read_cr4() & X86_CR4_OSXSAVE)
442+
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
443+
if (leaf->subfn == 1) {
444+
/* Get XSS value if XSAVES is enabled. */
445+
if (leaf->eax & BIT(3)) {
446+
unsigned long lo, hi;
447+
448+
asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
449+
: "c" (MSR_IA32_XSS));
450+
xss = (hi << 32) | lo;
451+
}
452+
453+
/*
454+
* The PPR and APM aren't clear on what size should be
455+
* encoded in 0xD:0x1:EBX when compaction is not enabled
456+
* by either XSAVEC (feature bit 1) or XSAVES (feature
457+
* bit 3) since SNP-capable hardware has these feature
458+
* bits fixed as 1. KVM sets it to 0 in this case, but
459+
* to avoid this becoming an issue it's safer to simply
460+
* treat this as unsupported for SNP guests.
461+
*/
462+
if (!(leaf->eax & (BIT(1) | BIT(3))))
463+
return -EINVAL;
464+
465+
compacted = true;
466+
}
467+
468+
xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
469+
if (!xsave_size)
470+
return -EINVAL;
471+
472+
leaf->ebx = xsave_size;
473+
}
474+
break;
475+
case 0x8000001E:
476+
snp_cpuid_hv(&leaf_hv);
477+
478+
/* extended APIC ID */
479+
leaf->eax = leaf_hv.eax;
480+
/* compute ID */
481+
leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
482+
/* node ID */
483+
leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
484+
break;
485+
default:
486+
/* No fix-ups needed, use values as-is. */
487+
break;
488+
}
489+
490+
return 0;
491+
}
492+
493+
/*
494+
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
495+
* should be treated as fatal by caller.
496+
*/
497+
static int snp_cpuid(struct cpuid_leaf *leaf)
498+
{
499+
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
500+
501+
if (!cpuid_table->count)
502+
return -EOPNOTSUPP;
503+
504+
if (!snp_cpuid_get_validated_func(leaf)) {
505+
/*
506+
* Some hypervisors will avoid keeping track of CPUID entries
507+
* where all values are zero, since they can be handled the
508+
* same as out-of-range values (all-zero). This is useful here
509+
* as well as it allows virtually all guest configurations to
510+
* work using a single SNP CPUID table.
511+
*
512+
* To allow for this, there is a need to distinguish between
513+
* out-of-range entries and in-range zero entries, since the
514+
* CPUID table entries are only a template that may need to be
515+
* augmented with additional values for things like
516+
* CPU-specific information during post-processing. So if it's
517+
* not in the table, set the values to zero. Then, if they are
518+
* within a valid CPUID range, proceed with post-processing
519+
* using zeros as the initial values. Otherwise, skip
520+
* post-processing and just return zeros immediately.
521+
*/
522+
leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
523+
524+
/* Skip post-processing for out-of-range zero leafs. */
525+
if (!(leaf->fn <= cpuid_std_range_max ||
526+
(leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
527+
(leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
528+
return 0;
529+
}
530+
531+
return snp_cpuid_postprocess(leaf);
532+
}
533+
245534
/*
246535
* Boot VC Handler - This is the first VC handler during boot, there is no GHCB
247536
* page yet, so it only supports the MSR based communication with the
@@ -252,16 +541,26 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
252541
unsigned int subfn = lower_bits(regs->cx, 32);
253542
unsigned int fn = lower_bits(regs->ax, 32);
254543
struct cpuid_leaf leaf;
544+
int ret;
255545

256546
/* Only CPUID is supported via MSR protocol */
257547
if (exit_code != SVM_EXIT_CPUID)
258548
goto fail;
259549

260550
leaf.fn = fn;
261551
leaf.subfn = subfn;
552+
553+
ret = snp_cpuid(&leaf);
554+
if (!ret)
555+
goto cpuid_done;
556+
557+
if (ret != -EOPNOTSUPP)
558+
goto fail;
559+
262560
if (sev_cpuid_hv(&leaf))
263561
goto fail;
264562

563+
cpuid_done:
265564
regs->ax = leaf.eax;
266565
regs->bx = leaf.ebx;
267566
regs->cx = leaf.ecx;
@@ -556,12 +855,37 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
556855
return ret;
557856
}
558857

858+
static int vc_handle_cpuid_snp(struct pt_regs *regs)
859+
{
860+
struct cpuid_leaf leaf;
861+
int ret;
862+
863+
leaf.fn = regs->ax;
864+
leaf.subfn = regs->cx;
865+
ret = snp_cpuid(&leaf);
866+
if (!ret) {
867+
regs->ax = leaf.eax;
868+
regs->bx = leaf.ebx;
869+
regs->cx = leaf.ecx;
870+
regs->dx = leaf.edx;
871+
}
872+
873+
return ret;
874+
}
875+
559876
static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
560877
struct es_em_ctxt *ctxt)
561878
{
562879
struct pt_regs *regs = ctxt->regs;
563880
u32 cr4 = native_read_cr4();
564881
enum es_result ret;
882+
int snp_cpuid_ret;
883+
884+
snp_cpuid_ret = vc_handle_cpuid_snp(regs);
885+
if (!snp_cpuid_ret)
886+
return ES_OK;
887+
if (snp_cpuid_ret != -EOPNOTSUPP)
888+
return ES_VMM_ERROR;
565889

566890
ghcb_set_rax(ghcb, regs->ax);
567891
ghcb_set_rcx(ghcb, regs->cx);

0 commit comments

Comments
 (0)