@@ -24,6 +24,36 @@ struct cpuid_leaf {
 	u32 edx;
 };
 
+/*
+ * Individual entries of the SNP CPUID table, as defined by the SNP
+ * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
+ */
+struct snp_cpuid_fn {
+	u32 eax_in;
+	u32 ecx_in;
+	u64 xcr0_in;
+	u64 xss_in;
+	u32 eax;
+	u32 ebx;
+	u32 ecx;
+	u32 edx;
+	u64 __reserved;
+} __packed;
+
+/*
+ * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
+ * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
+ * of 64 entries per CPUID table.
+ */
+#define SNP_CPUID_COUNT_MAX 64
+
+struct snp_cpuid_table {
+	u32 count;
+	u32 __reserved1;
+	u64 __reserved2;
+	struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
+} __packed;
+
 /*
  * Since feature negotiation related variables are set early in the boot
  * process they must reside in the .data section so as not to be zeroed
@@ -33,6 +63,19 @@ struct cpuid_leaf {
  */
 static u16 ghcb_version __ro_after_init;
 
+/* Copy of the SNP firmware's CPUID page. */
+static struct snp_cpuid_table cpuid_table_copy __ro_after_init;
+
+/*
+ * These will be initialized based on CPUID table so that non-present
+ * all-zero leaves (for sparse tables) can be differentiated from
+ * invalid/out-of-range leaves. This is needed since all-zero leaves
+ * still need to be post-processed.
+ */
+static u32 cpuid_std_range_max __ro_after_init;
+static u32 cpuid_hyp_range_max __ro_after_init;
+static u32 cpuid_ext_range_max __ro_after_init;
+
 static bool __init sev_es_check_cpu_features(void)
 {
 	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
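
(Editor's aside, between hunks: a quick standalone sketch of the layout the two structures above describe. It is illustrative only and not part of the patch; the stdint types, __attribute__((packed)), static_assert and the plain "reserved" field names stand in for the kernel's u32/u64, __packed and __reserved. With packing, each entry is 48 bytes and the 64-entry table is 3088 bytes, which fits comfortably within the single 4KB CPUID page provided to an SNP guest.)

/* Illustrative only: mirrors the table layout above using standard C types. */
#include <assert.h>
#include <stdint.h>

#define SNP_CPUID_COUNT_MAX 64

struct snp_cpuid_fn_model {
	uint32_t eax_in;
	uint32_t ecx_in;
	uint64_t xcr0_in;
	uint64_t xss_in;
	uint32_t eax, ebx, ecx, edx;
	uint64_t reserved;
} __attribute__((packed));

struct snp_cpuid_table_model {
	uint32_t count;
	uint32_t reserved1;
	uint64_t reserved2;
	struct snp_cpuid_fn_model fn[SNP_CPUID_COUNT_MAX];
} __attribute__((packed));

/* 16-byte header + 64 * 48-byte entries = 3088 bytes, well under one 4KB page. */
static_assert(sizeof(struct snp_cpuid_fn_model) == 48, "entry size per Table 14");
static_assert(sizeof(struct snp_cpuid_table_model) == 16 + 64 * 48, "table size");

int main(void) { return 0; }
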
@@ -242,6 +285,252 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf)
 	return ret;
 }
 
+/*
+ * This may be called early while still running on the initial identity
+ * mapping. Use RIP-relative addressing to obtain the correct address in
+ * both cases: while running on the initial identity mapping and after
+ * the switch-over to kernel virtual addresses later in boot.
+ */
+static const struct snp_cpuid_table *snp_cpuid_get_table(void)
+{
+	void *ptr;
+
+	asm ("lea cpuid_table_copy(%%rip), %0"
+	     : "=r" (ptr)
+	     : "p" (&cpuid_table_copy));
+
+	return ptr;
+}
+
+/*
+ * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
+ * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
+ * and 1 based on the corresponding features enabled by a particular
+ * combination of XCR0 and XSS registers so that a guest can look up the
+ * version corresponding to the features currently enabled in its XCR0/XSS
+ * registers. The only value that differs between these versions/table
+ * entries is the enabled XSAVE area size advertised via EBX.
+ *
+ * While hypervisors may choose to make use of this support, it is more
+ * robust/secure for a guest to simply find the entry corresponding to the
+ * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
+ * XSAVE area size using subfunctions 2 through 64, as documented in APM
+ * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
+ *
+ * Since base/legacy XSAVE area size is documented as 0x240, use that value
+ * directly rather than relying on the base size in the CPUID table.
+ *
+ * Return: XSAVE area size on success, 0 otherwise.
+ */
+static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
+{
+	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+	u64 xfeatures_found = 0;
+	u32 xsave_size = 0x240;
+	int i;
+
+	for (i = 0; i < cpuid_table->count; i++) {
+		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
+
+		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
+			continue;
+		if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
+			continue;
+		if (xfeatures_found & (BIT_ULL(e->ecx_in)))
+			continue;
+
+		xfeatures_found |= (BIT_ULL(e->ecx_in));
+
+		if (compacted)
+			xsave_size += e->eax;
+		else
+			xsave_size = max(xsave_size, e->eax + e->ebx);
+	}
+
+	/*
+	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
+	 * entries in the CPUID table were not present. This is not a valid
+	 * state to be in.
+	 */
+	if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
+		return 0;
+
+	return xsave_size;
+}
+
+static bool
+snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
+{
+	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+	int i;
+
+	for (i = 0; i < cpuid_table->count; i++) {
+		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];
+
+		if (e->eax_in != leaf->fn)
+			continue;
+
+		if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
+			continue;
+
+		/*
+		 * For 0xD subfunctions 0 and 1, only use the entry corresponding
+		 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
+		 * See the comments above snp_cpuid_calc_xsave_size() for more
+		 * details.
+		 */
+		if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
+			if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
+				continue;
+
+		leaf->eax = e->eax;
+		leaf->ebx = e->ebx;
+		leaf->ecx = e->ecx;
+		leaf->edx = e->edx;
+
+		return true;
+	}
+
+	return false;
+}
+
+static void snp_cpuid_hv(struct cpuid_leaf *leaf)
+{
+	if (sev_cpuid_hv(leaf))
+		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
+}
+
+static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
+{
+	struct cpuid_leaf leaf_hv = *leaf;
+
+	switch (leaf->fn) {
+	case 0x1:
+		snp_cpuid_hv(&leaf_hv);
+
+		/* initial APIC ID */
+		leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
+		/* APIC enabled bit */
+		leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));
+
+		/* OSXSAVE enabled bit */
+		if (native_read_cr4() & X86_CR4_OSXSAVE)
+			leaf->ecx |= BIT(27);
+		break;
+	case 0x7:
+		/* OSPKE enabled bit */
+		leaf->ecx &= ~BIT(4);
+		if (native_read_cr4() & X86_CR4_PKE)
+			leaf->ecx |= BIT(4);
+		break;
+	case 0xB:
+		leaf_hv.subfn = 0;
+		snp_cpuid_hv(&leaf_hv);
+
+		/* extended APIC ID */
+		leaf->edx = leaf_hv.edx;
+		break;
+	case 0xD: {
+		bool compacted = false;
+		u64 xcr0 = 1, xss = 0;
+		u32 xsave_size;
+
+		if (leaf->subfn != 0 && leaf->subfn != 1)
+			return 0;
+
+		if (native_read_cr4() & X86_CR4_OSXSAVE)
+			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+		if (leaf->subfn == 1) {
+			/* Get XSS value if XSAVES is enabled. */
+			if (leaf->eax & BIT(3)) {
+				unsigned long lo, hi;
+
+				asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
+						     : "c" (MSR_IA32_XSS));
+				xss = (hi << 32) | lo;
+			}
+
+			/*
+			 * The PPR and APM aren't clear on what size should be
+			 * encoded in 0xD:0x1:EBX when compaction is not enabled
+			 * by either XSAVEC (feature bit 1) or XSAVES (feature
+			 * bit 3) since SNP-capable hardware has these feature
+			 * bits fixed as 1. KVM sets it to 0 in this case, but
+			 * to avoid this becoming an issue it's safer to simply
+			 * treat this as unsupported for SNP guests.
+			 */
+			if (!(leaf->eax & (BIT(1) | BIT(3))))
+				return -EINVAL;
+
+			compacted = true;
+		}
+
+		xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
+		if (!xsave_size)
+			return -EINVAL;
+
+		leaf->ebx = xsave_size;
+		}
+		break;
+	case 0x8000001E:
+		snp_cpuid_hv(&leaf_hv);
+
+		/* extended APIC ID */
+		leaf->eax = leaf_hv.eax;
+		/* compute ID */
+		leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
+		/* node ID */
+		leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
+		break;
+	default:
+		/* No fix-ups needed, use values as-is. */
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
+ * should be treated as fatal by caller.
+ */
+static int snp_cpuid(struct cpuid_leaf *leaf)
+{
+	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
+
+	if (!cpuid_table->count)
+		return -EOPNOTSUPP;
+
+	if (!snp_cpuid_get_validated_func(leaf)) {
+		/*
+		 * Some hypervisors will avoid keeping track of CPUID entries
+		 * where all values are zero, since they can be handled the
+		 * same as out-of-range values (all-zero). This is useful here
+		 * as well as it allows virtually all guest configurations to
+		 * work using a single SNP CPUID table.
+		 *
+		 * To allow for this, there is a need to distinguish between
+		 * out-of-range entries and in-range zero entries, since the
+		 * CPUID table entries are only a template that may need to be
+		 * augmented with additional values for things like
+		 * CPU-specific information during post-processing. So if it's
+		 * not in the table, set the values to zero. Then, if they are
+		 * within a valid CPUID range, proceed with post-processing
+		 * using zeros as the initial values. Otherwise, skip
+		 * post-processing and just return zeros immediately.
+		 */
+		leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
+
+		/* Skip post-processing for out-of-range zero leaves. */
+		if (!(leaf->fn <= cpuid_std_range_max ||
+		      (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
+		      (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
+			return 0;
+	}
+
+	return snp_cpuid_postprocess(leaf);
+}
+
 /*
  * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
  * page yet, so it only supports the MSR based communication with the
@@ -252,16 +541,26 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
 	unsigned int subfn = lower_bits(regs->cx, 32);
 	unsigned int fn = lower_bits(regs->ax, 32);
 	struct cpuid_leaf leaf;
+	int ret;
 
 	/* Only CPUID is supported via MSR protocol */
 	if (exit_code != SVM_EXIT_CPUID)
 		goto fail;
 
 	leaf.fn = fn;
 	leaf.subfn = subfn;
+
+	ret = snp_cpuid(&leaf);
+	if (!ret)
+		goto cpuid_done;
+
+	if (ret != -EOPNOTSUPP)
+		goto fail;
+
 	if (sev_cpuid_hv(&leaf))
 		goto fail;
 
+cpuid_done:
 	regs->ax = leaf.eax;
 	regs->bx = leaf.ebx;
 	regs->cx = leaf.ecx;
@@ -556,12 +855,37 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
 	return ret;
 }
 
+static int vc_handle_cpuid_snp(struct pt_regs *regs)
+{
+	struct cpuid_leaf leaf;
+	int ret;
+
+	leaf.fn = regs->ax;
+	leaf.subfn = regs->cx;
+	ret = snp_cpuid(&leaf);
+	if (!ret) {
+		regs->ax = leaf.eax;
+		regs->bx = leaf.ebx;
+		regs->cx = leaf.ecx;
+		regs->dx = leaf.edx;
+	}
+
+	return ret;
+}
+
 static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
 				      struct es_em_ctxt *ctxt)
 {
 	struct pt_regs *regs = ctxt->regs;
 	u32 cr4 = native_read_cr4();
 	enum es_result ret;
+	int snp_cpuid_ret;
+
+	snp_cpuid_ret = vc_handle_cpuid_snp(regs);
+	if (!snp_cpuid_ret)
+		return ES_OK;
+	if (snp_cpuid_ret != -EOPNOTSUPP)
+		return ES_VMM_ERROR;
 
 	ghcb_set_rax(ghcb, regs->ax);
 	ghcb_set_rcx(ghcb, regs->cx);
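
(Editor's aside, after the diff: a worked example of the XSAVE sizing rule that snp_cpuid_calc_xsave_size() implements, as a small standalone C model. The toy table, the struct fn/calc_xsave_size names and the one-entry table are hypothetical; the 0x240 legacy base and the 0x100-byte AVX state at offset 0x240 are the usual architectural values for component 2. Compacted mode adds each enabled component's size to the base, non-compacted mode takes the largest offset + size; for this single-entry table both come out to 0x340.)

/*
 * Toy model of the table walk in snp_cpuid_calc_xsave_size(); the table
 * contents are made up, the per-component arithmetic mirrors the patch.
 */
#include <stdint.h>
#include <stdio.h>

struct fn { uint32_t eax_in, ecx_in; uint64_t xcr0_in, xss_in; uint32_t eax, ebx; };

static uint32_t calc_xsave_size(const struct fn *tbl, int count,
				uint64_t xfeatures_en, int compacted)
{
	uint32_t xsave_size = 0x240;	/* base/legacy XSAVE area size */
	uint64_t found = 0;

	for (int i = 0; i < count; i++) {
		const struct fn *e = &tbl[i];

		/* Only 0xD subfunctions 2..63 describe extended components. */
		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
			continue;
		if (!(xfeatures_en & (1ULL << e->ecx_in)) || (found & (1ULL << e->ecx_in)))
			continue;
		found |= 1ULL << e->ecx_in;

		if (compacted)
			xsave_size += e->eax;		/* component sizes accumulate */
		else if (e->eax + e->ebx > xsave_size)
			xsave_size = e->eax + e->ebx;	/* offset + size */
	}

	/* Enabled features with no table entry make the result invalid. */
	if (found != (xfeatures_en & ~3ULL))
		return 0;

	return xsave_size;
}

int main(void)
{
	/* One hypothetical entry: component 2 (AVX), 0x100 bytes at offset 0x240. */
	struct fn tbl[] = { { 0xD, 2, 0, 0, 0x100, 0x240 } };
	uint64_t xcr0 = 0x7;	/* x87 | SSE | AVX enabled */

	printf("compacted:     %#x\n", calc_xsave_size(tbl, 1, xcr0, 1)); /* 0x340 */
	printf("non-compacted: %#x\n", calc_xsave_size(tbl, 1, xcr0, 0)); /* 0x340 */
	return 0;
}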