Skip to content

Commit 901ddbb

Browse files
jarkkojs authored and suryasaimadhu committed
x86/sgx: Add a basic NUMA allocation scheme to sgx_alloc_epc_page()
Background ========== SGX enclave memory is enumerated by the processor in contiguous physical ranges called Enclave Page Cache (EPC) sections. Currently, there is a free list per section, but allocations simply target the lowest-numbered sections. This is functional, but has no NUMA awareness. Fortunately, EPC sections are covered by entries in the ACPI SRAT table. These entries allow each EPC section to be associated with a NUMA node, just like normal RAM. Solution ======== Implement a NUMA-aware enclave page allocator. Mirror the buddy allocator and maintain a list of enclave pages for each NUMA node. Attempt to allocate enclave memory first from local nodes, then fall back to other nodes. Note that the fallback is not as sophisticated as the buddy allocator and is itself not aware of NUMA distances. When a node's free list is empty, it searches for the next-highest node with enclave pages (and will wrap if necessary). This could be improved in the future. Other ===== NUMA_KEEP_MEMINFO dependency is required for phys_to_target_node(). [ Kai Huang: Do not return NULL from __sgx_alloc_epc_page() because callers do not expect that and that leads to a NULL ptr deref. ] [ dhansen: Fix an uninitialized 'nid' variable in __sgx_alloc_epc_page() as Reported-by: kernel test robot <[email protected]> to avoid any potential allocations from the wrong NUMA node or even premature allocation failures. ] Signed-off-by: Jarkko Sakkinen <[email protected]> Signed-off-by: Kai Huang <[email protected]> Signed-off-by: Dave Hansen <[email protected]> Signed-off-by: Borislav Petkov <[email protected]> Acked-by: Dave Hansen <[email protected]> Link: https://lore.kernel.org/lkml/158188326978.894464.217282995221175417.stgit@dwillia2-desk3.amr.corp.intel.com/ Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected]
1 parent 51ab30e commit 901ddbb

File tree

3 files changed

+88
-48
lines changed

3 files changed

+88
-48
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1931,6 +1931,7 @@ config X86_SGX
19311931
depends on CRYPTO_SHA256=y
19321932
select SRCU
19331933
select MMU_NOTIFIER
1934+
select NUMA_KEEP_MEMINFO if NUMA
19341935
help
19351936
Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
19361937
that can be used by applications to set aside private regions of code

arch/x86/kernel/cpu/sgx/main.c

Lines changed: 77 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,21 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
2323
* with sgx_reclaimer_lock acquired.
2424
*/
2525
static LIST_HEAD(sgx_active_page_list);
26-
2726
static DEFINE_SPINLOCK(sgx_reclaimer_lock);
2827

28+
/* The free page list lock protected variables prepend the lock. */
29+
static unsigned long sgx_nr_free_pages;
30+
31+
/* Nodes with one or more EPC sections. */
32+
static nodemask_t sgx_numa_mask;
33+
34+
/*
35+
* Array with one list_head for each possible NUMA node. Each
36+
* list contains all the sgx_epc_section's which are on that
37+
* node.
38+
*/
39+
static struct sgx_numa_node *sgx_numa_nodes;
40+
2941
static LIST_HEAD(sgx_dirty_page_list);
3042

3143
/*
@@ -312,6 +324,7 @@ static void sgx_reclaim_pages(void)
312324
struct sgx_epc_section *section;
313325
struct sgx_encl_page *encl_page;
314326
struct sgx_epc_page *epc_page;
327+
struct sgx_numa_node *node;
315328
pgoff_t page_index;
316329
int cnt = 0;
317330
int ret;
@@ -383,28 +396,18 @@ static void sgx_reclaim_pages(void)
383396
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
384397

385398
section = &sgx_epc_sections[epc_page->section];
386-
spin_lock(&section->lock);
387-
list_add_tail(&epc_page->list, &section->page_list);
388-
section->free_cnt++;
389-
spin_unlock(&section->lock);
390-
}
391-
}
392-
393-
static unsigned long sgx_nr_free_pages(void)
394-
{
395-
unsigned long cnt = 0;
396-
int i;
397-
398-
for (i = 0; i < sgx_nr_epc_sections; i++)
399-
cnt += sgx_epc_sections[i].free_cnt;
399+
node = section->node;
400400

401-
return cnt;
401+
spin_lock(&node->lock);
402+
list_add_tail(&epc_page->list, &node->free_page_list);
403+
sgx_nr_free_pages++;
404+
spin_unlock(&node->lock);
405+
}
402406
}
403407

404408
static bool sgx_should_reclaim(unsigned long watermark)
405409
{
406-
return sgx_nr_free_pages() < watermark &&
407-
!list_empty(&sgx_active_page_list);
410+
return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
408411
}
409412

410413
static int ksgxd(void *p)
@@ -451,45 +454,56 @@ static bool __init sgx_page_reclaimer_init(void)
451454
return true;
452455
}
453456

454-
static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_section *section)
457+
static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
455458
{
456-
struct sgx_epc_page *page;
459+
struct sgx_numa_node *node = &sgx_numa_nodes[nid];
460+
struct sgx_epc_page *page = NULL;
457461

458-
spin_lock(&section->lock);
462+
spin_lock(&node->lock);
459463

460-
if (list_empty(&section->page_list)) {
461-
spin_unlock(&section->lock);
464+
if (list_empty(&node->free_page_list)) {
465+
spin_unlock(&node->lock);
462466
return NULL;
463467
}
464468

465-
page = list_first_entry(&section->page_list, struct sgx_epc_page, list);
469+
page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
466470
list_del_init(&page->list);
467-
section->free_cnt--;
471+
sgx_nr_free_pages--;
472+
473+
spin_unlock(&node->lock);
468474

469-
spin_unlock(&section->lock);
470475
return page;
471476
}
472477

473478
/**
474479
* __sgx_alloc_epc_page() - Allocate an EPC page
475480
*
476-
* Iterate through EPC sections and borrow a free EPC page to the caller. When a
477-
* page is no longer needed it must be released with sgx_free_epc_page().
481+
* Iterate through NUMA nodes and reserve ia free EPC page to the caller. Start
482+
* from the NUMA node, where the caller is executing.
478483
*
479484
* Return:
480-
* an EPC page,
481-
* -errno on error
485+
* - an EPC page: A borrowed EPC pages were available.
486+
* - NULL: Out of EPC pages.
482487
*/
483488
struct sgx_epc_page *__sgx_alloc_epc_page(void)
484489
{
485-
struct sgx_epc_section *section;
486490
struct sgx_epc_page *page;
487-
int i;
491+
int nid_of_current = numa_node_id();
492+
int nid = nid_of_current;
488493

489-
for (i = 0; i < sgx_nr_epc_sections; i++) {
490-
section = &sgx_epc_sections[i];
494+
if (node_isset(nid_of_current, sgx_numa_mask)) {
495+
page = __sgx_alloc_epc_page_from_node(nid_of_current);
496+
if (page)
497+
return page;
498+
}
499+
500+
/* Fall back to the non-local NUMA nodes: */
501+
while (true) {
502+
nid = next_node_in(nid, sgx_numa_mask);
503+
if (nid == nid_of_current)
504+
break;
491505

492-
page = __sgx_alloc_epc_page_from_section(section);
506+
page = __sgx_alloc_epc_page_from_node(nid);
493507
if (page)
494508
return page;
495509
}
@@ -600,6 +614,7 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
600614
void sgx_free_epc_page(struct sgx_epc_page *page)
601615
{
602616
struct sgx_epc_section *section = &sgx_epc_sections[page->section];
617+
struct sgx_numa_node *node = section->node;
603618
int ret;
604619

605620
WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
@@ -608,10 +623,12 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
608623
if (WARN_ONCE(ret, "EREMOVE returned %d (0x%x)", ret, ret))
609624
return;
610625

611-
spin_lock(&section->lock);
612-
list_add_tail(&page->list, &section->page_list);
613-
section->free_cnt++;
614-
spin_unlock(&section->lock);
626+
spin_lock(&node->lock);
627+
628+
list_add_tail(&page->list, &node->free_page_list);
629+
sgx_nr_free_pages++;
630+
631+
spin_unlock(&node->lock);
615632
}
616633

617634
static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
@@ -632,8 +649,6 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
632649
}
633650

634651
section->phys_addr = phys_addr;
635-
spin_lock_init(&section->lock);
636-
INIT_LIST_HEAD(&section->page_list);
637652

638653
for (i = 0; i < nr_pages; i++) {
639654
section->pages[i].section = index;
@@ -642,7 +657,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
642657
list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
643658
}
644659

645-
section->free_cnt = nr_pages;
660+
sgx_nr_free_pages += nr_pages;
646661
return true;
647662
}
648663

@@ -661,8 +676,13 @@ static bool __init sgx_page_cache_init(void)
661676
{
662677
u32 eax, ebx, ecx, edx, type;
663678
u64 pa, size;
679+
int nid;
664680
int i;
665681

682+
sgx_numa_nodes = kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes), GFP_KERNEL);
683+
if (!sgx_numa_nodes)
684+
return false;
685+
666686
for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
667687
cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx);
668688

@@ -685,6 +705,21 @@ static bool __init sgx_page_cache_init(void)
685705
break;
686706
}
687707

708+
nid = numa_map_to_online_node(phys_to_target_node(pa));
709+
if (nid == NUMA_NO_NODE) {
710+
/* The physical address is already printed above. */
711+
pr_warn(FW_BUG "Unable to map EPC section to online node. Fallback to the NUMA node 0.\n");
712+
nid = 0;
713+
}
714+
715+
if (!node_isset(nid, sgx_numa_mask)) {
716+
spin_lock_init(&sgx_numa_nodes[nid].lock);
717+
INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
718+
node_set(nid, sgx_numa_mask);
719+
}
720+
721+
sgx_epc_sections[i].node = &sgx_numa_nodes[nid];
722+
688723
sgx_nr_epc_sections++;
689724
}
690725

arch/x86/kernel/cpu/sgx/sgx.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,22 +29,26 @@ struct sgx_epc_page {
2929
struct list_head list;
3030
};
3131

32+
/*
33+
* Contains the tracking data for NUMA nodes having EPC pages. Most importantly,
34+
* the free page list local to the node is stored here.
35+
*/
36+
struct sgx_numa_node {
37+
struct list_head free_page_list;
38+
spinlock_t lock;
39+
};
40+
3241
/*
3342
* The firmware can define multiple chunks of EPC to the different areas of the
3443
* physical memory e.g. for memory areas of the each node. This structure is
3544
* used to store EPC pages for one EPC section and virtual memory area where
3645
* the pages have been mapped.
37-
*
38-
* 'lock' must be held before accessing 'page_list' or 'free_cnt'.
3946
*/
4047
struct sgx_epc_section {
4148
unsigned long phys_addr;
4249
void *virt_addr;
4350
struct sgx_epc_page *pages;
44-
45-
spinlock_t lock;
46-
struct list_head page_list;
47-
unsigned long free_cnt;
51+
struct sgx_numa_node *node;
4852
};
4953

5054
extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];

0 commit comments

Comments
 (0)