Skip to content

Commit 1728ab5

Browse files
jarkkojs authored and suryasaimadhu committed
x86/sgx: Add a page reclaimer
Just like normal RAM, there is a limited amount of enclave memory available and overcommitting it is a very valuable tool to reduce resource use. Introduce a simple reclaim mechanism for enclave pages.

In contrast to normal page reclaim, the kernel cannot directly access enclave memory. To get around this, the SGX architecture provides a set of functions to help. Among other things, these functions copy enclave memory to and from normal memory, encrypting it and protecting its integrity in the process.

Implement a page reclaimer by using these functions. Picks victim pages in LRU fashion from all the enclaves running in the system. A new kernel thread (ksgxswapd) reclaims pages in the background based on watermarks, similar to normal kswapd.

All enclave pages can be reclaimed, architecturally. But, there are some limits to this, such as the special SECS metadata page which must be reclaimed last. The page version array (used to mitigate replaying old reclaimed pages) is also architecturally reclaimable, but not yet implemented. The end result is that the vast majority of enclave pages are currently reclaimable.

Co-developed-by: Sean Christopherson <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
Signed-off-by: Jarkko Sakkinen <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Acked-by: Jethro Beekman <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 2adcba7 commit 1728ab5

File tree

6 files changed

+1134
-27
lines changed

6 files changed

+1134
-27
lines changed

arch/x86/kernel/cpu/sgx/driver.c

Lines changed: 40 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,24 @@ u32 sgx_misc_reserved_mask;
1717
static int sgx_open(struct inode *inode, struct file *file)
1818
{
1919
struct sgx_encl *encl;
20+
int ret;
2021

2122
encl = kzalloc(sizeof(*encl), GFP_KERNEL);
2223
if (!encl)
2324
return -ENOMEM;
2425

26+
kref_init(&encl->refcount);
2527
xa_init(&encl->page_array);
2628
mutex_init(&encl->lock);
29+
INIT_LIST_HEAD(&encl->va_pages);
30+
INIT_LIST_HEAD(&encl->mm_list);
31+
spin_lock_init(&encl->mm_lock);
32+
33+
ret = init_srcu_struct(&encl->srcu);
34+
if (ret) {
35+
kfree(encl);
36+
return ret;
37+
}
2738

2839
file->private_data = encl;
2940

@@ -33,31 +44,37 @@ static int sgx_open(struct inode *inode, struct file *file)
3344
static int sgx_release(struct inode *inode, struct file *file)
3445
{
3546
struct sgx_encl *encl = file->private_data;
36-
struct sgx_encl_page *entry;
37-
unsigned long index;
38-
39-
xa_for_each(&encl->page_array, index, entry) {
40-
if (entry->epc_page) {
41-
sgx_free_epc_page(entry->epc_page);
42-
encl->secs_child_cnt--;
43-
entry->epc_page = NULL;
47+
struct sgx_encl_mm *encl_mm;
48+
49+
/*
50+
* Drain the remaining mm_list entries. At this point the list contains
51+
* entries for processes, which have closed the enclave file but have
52+
* not exited yet. The processes, which have exited, are gone from the
53+
* list by sgx_mmu_notifier_release().
54+
*/
55+
for ( ; ; ) {
56+
spin_lock(&encl->mm_lock);
57+
58+
if (list_empty(&encl->mm_list)) {
59+
encl_mm = NULL;
60+
} else {
61+
encl_mm = list_first_entry(&encl->mm_list,
62+
struct sgx_encl_mm, list);
63+
list_del_rcu(&encl_mm->list);
4464
}
4565

46-
kfree(entry);
47-
}
66+
spin_unlock(&encl->mm_lock);
4867

49-
xa_destroy(&encl->page_array);
68+
/* The enclave is no longer mapped by any mm. */
69+
if (!encl_mm)
70+
break;
5071

51-
if (!encl->secs_child_cnt && encl->secs.epc_page) {
52-
sgx_free_epc_page(encl->secs.epc_page);
53-
encl->secs.epc_page = NULL;
72+
synchronize_srcu(&encl->srcu);
73+
mmu_notifier_unregister(&encl_mm->mmu_notifier, encl_mm->mm);
74+
kfree(encl_mm);
5475
}
5576

56-
/* Detect EPC page leaks. */
57-
WARN_ON_ONCE(encl->secs_child_cnt);
58-
WARN_ON_ONCE(encl->secs.epc_page);
59-
60-
kfree(encl);
77+
kref_put(&encl->refcount, sgx_encl_release);
6178
return 0;
6279
}
6380

@@ -70,6 +87,10 @@ static int sgx_mmap(struct file *file, struct vm_area_struct *vma)
7087
if (ret)
7188
return ret;
7289

90+
ret = sgx_encl_mm_add(encl, vma->vm_mm);
91+
if (ret)
92+
return ret;
93+
7394
vma->vm_ops = &sgx_vm_ops;
7495
vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
7596
vma->vm_private_data = encl;

0 commit comments

Comments
 (0)