Skip to content

Commit 76fa497

Browse files
aikmpe
authored and committed
KVM: PPC: Check if IOMMU page is contained in the pinned physical page
A VM which has: - a DMA capable device passed through to it (eg. network card); - running a malicious kernel that ignores H_PUT_TCE failure; - capability of using IOMMU pages bigger that physical pages can create an IOMMU mapping that exposes (for example) 16MB of the host physical memory to the device when only 64K was allocated to the VM. The remaining 16MB - 64K will be some other content of host memory, possibly including pages of the VM, but also pages of host kernel memory, host programs or other VMs. The attacking VM does not control the location of the page it can map, and is only allowed to map as many pages as it has pages of RAM. We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that an IOMMU page is contained in the physical page so the PCI hardware won't get access to unassigned host memory; however this check is missing in the KVM fastpath (H_PUT_TCE accelerated code). We were lucky so far and did not hit this yet as the very first time when the mapping happens we do not have tbl::it_userspace allocated yet and fall back to the userspace which in turn calls VFIO IOMMU driver, this fails and the guest does not retry, This stores the smallest preregistered page size in the preregistered region descriptor and changes the mm_iommu_xxx API to check this against the IOMMU page size. This calculates maximum page size as a minimum of the natural region alignment and compound page size. For the page shift this uses the shift returned by find_linux_pte() which indicates how the page is mapped to the current userspace - if the page is huge and this is not a zero, then it is a leaf pte and the page is mapped within the range. Fixes: 121f80b ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO") Cc: [email protected] # v4.12+ Signed-off-by: Alexey Kardashevskiy <[email protected]> Reviewed-by: David Gibson <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
1 parent 1463edc commit 76fa497

File tree

5 files changed

+43
-8
lines changed

5 files changed

+43
-8
lines changed

arch/powerpc/include/asm/mmu_context.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
3535
extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
3636
unsigned long ua, unsigned long entries);
3737
extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
38-
unsigned long ua, unsigned long *hpa);
38+
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
3939
extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
40-
unsigned long ua, unsigned long *hpa);
40+
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
4141
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
4242
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
4343
#endif

arch/powerpc/kvm/book3s_64_vio.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
449449
/* This only handles v2 IOMMU type, v1 is handled via ioctl() */
450450
return H_TOO_HARD;
451451

452-
if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
452+
if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
453453
return H_HARDWARE;
454454

455455
if (mm_iommu_mapped_inc(mem))

arch/powerpc/kvm/book3s_64_vio_hv.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,8 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
279279
if (!mem)
280280
return H_TOO_HARD;
281281

282-
if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
282+
if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
283+
&hpa)))
283284
return H_HARDWARE;
284285

285286
pua = (void *) vmalloc_to_phys(pua);
@@ -469,7 +470,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
469470

470471
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
471472
if (mem)
472-
prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
473+
prereg = mm_iommu_ua_to_hpa_rm(mem, ua,
474+
IOMMU_PAGE_SHIFT_4K, &tces) == 0;
473475
}
474476

475477
if (!prereg) {

arch/powerpc/mm/mmu_context_iommu.c

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <linux/hugetlb.h>
2020
#include <linux/swap.h>
2121
#include <asm/mmu_context.h>
22+
#include <asm/pte-walk.h>
2223

2324
static DEFINE_MUTEX(mem_list_mutex);
2425

@@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t {
2728
struct rcu_head rcu;
2829
unsigned long used;
2930
atomic64_t mapped;
31+
unsigned int pageshift;
3032
u64 ua; /* userspace address */
3133
u64 entries; /* number of entries in hpas[] */
3234
u64 *hpas; /* vmalloc'ed */
@@ -125,6 +127,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
125127
{
126128
struct mm_iommu_table_group_mem_t *mem;
127129
long i, j, ret = 0, locked_entries = 0;
130+
unsigned int pageshift;
131+
unsigned long flags;
128132
struct page *page = NULL;
129133

130134
mutex_lock(&mem_list_mutex);
@@ -159,6 +163,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
159163
goto unlock_exit;
160164
}
161165

166+
/*
167+
* For a starting point for a maximum page size calculation
168+
* we use @ua and @entries natural alignment to allow IOMMU pages
169+
* smaller than huge pages but still bigger than PAGE_SIZE.
170+
*/
171+
mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
162172
mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
163173
if (!mem->hpas) {
164174
kfree(mem);
@@ -199,6 +209,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
199209
}
200210
}
201211
populate:
212+
pageshift = PAGE_SHIFT;
213+
if (PageCompound(page)) {
214+
pte_t *pte;
215+
struct page *head = compound_head(page);
216+
unsigned int compshift = compound_order(head);
217+
218+
local_irq_save(flags); /* disables as well */
219+
pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift);
220+
local_irq_restore(flags);
221+
222+
/* Double check it is still the same pinned page */
223+
if (pte && pte_page(*pte) == head &&
224+
pageshift == compshift)
225+
pageshift = max_t(unsigned int, pageshift,
226+
PAGE_SHIFT);
227+
}
228+
mem->pageshift = min(mem->pageshift, pageshift);
202229
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
203230
}
204231

@@ -349,22 +376,25 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
349376
EXPORT_SYMBOL_GPL(mm_iommu_find);
350377

351378
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
352-
unsigned long ua, unsigned long *hpa)
379+
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
353380
{
354381
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
355382
u64 *va = &mem->hpas[entry];
356383

357384
if (entry >= mem->entries)
358385
return -EFAULT;
359386

387+
if (pageshift > mem->pageshift)
388+
return -EFAULT;
389+
360390
*hpa = *va | (ua & ~PAGE_MASK);
361391

362392
return 0;
363393
}
364394
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
365395

366396
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
367-
unsigned long ua, unsigned long *hpa)
397+
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
368398
{
369399
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
370400
void *va = &mem->hpas[entry];
@@ -373,6 +403,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
373403
if (entry >= mem->entries)
374404
return -EFAULT;
375405

406+
if (pageshift > mem->pageshift)
407+
return -EFAULT;
408+
376409
pa = (void *) vmalloc_to_phys(va);
377410
if (!pa)
378411
return -EFAULT;

drivers/vfio/vfio_iommu_spapr_tce.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
467467
if (!mem)
468468
return -EINVAL;
469469

470-
ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
470+
ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
471471
if (ret)
472472
return -EINVAL;
473473

0 commit comments

Comments
 (0)