Skip to content

Commit e1a1ef8

Browse files
aikpaulusmack
authored and committed
KVM: PPC: Book3S: Allocate guest TCEs on demand too
We already allocate hardware TCE tables in multiple levels and skip intermediate levels when we can, now it is a turn of the KVM TCE tables. Thankfully these are allocated already in 2 levels. This moves the table's last level allocation from the creating helper to kvmppc_tce_put() and kvm_spapr_tce_fault(). Since such allocation cannot be done in real mode, this creates a virtual mode version of kvmppc_tce_put() which handles allocations. This adds kvmppc_rm_ioba_validate() to do an additional test if the consequent kvmppc_tce_put() needs a page which has not been allocated; if this is the case, we bail out to virtual mode handlers. The allocations are protected by a new mutex as kvm->lock is not suitable for the task because the fault handler is called with the mmap_sem held but kvmhv_setup_mmu() locks kvm->lock and mmap_sem in the reverse order. Signed-off-by: Alexey Kardashevskiy <[email protected]> Signed-off-by: Paul Mackerras <[email protected]>
1 parent 2001825 commit e1a1ef8

File tree

4 files changed

+110
-33
lines changed

4 files changed

+110
-33
lines changed

arch/powerpc/include/asm/kvm_host.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table {
201201
struct kref kref;
202202
};
203203

204+
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
205+
204206
struct kvmppc_spapr_tce_table {
205207
struct list_head list;
206208
struct kvm *kvm;
@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table {
210212
u64 offset; /* in pages */
211213
u64 size; /* window size in pages */
212214
struct list_head iommu_tables;
215+
struct mutex alloc_lock;
213216
struct page *pages[0];
214217
};
215218

arch/powerpc/include/asm/kvm_ppc.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
197197
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
198198
(stt)->size, (ioba), (npages)) ? \
199199
H_PARAMETER : H_SUCCESS)
200-
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
201-
unsigned long idx, unsigned long tce);
202200
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
203201
unsigned long ioba, unsigned long tce);
204202
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,

arch/powerpc/kvm/book3s_64_vio.c

Lines changed: 58 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
228228
unsigned long i, npages = kvmppc_tce_pages(stt->size);
229229

230230
for (i = 0; i < npages; i++)
231-
__free_page(stt->pages[i]);
231+
if (stt->pages[i])
232+
__free_page(stt->pages[i]);
232233

233234
kfree(stt);
234235
}
235236

237+
static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
238+
unsigned long sttpage)
239+
{
240+
struct page *page = stt->pages[sttpage];
241+
242+
if (page)
243+
return page;
244+
245+
mutex_lock(&stt->alloc_lock);
246+
page = stt->pages[sttpage];
247+
if (!page) {
248+
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
249+
WARN_ON_ONCE(!page);
250+
if (page)
251+
stt->pages[sttpage] = page;
252+
}
253+
mutex_unlock(&stt->alloc_lock);
254+
255+
return page;
256+
}
257+
236258
static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
237259
{
238260
struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
241263
if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
242264
return VM_FAULT_SIGBUS;
243265

244-
page = stt->pages[vmf->pgoff];
266+
page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
267+
if (!page)
268+
return VM_FAULT_OOM;
269+
245270
get_page(page);
246271
vmf->page = page;
247272
return 0;
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
296321
struct kvmppc_spapr_tce_table *siter;
297322
unsigned long npages, size = args->size;
298323
int ret = -ENOMEM;
299-
int i;
300324

301325
if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
302326
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
318342
stt->offset = args->offset;
319343
stt->size = size;
320344
stt->kvm = kvm;
345+
mutex_init(&stt->alloc_lock);
321346
INIT_LIST_HEAD_RCU(&stt->iommu_tables);
322347

323-
for (i = 0; i < npages; i++) {
324-
stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
325-
if (!stt->pages[i])
326-
goto fail;
327-
}
328-
329348
mutex_lock(&kvm->lock);
330349

331350
/* Check this LIOBN hasn't been previously allocated */
@@ -352,11 +371,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
352371
if (ret >= 0)
353372
return ret;
354373

355-
fail:
356-
for (i = 0; i < npages; i++)
357-
if (stt->pages[i])
358-
__free_page(stt->pages[i]);
359-
360374
kfree(stt);
361375
fail_acct:
362376
kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
@@ -413,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
413427
return H_SUCCESS;
414428
}
415429

430+
/*
431+
* Handles TCE requests for emulated devices.
432+
* Puts guest TCE values to the table and expects user space to convert them.
433+
* Cannot fail so kvmppc_tce_validate must be called before it.
434+
*/
435+
static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
436+
unsigned long idx, unsigned long tce)
437+
{
438+
struct page *page;
439+
u64 *tbl;
440+
unsigned long sttpage;
441+
442+
idx -= stt->offset;
443+
sttpage = idx / TCES_PER_PAGE;
444+
page = stt->pages[sttpage];
445+
446+
if (!page) {
447+
/* We allow any TCE, not just with read|write permissions */
448+
if (!tce)
449+
return;
450+
451+
page = kvm_spapr_get_tce_page(stt, sttpage);
452+
if (!page)
453+
return;
454+
}
455+
tbl = page_to_virt(page);
456+
457+
tbl[idx % TCES_PER_PAGE] = tce;
458+
}
459+
416460
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
417461
unsigned long entry)
418462
{

arch/powerpc/kvm/book3s_64_vio_hv.c

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,6 @@
6666

6767
#endif
6868

69-
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
70-
7169
/*
7270
* Finds a TCE table descriptor by LIOBN.
7371
*
@@ -148,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
148146

149147
return H_SUCCESS;
150148
}
151-
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
152149

153150
/* Note on the use of page_address() in real mode,
154151
*
@@ -180,27 +177,58 @@ static u64 *kvmppc_page_address(struct page *page)
180177
/*
181178
* Handles TCE requests for emulated devices.
182179
* Puts guest TCE values to the table and expects user space to convert them.
183-
* Called in both real and virtual modes.
184-
* Cannot fail so kvmppc_tce_validate must be called before it.
185-
*
186-
* WARNING: This will be called in real-mode on HV KVM and virtual
187-
* mode on PR KVM
180+
* Cannot fail so kvmppc_rm_tce_validate must be called before it.
188181
*/
189-
void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
182+
static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
190183
unsigned long idx, unsigned long tce)
191184
{
192185
struct page *page;
193186
u64 *tbl;
194187

195188
idx -= stt->offset;
196189
page = stt->pages[idx / TCES_PER_PAGE];
190+
/*
191+
* page must not be NULL in real mode,
192+
* kvmppc_rm_ioba_validate() must have taken care of this.
193+
*/
194+
WARN_ON_ONCE_RM(!page);
197195
tbl = kvmppc_page_address(page);
198196

199197
tbl[idx % TCES_PER_PAGE] = tce;
200198
}
201-
EXPORT_SYMBOL_GPL(kvmppc_tce_put);
202199

203-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
200+
/*
201+
* TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so
202+
* in real mode.
203+
* Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is
204+
* allocated or not required (when clearing a tce entry).
205+
*/
206+
static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
207+
unsigned long ioba, unsigned long npages, bool clearing)
208+
{
209+
unsigned long i, idx, sttpage, sttpages;
210+
unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);
211+
212+
if (ret)
213+
return ret;
214+
/*
215+
* clearing==true says kvmppc_rm_tce_put won't be allocating pages
216+
* for empty tces.
217+
*/
218+
if (clearing)
219+
return H_SUCCESS;
220+
221+
idx = (ioba >> stt->page_shift) - stt->offset;
222+
sttpage = idx / TCES_PER_PAGE;
223+
sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
224+
TCES_PER_PAGE;
225+
for (i = sttpage; i < sttpage + sttpages; ++i)
226+
if (!stt->pages[i])
227+
return H_TOO_HARD;
228+
229+
return H_SUCCESS;
230+
}
231+
204232
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
205233
unsigned long entry, unsigned long *hpa,
206234
enum dma_data_direction *direction)
@@ -378,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
378406
if (!stt)
379407
return H_TOO_HARD;
380408

381-
ret = kvmppc_ioba_validate(stt, ioba, 1);
409+
ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
382410
if (ret != H_SUCCESS)
383411
return ret;
384412

@@ -406,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
406434
}
407435
}
408436

409-
kvmppc_tce_put(stt, entry, tce);
437+
kvmppc_rm_tce_put(stt, entry, tce);
410438

411439
return H_SUCCESS;
412440
}
@@ -477,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
477505
if (tce_list & (SZ_4K - 1))
478506
return H_PARAMETER;
479507

480-
ret = kvmppc_ioba_validate(stt, ioba, npages);
508+
ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
481509
if (ret != H_SUCCESS)
482510
return ret;
483511

@@ -554,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
554582
}
555583
}
556584

557-
kvmppc_tce_put(stt, entry + i, tce);
585+
kvmppc_rm_tce_put(stt, entry + i, tce);
558586
}
559587

560588
unlock_exit:
@@ -580,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
580608
if (!stt)
581609
return H_TOO_HARD;
582610

583-
ret = kvmppc_ioba_validate(stt, ioba, npages);
611+
ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
584612
if (ret != H_SUCCESS)
585613
return ret;
586614

@@ -607,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
607635
}
608636

609637
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
610-
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
638+
kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
611639

612640
return H_SUCCESS;
613641
}
@@ -632,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
632660

633661
idx = (ioba >> stt->page_shift) - stt->offset;
634662
page = stt->pages[idx / TCES_PER_PAGE];
663+
if (!page) {
664+
vcpu->arch.regs.gpr[4] = 0;
665+
return H_SUCCESS;
666+
}
635667
tbl = (u64 *)page_address(page);
636668

637669
vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];

0 commit comments

Comments (0)