Skip to content

Commit d3695aa

Browse files
aik authored and paulusmack committed
KVM: PPC: Add support for multiple-TCE hcalls
This adds real and virtual mode handlers for the H_PUT_TCE_INDIRECT and H_STUFF_TCE hypercalls for user space emulated devices such as IBMVIO devices or emulated PCI. These calls allow adding multiple entries (up to 512) into the TCE table in one call, which saves time on transitions between kernel and user space. The current implementation of kvmppc_h_stuff_tce() allows it to be executed in both real and virtual modes, so there is one helper. The kvmppc_rm_h_put_tce_indirect() needs to translate the guest address to the host address and, since the translation is different, there are 2 helpers - one for each mode. This implements the KVM_CAP_PPC_MULTITCE capability. When present, the kernel will try handling H_PUT_TCE_INDIRECT and H_STUFF_TCE if these are enabled by the userspace via KVM_CAP_PPC_ENABLE_HCALL. If they cannot be handled by the kernel, they are passed on to the user space. The user space still has to have an implementation for these. Both HV and PR-style KVM are supported. Signed-off-by: Alexey Kardashevskiy <[email protected]> Reviewed-by: David Gibson <[email protected]> Signed-off-by: Paul Mackerras <[email protected]>
1 parent 5ee7af1 commit d3695aa

File tree

8 files changed

+306
-9
lines changed

8 files changed

+306
-9
lines changed

Documentation/virtual/kvm/api.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3035,6 +3035,31 @@ Returns: 0 on success, -1 on error
30353035

30363036
Queues an SMI on the thread's vcpu.
30373037

3038+
4.97 KVM_CAP_PPC_MULTITCE
3039+
3040+
Capability: KVM_CAP_PPC_MULTITCE
3041+
Architectures: ppc
3042+
Type: vm
3043+
3044+
This capability means the kernel is capable of handling hypercalls
3045+
H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
3046+
space. This significantly accelerates DMA operations for PPC KVM guests.
3047+
User space should expect that its handlers for these hypercalls
3048+
are not going to be called if user space previously registered LIOBN
3049+
in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
3050+
3051+
In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
3052+
user space might have to advertise it for the guest. For example,
3053+
IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
3054+
present in the "ibm,hypertas-functions" device-tree property.
3055+
3056+
The hypercalls mentioned above may or may not be processed successfully
3057+
in the kernel-based fast path. If they cannot be handled by the kernel,
3058+
they will get passed on to user space. So user space still has to have
3059+
an implementation for these despite the in-kernel acceleration.
3060+
3061+
This capability is always enabled.
3062+
30383063
5. The kvm_run structure
30393064
------------------------
30403065

arch/powerpc/include/asm/kvm_ppc.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,24 @@ extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
166166

167167
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
168168
struct kvm_create_spapr_tce *args);
169+
extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
170+
struct kvm_vcpu *vcpu, unsigned long liobn);
169171
extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
170172
unsigned long ioba, unsigned long npages);
171173
extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
172174
unsigned long tce);
175+
extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
176+
unsigned long *ua, unsigned long **prmap);
177+
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
178+
unsigned long idx, unsigned long tce);
173179
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
174180
unsigned long ioba, unsigned long tce);
181+
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
182+
unsigned long liobn, unsigned long ioba,
183+
unsigned long tce_list, unsigned long npages);
184+
extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
185+
unsigned long liobn, unsigned long ioba,
186+
unsigned long tce_value, unsigned long npages);
175187
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
176188
unsigned long ioba);
177189
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);

arch/powerpc/kvm/book3s_64_vio.c

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
*
1515
* Copyright 2010 Paul Mackerras, IBM Corp. <[email protected]>
1616
* Copyright 2011 David Gibson, IBM Corporation <[email protected]>
17+
* Copyright 2016 Alexey Kardashevskiy, IBM Corporation <[email protected]>
1718
*/
1819

1920
#include <linux/types.h>
@@ -37,8 +38,7 @@
3738
#include <asm/kvm_host.h>
3839
#include <asm/udbg.h>
3940
#include <asm/iommu.h>
40-
41-
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
41+
#include <asm/tce.h>
4242

4343
static unsigned long kvmppc_tce_pages(unsigned long window_size)
4444
{
@@ -204,3 +204,59 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
204204
}
205205
return ret;
206206
}
207+
208+
long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
209+
unsigned long liobn, unsigned long ioba,
210+
unsigned long tce_list, unsigned long npages)
211+
{
212+
struct kvmppc_spapr_tce_table *stt;
213+
long i, ret = H_SUCCESS, idx;
214+
unsigned long entry, ua = 0;
215+
u64 __user *tces, tce;
216+
217+
stt = kvmppc_find_table(vcpu, liobn);
218+
if (!stt)
219+
return H_TOO_HARD;
220+
221+
entry = ioba >> IOMMU_PAGE_SHIFT_4K;
222+
/*
223+
* SPAPR spec says that the maximum size of the list is 512 TCEs
224+
* so the whole table fits in 4K page
225+
*/
226+
if (npages > 512)
227+
return H_PARAMETER;
228+
229+
if (tce_list & (SZ_4K - 1))
230+
return H_PARAMETER;
231+
232+
ret = kvmppc_ioba_validate(stt, ioba, npages);
233+
if (ret != H_SUCCESS)
234+
return ret;
235+
236+
idx = srcu_read_lock(&vcpu->kvm->srcu);
237+
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
238+
ret = H_TOO_HARD;
239+
goto unlock_exit;
240+
}
241+
tces = (u64 __user *) ua;
242+
243+
for (i = 0; i < npages; ++i) {
244+
if (get_user(tce, tces + i)) {
245+
ret = H_TOO_HARD;
246+
goto unlock_exit;
247+
}
248+
tce = be64_to_cpu(tce);
249+
250+
ret = kvmppc_tce_validate(stt, tce);
251+
if (ret != H_SUCCESS)
252+
goto unlock_exit;
253+
254+
kvmppc_tce_put(stt, entry + i, tce);
255+
}
256+
257+
unlock_exit:
258+
srcu_read_unlock(&vcpu->kvm->srcu, idx);
259+
260+
return ret;
261+
}
262+
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);

arch/powerpc/kvm/book3s_64_vio_hv.c

Lines changed: 146 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
*
1515
* Copyright 2010 Paul Mackerras, IBM Corp. <[email protected]>
1616
* Copyright 2011 David Gibson, IBM Corporation <[email protected]>
17+
* Copyright 2016 Alexey Kardashevskiy, IBM Corporation <[email protected]>
1718
*/
1819

1920
#include <linux/types.h>
@@ -30,13 +31,15 @@
3031
#include <asm/kvm_ppc.h>
3132
#include <asm/kvm_book3s.h>
3233
#include <asm/mmu-hash64.h>
34+
#include <asm/mmu_context.h>
3335
#include <asm/hvcall.h>
3436
#include <asm/synch.h>
3537
#include <asm/ppc-opcode.h>
3638
#include <asm/kvm_host.h>
3739
#include <asm/udbg.h>
3840
#include <asm/iommu.h>
3941
#include <asm/tce.h>
42+
#include <asm/iommu.h>
4043

4144
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
4245

@@ -46,7 +49,7 @@
4649
* WARNING: This will be called in real or virtual mode on HV KVM and virtual
4750
* mode on PR KVM
4851
*/
49-
static struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
52+
struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
5053
unsigned long liobn)
5154
{
5255
struct kvm *kvm = vcpu->kvm;
@@ -58,6 +61,7 @@ static struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
5861

5962
return NULL;
6063
}
64+
EXPORT_SYMBOL_GPL(kvmppc_find_table);
6165

6266
/*
6367
* Validates IO address.
@@ -151,9 +155,29 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
151155
}
152156
EXPORT_SYMBOL_GPL(kvmppc_tce_put);
153157

154-
/* WARNING: This will be called in real-mode on HV KVM and virtual
155-
* mode on PR KVM
156-
*/
158+
long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
159+
unsigned long *ua, unsigned long **prmap)
160+
{
161+
unsigned long gfn = gpa >> PAGE_SHIFT;
162+
struct kvm_memory_slot *memslot;
163+
164+
memslot = search_memslots(kvm_memslots(kvm), gfn);
165+
if (!memslot)
166+
return -EINVAL;
167+
168+
*ua = __gfn_to_hva_memslot(memslot, gfn) |
169+
(gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
170+
171+
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
172+
if (prmap)
173+
*prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
174+
#endif
175+
176+
return 0;
177+
}
178+
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
179+
180+
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
157181
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
158182
unsigned long ioba, unsigned long tce)
159183
{
@@ -180,6 +204,122 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
180204
}
181205
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
182206

207+
static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
208+
unsigned long ua, unsigned long *phpa)
209+
{
210+
pte_t *ptep, pte;
211+
unsigned shift = 0;
212+
213+
ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
214+
if (!ptep || !pte_present(*ptep))
215+
return -ENXIO;
216+
pte = *ptep;
217+
218+
if (!shift)
219+
shift = PAGE_SHIFT;
220+
221+
/* Avoid handling anything potentially complicated in realmode */
222+
if (shift > PAGE_SHIFT)
223+
return -EAGAIN;
224+
225+
if (!pte_young(pte))
226+
return -EAGAIN;
227+
228+
*phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) |
229+
(ua & ~PAGE_MASK);
230+
231+
return 0;
232+
}
233+
234+
long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
235+
unsigned long liobn, unsigned long ioba,
236+
unsigned long tce_list, unsigned long npages)
237+
{
238+
struct kvmppc_spapr_tce_table *stt;
239+
long i, ret = H_SUCCESS;
240+
unsigned long tces, entry, ua = 0;
241+
unsigned long *rmap = NULL;
242+
243+
stt = kvmppc_find_table(vcpu, liobn);
244+
if (!stt)
245+
return H_TOO_HARD;
246+
247+
entry = ioba >> IOMMU_PAGE_SHIFT_4K;
248+
/*
249+
* The spec says that the maximum size of the list is 512 TCEs
250+
* so the whole table addressed resides in 4K page
251+
*/
252+
if (npages > 512)
253+
return H_PARAMETER;
254+
255+
if (tce_list & (SZ_4K - 1))
256+
return H_PARAMETER;
257+
258+
ret = kvmppc_ioba_validate(stt, ioba, npages);
259+
if (ret != H_SUCCESS)
260+
return ret;
261+
262+
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
263+
return H_TOO_HARD;
264+
265+
rmap = (void *) vmalloc_to_phys(rmap);
266+
267+
/*
268+
* Synchronize with the MMU notifier callbacks in
269+
* book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
270+
* While we have the rmap lock, code running on other CPUs
271+
* cannot finish unmapping the host real page that backs
272+
* this guest real page, so we are OK to access the host
273+
* real page.
274+
*/
275+
lock_rmap(rmap);
276+
if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
277+
ret = H_TOO_HARD;
278+
goto unlock_exit;
279+
}
280+
281+
for (i = 0; i < npages; ++i) {
282+
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
283+
284+
ret = kvmppc_tce_validate(stt, tce);
285+
if (ret != H_SUCCESS)
286+
goto unlock_exit;
287+
288+
kvmppc_tce_put(stt, entry + i, tce);
289+
}
290+
291+
unlock_exit:
292+
unlock_rmap(rmap);
293+
294+
return ret;
295+
}
296+
297+
long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
298+
unsigned long liobn, unsigned long ioba,
299+
unsigned long tce_value, unsigned long npages)
300+
{
301+
struct kvmppc_spapr_tce_table *stt;
302+
long i, ret;
303+
304+
stt = kvmppc_find_table(vcpu, liobn);
305+
if (!stt)
306+
return H_TOO_HARD;
307+
308+
ret = kvmppc_ioba_validate(stt, ioba, npages);
309+
if (ret != H_SUCCESS)
310+
return ret;
311+
312+
/* Check permission bits only to allow userspace poison TCE for debug */
313+
if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
314+
return H_PARAMETER;
315+
316+
for (i = 0; i < npages; ++i, ioba += IOMMU_PAGE_SIZE_4K)
317+
kvmppc_tce_put(stt, ioba >> IOMMU_PAGE_SHIFT_4K, tce_value);
318+
319+
return H_SUCCESS;
320+
}
321+
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
322+
183323
long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
184324
unsigned long ioba)
185325
{
@@ -205,3 +345,5 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
205345
return H_SUCCESS;
206346
}
207347
EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
348+
349+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

arch/powerpc/kvm/book3s_hv.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
768768
if (kvmppc_xics_enabled(vcpu)) {
769769
ret = kvmppc_xics_hcall(vcpu, req);
770770
break;
771-
} /* fallthrough */
771+
}
772+
return RESUME_HOST;
773+
case H_PUT_TCE:
774+
ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
775+
kvmppc_get_gpr(vcpu, 5),
776+
kvmppc_get_gpr(vcpu, 6));
777+
if (ret == H_TOO_HARD)
778+
return RESUME_HOST;
779+
break;
780+
case H_PUT_TCE_INDIRECT:
781+
ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
782+
kvmppc_get_gpr(vcpu, 5),
783+
kvmppc_get_gpr(vcpu, 6),
784+
kvmppc_get_gpr(vcpu, 7));
785+
if (ret == H_TOO_HARD)
786+
return RESUME_HOST;
787+
break;
788+
case H_STUFF_TCE:
789+
ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
790+
kvmppc_get_gpr(vcpu, 5),
791+
kvmppc_get_gpr(vcpu, 6),
792+
kvmppc_get_gpr(vcpu, 7));
793+
if (ret == H_TOO_HARD)
794+
return RESUME_HOST;
795+
break;
772796
default:
773797
return RESUME_HOST;
774798
}

arch/powerpc/kvm/book3s_hv_rmhandlers.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2006,8 +2006,8 @@ hcall_real_table:
20062006
.long 0 /* 0x12c */
20072007
.long 0 /* 0x130 */
20082008
.long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
2009-
.long 0 /* 0x138 */
2010-
.long 0 /* 0x13c */
2009+
.long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table
2010+
.long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
20112011
.long 0 /* 0x140 */
20122012
.long 0 /* 0x144 */
20132013
.long 0 /* 0x148 */

0 commit comments

Comments
 (0)