
Commit afdad61

Claudio Imbrenda authored and Christian Borntraeger committed

KVM: s390: Fix storage attributes migration with memory slots
This is a fix for several issues that were found in the original code for
storage attributes migration. Now no bitmap is allocated to keep track of
dirty storage attributes; the extra bits of the per-memslot bitmap that are
always present anyway are now used for this purpose.

The code has also been refactored a little to improve readability.

Fixes: 190df4a ("KVM: s390: CMMA tracking, ESSA emulation, migration mode")
Fixes: 4036e38 ("KVM: s390: ioctls to get and set guest storage attributes")
Acked-by: Janosch Frank <[email protected]>
Signed-off-by: Claudio Imbrenda <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: Christian Borntraeger <[email protected]>
1 parent 0313334 commit afdad61
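
The fix hinges on a detail of the generic KVM code: each memslot's dirty
bitmap is allocated at twice the size needed for dirty logging, and the
second half is otherwise used only by x86. Below is a sketch of the two
generic helpers the new code relies on, paraphrased from
include/linux/kvm_host.h of this era (illustrative, not verbatim):

static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
{
	/* one bit per guest page, rounded up to whole unsigned longs */
	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
}

static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot)
{
	unsigned long len = kvm_dirty_bitmap_bytes(memslot);

	/* the spare second half starts right after the first bitmap */
	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
}

Reusing this spare half ties the dirty-attribute tracking to each memslot,
rather than to the single separately allocated pgste_bitmap that the old
code sized from one slot's base_gfn and npages.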

File tree

3 files changed: +195 −135 lines changed

arch/s390/include/asm/kvm_host.h

Lines changed: 2 additions & 7 deletions
@@ -793,12 +793,6 @@ struct kvm_s390_vsie {
 	struct page *pages[KVM_MAX_VCPUS];
 };
 
-struct kvm_s390_migration_state {
-	unsigned long bitmap_size;	/* in bits (number of guest pages) */
-	atomic64_t dirty_pages;		/* number of dirty pages */
-	unsigned long *pgste_bitmap;
-};
-
 struct kvm_arch{
 	void *sca;
 	int use_esca;
@@ -828,7 +822,8 @@ struct kvm_arch{
 	struct kvm_s390_vsie vsie;
 	u8 epdx;
 	u64 epoch;
-	struct kvm_s390_migration_state *migration_state;
+	int migration_mode;
+	atomic64_t cmma_dirty_pages;
 	/* subset of available cpu features enabled by user space */
 	DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 	struct kvm_s390_gisa *gisa;
arch/s390/kvm/kvm-s390.c

Lines changed: 174 additions & 119 deletions
@@ -862,54 +862,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
  */
 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 {
-	struct kvm_s390_migration_state *mgs;
 	struct kvm_memory_slot *ms;
-	/* should be the only one */
 	struct kvm_memslots *slots;
-	unsigned long ram_pages;
+	unsigned long ram_pages = 0;
 	int slotnr;
 
 	/* migration mode already enabled */
-	if (kvm->arch.migration_state)
+	if (kvm->arch.migration_mode)
 		return 0;
-
 	slots = kvm_memslots(kvm);
 	if (!slots || !slots->used_slots)
 		return -EINVAL;
 
-	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
-	if (!mgs)
-		return -ENOMEM;
-	kvm->arch.migration_state = mgs;
-
-	if (kvm->arch.use_cmma) {
+	if (!kvm->arch.use_cmma) {
+		kvm->arch.migration_mode = 1;
+		return 0;
+	}
+	/* mark all the pages in active slots as dirty */
+	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+		ms = slots->memslots + slotnr;
 		/*
-		 * Get the first slot. They are reverse sorted by base_gfn, so
-		 * the first slot is also the one at the end of the address
-		 * space. We have verified above that at least one slot is
-		 * present.
+		 * The second half of the bitmap is only used on x86,
+		 * and would be wasted otherwise, so we put it to good
+		 * use here to keep track of the state of the storage
+		 * attributes.
 		 */
-		ms = slots->memslots;
-		/* round up so we only use full longs */
-		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
-		/* allocate enough bytes to store all the bits */
-		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
-		if (!mgs->pgste_bitmap) {
-			kfree(mgs);
-			kvm->arch.migration_state = NULL;
-			return -ENOMEM;
-		}
-
-		mgs->bitmap_size = ram_pages;
-		atomic64_set(&mgs->dirty_pages, ram_pages);
-		/* mark all the pages in active slots as dirty */
-		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
-			ms = slots->memslots + slotnr;
-			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
-		}
-
-		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
+		ram_pages += ms->npages;
 	}
+	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
+	kvm->arch.migration_mode = 1;
+	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 	return 0;
 }
 
@@ -919,21 +902,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
  */
 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 {
-	struct kvm_s390_migration_state *mgs;
-
 	/* migration mode already disabled */
-	if (!kvm->arch.migration_state)
+	if (!kvm->arch.migration_mode)
 		return 0;
-	mgs = kvm->arch.migration_state;
-	kvm->arch.migration_state = NULL;
-
-	if (kvm->arch.use_cmma) {
+	kvm->arch.migration_mode = 0;
+	if (kvm->arch.use_cmma)
 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
-		/* We have to wait for the essa emulation to finish */
-		synchronize_srcu(&kvm->srcu);
-		vfree(mgs->pgste_bitmap);
-	}
-	kfree(mgs);
 	return 0;
 }
 
@@ -961,7 +935,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
 static int kvm_s390_vm_get_migration(struct kvm *kvm,
 				     struct kvm_device_attr *attr)
 {
-	u64 mig = (kvm->arch.migration_state != NULL);
+	u64 mig = kvm->arch.migration_mode;
 
 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
 		return -ENXIO;
@@ -1599,6 +1573,134 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 /* for consistency */
 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
 
+/*
+ * Similar to gfn_to_memslot, but returns the index of a memslot also when the
+ * address falls in a hole. In that case the index of one of the memslots
+ * bordering the hole is returned.
+ */
+static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
+{
+	int start = 0, end = slots->used_slots;
+	int slot = atomic_read(&slots->lru_slot);
+	struct kvm_memory_slot *memslots = slots->memslots;
+
+	if (gfn >= memslots[slot].base_gfn &&
+	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
+		return slot;
+
+	while (start < end) {
+		slot = start + (end - start) / 2;
+
+		if (gfn >= memslots[slot].base_gfn)
+			end = slot;
+		else
+			start = slot + 1;
+	}
+
+	if (gfn >= memslots[start].base_gfn &&
+	    gfn < memslots[start].base_gfn + memslots[start].npages) {
+		atomic_set(&slots->lru_slot, start);
+	}
+
+	return start;
+}
+
+static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+			      u8 *res, unsigned long bufsize)
+{
+	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
+
+	args->count = 0;
+	while (args->count < bufsize) {
+		hva = gfn_to_hva(kvm, cur_gfn);
+		/*
+		 * We return an error if the first value was invalid, but we
+		 * return successfully if at least one value was copied.
+		 */
+		if (kvm_is_error_hva(hva))
+			return args->count ? 0 : -EFAULT;
+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+			pgstev = 0;
+		res[args->count++] = (pgstev >> 24) & 0x43;
+		cur_gfn++;
+	}
+
+	return 0;
+}
+
+static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
+					      unsigned long cur_gfn)
+{
+	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
+	struct kvm_memory_slot *ms = slots->memslots + slotidx;
+	unsigned long ofs = cur_gfn - ms->base_gfn;
+
+	if (ms->base_gfn + ms->npages <= cur_gfn) {
+		slotidx--;
+		/* If we are above the highest slot, wrap around */
+		if (slotidx < 0)
+			slotidx = slots->used_slots - 1;
+
+		ms = slots->memslots + slotidx;
+		ofs = 0;
+	}
+	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
+	while ((slotidx > 0) && (ofs >= ms->npages)) {
+		slotidx--;
+		ms = slots->memslots + slotidx;
+		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+	}
+	return ms->base_gfn + ofs;
+}
+
+static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+			     u8 *res, unsigned long bufsize)
+{
+	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *ms;
+
+	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
+	ms = gfn_to_memslot(kvm, cur_gfn);
+	args->count = 0;
+	args->start_gfn = cur_gfn;
+	if (!ms)
+		return 0;
+	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+
+	while (args->count < bufsize) {
+		hva = gfn_to_hva(kvm, cur_gfn);
+		if (kvm_is_error_hva(hva))
+			return 0;
+		/* Decrement only if we actually flipped the bit to 0 */
+		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+			atomic64_dec(&kvm->arch.cmma_dirty_pages);
+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+			pgstev = 0;
+		/* Save the value */
+		res[args->count++] = (pgstev >> 24) & 0x43;
+		/* If the next bit is too far away, stop. */
+		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
+			return 0;
+		/* If we reached the previous "next", find the next one */
+		if (cur_gfn == next_gfn)
+			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+		/* Reached the end of memory or of the buffer, stop */
+		if ((next_gfn >= mem_end) ||
+		    (next_gfn - args->start_gfn >= bufsize))
+			return 0;
+		cur_gfn++;
+		/* Reached the end of the current memslot, take the next one. */
+		if (cur_gfn - ms->base_gfn >= ms->npages) {
+			ms = gfn_to_memslot(kvm, cur_gfn);
+			if (!ms)
+				return 0;
+		}
+	}
+	return 0;
+}
+
 /*
  * This function searches for the next page with dirty CMMA attributes, and
  * saves the attributes in the buffer up to either the end of the buffer or
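
A note on gfn_to_memslot_approx() above: KVM keeps slots->memslots sorted by
decreasing base_gfn, so the loop is a lower-bound binary search for the first
slot whose base_gfn is <= gfn. A self-contained toy version over a plain
descending array, for illustration only (the array contents are made up):

#include <assert.h>

/*
 * Toy model: slot base addresses sorted in DECREASING order, as KVM keeps
 * its memslots. Returns the index of the first entry <= gfn, i.e. the slot
 * that would contain gfn (or one bordering the hole gfn falls into).
 */
static int approx_slot(const unsigned long *base_gfn, int nslots,
		       unsigned long gfn)
{
	int start = 0, end = nslots, slot;

	while (start < end) {
		slot = start + (end - start) / 2;
		if (gfn >= base_gfn[slot])
			end = slot;	/* candidate: keep searching left */
		else
			start = slot + 1;
	}
	return start;
}

int main(void)
{
	/* three slots starting at gfn 0x300, 0x200 and 0x0 */
	const unsigned long bases[] = { 0x300, 0x200, 0x0 };

	assert(approx_slot(bases, 3, 0x250) == 1); /* inside slot 1 */
	assert(approx_slot(bases, 3, 0x100) == 2); /* hole: borders slot 2 */
	return 0;
}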
@@ -1610,97 +1712,54 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
 				  struct kvm_s390_cmma_log *args)
 {
-	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
-	unsigned long bufsize, hva, pgstev, i, next, cur;
-	int srcu_idx, peek, r = 0, rr;
-	u8 *res;
-
-	cur = args->start_gfn;
-	i = next = pgstev = 0;
+	unsigned long bufsize;
+	int srcu_idx, peek, ret;
+	u8 *values;
 
-	if (unlikely(!kvm->arch.use_cmma))
+	if (!kvm->arch.use_cmma)
 		return -ENXIO;
 	/* Invalid/unsupported flags were specified */
 	if (args->flags & ~KVM_S390_CMMA_PEEK)
 		return -EINVAL;
 	/* Migration mode query, and we are not doing a migration */
 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
-	if (!peek && !s)
+	if (!peek && !kvm->arch.migration_mode)
 		return -EINVAL;
 	/* CMMA is disabled or was not used, or the buffer has length zero */
 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
 	if (!bufsize || !kvm->mm->context.uses_cmm) {
 		memset(args, 0, sizeof(*args));
 		return 0;
 	}
-
-	if (!peek) {
-		/* We are not peeking, and there are no dirty pages */
-		if (!atomic64_read(&s->dirty_pages)) {
-			memset(args, 0, sizeof(*args));
-			return 0;
-		}
-		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
-				    args->start_gfn);
-		if (cur >= s->bitmap_size)	/* nothing found, loop back */
-			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
-		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
-			memset(args, 0, sizeof(*args));
-			return 0;
-		}
-		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+	/* We are not peeking, and there are no dirty pages */
+	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
+		memset(args, 0, sizeof(*args));
+		return 0;
 	}
 
-	res = vmalloc(bufsize);
-	if (!res)
+	values = vmalloc(bufsize);
+	if (!values)
 		return -ENOMEM;
 
-	args->start_gfn = cur;
-
 	down_read(&kvm->mm->mmap_sem);
 	srcu_idx = srcu_read_lock(&kvm->srcu);
-	while (i < bufsize) {
-		hva = gfn_to_hva(kvm, cur);
-		if (kvm_is_error_hva(hva)) {
-			r = -EFAULT;
-			break;
-		}
-		/* decrement only if we actually flipped the bit to 0 */
-		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
-			atomic64_dec(&s->dirty_pages);
-		r = get_pgste(kvm->mm, hva, &pgstev);
-		if (r < 0)
-			pgstev = 0;
-		/* save the value */
-		res[i++] = (pgstev >> 24) & 0x43;
-		/*
-		 * if the next bit is too far away, stop.
-		 * if we reached the previous "next", find the next one
-		 */
-		if (!peek) {
-			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
-				break;
-			if (cur == next)
-				next = find_next_bit(s->pgste_bitmap,
-						     s->bitmap_size, cur + 1);
-			/* reached the end of the bitmap or of the buffer, stop */
-			if ((next >= s->bitmap_size) ||
-			    (next >= args->start_gfn + bufsize))
-				break;
-		}
-		cur++;
-	}
+	if (peek)
+		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
+	else
+		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 	up_read(&kvm->mm->mmap_sem);
-	args->count = i;
-	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
 
-	rr = copy_to_user((void __user *)args->values, res, args->count);
-	if (rr)
-		r = -EFAULT;
+	if (kvm->arch.migration_mode)
+		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
+	else
+		args->remaining = 0;
 
-	vfree(res);
-	return r;
+	if (copy_to_user((void __user *)args->values, values, args->count))
+		ret = -EFAULT;
+
+	vfree(values);
+	return ret;
 }
 
 /*
@@ -2139,10 +2198,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
 	kvm_s390_vsie_destroy(kvm);
-	if (kvm->arch.migration_state) {
-		vfree(kvm->arch.migration_state->pgste_bitmap);
-		kfree(kvm->arch.migration_state);
-	}
 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
 }
 
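For context, the user-facing interface behind kvm_s390_get_cmma_bits() is the
KVM_S390_GET_CMMA_BITS vm ioctl with struct kvm_s390_cmma_log. A minimal
userspace sketch of how a migration tool might drain the dirty attribute
values; the vm_fd, buffer size, and loop policy are illustrative assumptions,
not part of this commit:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Sketch: fetch dirty CMMA (storage attribute) values until none remain.
 * Assumes vm_fd is an open KVM VM file descriptor with CMMA in use and
 * migration mode already started (via the KVM_S390_VM_MIGRATION /
 * KVM_S390_VM_MIGRATION_START device attribute).
 */
static int drain_cmma(int vm_fd)
{
	uint8_t buf[4096];
	struct kvm_s390_cmma_log log = { .start_gfn = 0 };

	do {
		log.count = sizeof(buf);
		log.flags = 0;	/* no KVM_S390_CMMA_PEEK: consume dirty bits */
		log.values = (uint64_t)(uintptr_t)buf;
		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
			return -1;
		/*
		 * On return, log.start_gfn is the first gfn actually read and
		 * log.count the number of values in buf; a real tool would
		 * transmit those here before asking for the next chunk.
		 */
		log.start_gfn += log.count;
	} while (log.remaining);
	return 0;
}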