@@ -862,54 +862,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
  */
 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 {
-	struct kvm_s390_migration_state *mgs;
 	struct kvm_memory_slot *ms;
-	/* should be the only one */
 	struct kvm_memslots *slots;
-	unsigned long ram_pages;
+	unsigned long ram_pages = 0;
 	int slotnr;
 
 	/* migration mode already enabled */
-	if (kvm->arch.migration_state)
+	if (kvm->arch.migration_mode)
 		return 0;
-
 	slots = kvm_memslots(kvm);
 	if (!slots || !slots->used_slots)
 		return -EINVAL;
 
-	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
-	if (!mgs)
-		return -ENOMEM;
-	kvm->arch.migration_state = mgs;
-
-	if (kvm->arch.use_cmma) {
+	if (!kvm->arch.use_cmma) {
+		kvm->arch.migration_mode = 1;
+		return 0;
+	}
+	/* mark all the pages in active slots as dirty */
+	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+		ms = slots->memslots + slotnr;
 		/*
-		 * Get the first slot. They are reverse sorted by base_gfn, so
-		 * the first slot is also the one at the end of the address
-		 * space. We have verified above that at least one slot is
-		 * present.
+		 * The second half of the bitmap is only used on x86,
+		 * and would be wasted otherwise, so we put it to good
+		 * use here to keep track of the state of the storage
+		 * attributes.
 		 */
-		ms = slots->memslots;
-		/* round up so we only use full longs */
-		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
-		/* allocate enough bytes to store all the bits */
-		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
-		if (!mgs->pgste_bitmap) {
-			kfree(mgs);
-			kvm->arch.migration_state = NULL;
-			return -ENOMEM;
-		}
-
-		mgs->bitmap_size = ram_pages;
-		atomic64_set(&mgs->dirty_pages, ram_pages);
-		/* mark all the pages in active slots as dirty */
-		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
-			ms = slots->memslots + slotnr;
-			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
-		}
-
-		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
+		ram_pages += ms->npages;
 	}
+	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
+	kvm->arch.migration_mode = 1;
+	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 	return 0;
 }
 
@@ -919,21 +902,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
  */
 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 {
-	struct kvm_s390_migration_state *mgs;
-
 	/* migration mode already disabled */
-	if (!kvm->arch.migration_state)
+	if (!kvm->arch.migration_mode)
 		return 0;
-	mgs = kvm->arch.migration_state;
-	kvm->arch.migration_state = NULL;
-
-	if (kvm->arch.use_cmma) {
+	kvm->arch.migration_mode = 0;
+	if (kvm->arch.use_cmma)
 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
-		/* We have to wait for the essa emulation to finish */
-		synchronize_srcu(&kvm->srcu);
-		vfree(mgs->pgste_bitmap);
-	}
-	kfree(mgs);
 	return 0;
 }
 
@@ -961,7 +935,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
 static int kvm_s390_vm_get_migration(struct kvm *kvm,
 				     struct kvm_device_attr *attr)
 {
-	u64 mig = (kvm->arch.migration_state != NULL);
+	u64 mig = kvm->arch.migration_mode;
 
 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
 		return -ENXIO;
@@ -1599,6 +1573,134 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 /* for consistency */
 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
 
+/*
+ * Similar to gfn_to_memslot, but returns the index of a memslot also when the
+ * address falls in a hole. In that case the index of one of the memslots
+ * bordering the hole is returned.
+ */
+static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
+{
+	int start = 0, end = slots->used_slots;
+	int slot = atomic_read(&slots->lru_slot);
+	struct kvm_memory_slot *memslots = slots->memslots;
+
+	if (gfn >= memslots[slot].base_gfn &&
+	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
+		return slot;
+
+	while (start < end) {
+		slot = start + (end - start) / 2;
+
+		if (gfn >= memslots[slot].base_gfn)
+			end = slot;
+		else
+			start = slot + 1;
+	}
+
+	if (gfn >= memslots[start].base_gfn &&
+	    gfn < memslots[start].base_gfn + memslots[start].npages) {
+		atomic_set(&slots->lru_slot, start);
+	}
+
+	return start;
+}
+
+static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+			      u8 *res, unsigned long bufsize)
+{
+	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
+
+	args->count = 0;
+	while (args->count < bufsize) {
+		hva = gfn_to_hva(kvm, cur_gfn);
+		/*
+		 * We return an error if the first value was invalid, but we
+		 * return successfully if at least one value was copied.
+		 */
+		if (kvm_is_error_hva(hva))
+			return args->count ? 0 : -EFAULT;
+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+			pgstev = 0;
+		res[args->count++] = (pgstev >> 24) & 0x43;
+		cur_gfn++;
+	}
+
+	return 0;
+}
+
+static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
+					      unsigned long cur_gfn)
+{
+	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
+	struct kvm_memory_slot *ms = slots->memslots + slotidx;
+	unsigned long ofs = cur_gfn - ms->base_gfn;
+
+	if (ms->base_gfn + ms->npages <= cur_gfn) {
+		slotidx--;
+		/* If we are above the highest slot, wrap around */
+		if (slotidx < 0)
+			slotidx = slots->used_slots - 1;
+
+		ms = slots->memslots + slotidx;
+		ofs = 0;
+	}
+	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
+	while ((slotidx > 0) && (ofs >= ms->npages)) {
+		slotidx--;
+		ms = slots->memslots + slotidx;
+		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+	}
+	return ms->base_gfn + ofs;
+}
+
+static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+			     u8 *res, unsigned long bufsize)
+{
+	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *ms;
+
+	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
+	ms = gfn_to_memslot(kvm, cur_gfn);
+	args->count = 0;
+	args->start_gfn = cur_gfn;
+	if (!ms)
+		return 0;
+	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+
+	while (args->count < bufsize) {
+		hva = gfn_to_hva(kvm, cur_gfn);
+		if (kvm_is_error_hva(hva))
+			return 0;
+		/* Decrement only if we actually flipped the bit to 0 */
+		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+			atomic64_dec(&kvm->arch.cmma_dirty_pages);
+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+			pgstev = 0;
+		/* Save the value */
+		res[args->count++] = (pgstev >> 24) & 0x43;
+		/* If the next bit is too far away, stop. */
+		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
+			return 0;
+		/* If we reached the previous "next", find the next one */
+		if (cur_gfn == next_gfn)
+			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+		/* Reached the end of memory or of the buffer, stop */
+		if ((next_gfn >= mem_end) ||
+		    (next_gfn - args->start_gfn >= bufsize))
+			return 0;
+		cur_gfn++;
+		/* Reached the end of the current memslot, take the next one. */
+		if (cur_gfn - ms->base_gfn >= ms->npages) {
+			ms = gfn_to_memslot(kvm, cur_gfn);
+			if (!ms)
+				return 0;
+		}
+	}
+	return 0;
+}
+
 /*
  * This function searches for the next page with dirty CMMA attributes, and
  * saves the attributes in the buffer up to either the end of the buffer or
@@ -1610,97 +1712,54 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
 				  struct kvm_s390_cmma_log *args)
 {
-	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
-	unsigned long bufsize, hva, pgstev, i, next, cur;
-	int srcu_idx, peek, r = 0, rr;
-	u8 *res;
-
-	cur = args->start_gfn;
-	i = next = pgstev = 0;
+	unsigned long bufsize;
+	int srcu_idx, peek, ret;
+	u8 *values;
 
-	if (unlikely(!kvm->arch.use_cmma))
+	if (!kvm->arch.use_cmma)
 		return -ENXIO;
 	/* Invalid/unsupported flags were specified */
 	if (args->flags & ~KVM_S390_CMMA_PEEK)
 		return -EINVAL;
 	/* Migration mode query, and we are not doing a migration */
 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
-	if (!peek && !s)
+	if (!peek && !kvm->arch.migration_mode)
 		return -EINVAL;
 	/* CMMA is disabled or was not used, or the buffer has length zero */
 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
 	if (!bufsize || !kvm->mm->context.uses_cmm) {
 		memset(args, 0, sizeof(*args));
 		return 0;
 	}
-
-	if (!peek) {
-		/* We are not peeking, and there are no dirty pages */
-		if (!atomic64_read(&s->dirty_pages)) {
-			memset(args, 0, sizeof(*args));
-			return 0;
-		}
-		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
-				    args->start_gfn);
-		if (cur >= s->bitmap_size)	/* nothing found, loop back */
-			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
-		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
-			memset(args, 0, sizeof(*args));
-			return 0;
-		}
-		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+	/* We are not peeking, and there are no dirty pages */
+	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
+		memset(args, 0, sizeof(*args));
+		return 0;
 	}
 
-	res = vmalloc(bufsize);
-	if (!res)
+	values = vmalloc(bufsize);
+	if (!values)
 		return -ENOMEM;
 
-	args->start_gfn = cur;
-
 	down_read(&kvm->mm->mmap_sem);
 	srcu_idx = srcu_read_lock(&kvm->srcu);
-	while (i < bufsize) {
-		hva = gfn_to_hva(kvm, cur);
-		if (kvm_is_error_hva(hva)) {
-			r = -EFAULT;
-			break;
-		}
-		/* decrement only if we actually flipped the bit to 0 */
-		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
-			atomic64_dec(&s->dirty_pages);
-		r = get_pgste(kvm->mm, hva, &pgstev);
-		if (r < 0)
-			pgstev = 0;
-		/* save the value */
-		res[i++] = (pgstev >> 24) & 0x43;
-		/*
-		 * if the next bit is too far away, stop.
-		 * if we reached the previous "next", find the next one
-		 */
-		if (!peek) {
-			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
-				break;
-			if (cur == next)
-				next = find_next_bit(s->pgste_bitmap,
-						     s->bitmap_size, cur + 1);
-			/* reached the end of the bitmap or of the buffer, stop */
-			if ((next >= s->bitmap_size) ||
-			    (next >= args->start_gfn + bufsize))
-				break;
-		}
-		cur++;
-	}
+	if (peek)
+		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
+	else
+		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 	up_read(&kvm->mm->mmap_sem);
-	args->count = i;
-	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
 
-	rr = copy_to_user((void __user *)args->values, res, args->count);
-	if (rr)
-		r = -EFAULT;
+	if (kvm->arch.migration_mode)
+		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
+	else
+		args->remaining = 0;
 
-	vfree(res);
-	return r;
+	if (copy_to_user((void __user *)args->values, values, args->count))
+		ret = -EFAULT;
+
+	vfree(values);
+	return ret;
 }
 
 /*
@@ -2139,10 +2198,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
 	kvm_s390_vsie_destroy(kvm);
-	if (kvm->arch.migration_state) {
-		vfree(kvm->arch.migration_state->pgste_bitmap);
-		kfree(kvm->arch.migration_state);
-	}
 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
 }
 
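Note (not part of the patch): the kvm_s390_get_cmma_bits() rework above backs the existing KVM_S390_GET_CMMA_BITS ioctl, which userspace drives on the VM file descriptor during migration. The following is a minimal userspace sketch of that consumer side, under the assumption that vm_fd is an already created KVM VM fd with migration mode enabled and the CMMA migration capability available; the buffer size and the drain_cmma() helper name are arbitrary illustration choices, not part of the kernel or QEMU code.

/*
 * Hypothetical userspace sketch: repeatedly fetch CMMA values with
 * KVM_S390_GET_CMMA_BITS until the kernel reports no dirty pages left.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int drain_cmma(int vm_fd)
{
	uint8_t values[8192];			/* arbitrary buffer size */
	struct kvm_s390_cmma_log log;

	memset(&log, 0, sizeof(log));
	log.values = (uint64_t)(uintptr_t)values;

	do {
		log.count = sizeof(values);
		log.flags = 0;			/* 0 = migration mode; KVM_S390_CMMA_PEEK would only read */
		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0) {
			perror("KVM_S390_GET_CMMA_BITS");
			return -1;
		}
		/*
		 * On return, log.start_gfn points at the first reported page
		 * and log.count consecutive attribute values are in the
		 * buffer; a real VMM would forward them to the destination.
		 */
		log.start_gfn += log.count;
	} while (log.count || log.remaining);

	return 0;
}

With the patch applied, log.remaining is fed from kvm->arch.cmma_dirty_pages instead of the old per-VM pgste_bitmap counter, so the same userspace loop keeps working while the kernel tracks dirtiness in the second half of each memslot's dirty bitmap.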