Skip to content

Commit 0a61b22

Browse files
Martin Schwidefsky authored and borntraeger committed
KVM: s390/mm: use software dirty bit detection for user dirty tracking
Switch the user dirty bit detection used for migration from the hardware-provided host change-bit in the pgste to a fault-based detection method. This reduces the dependency of the host on the storage key to a point where it becomes possible to enable the RCP bypass for KVM guests. The fault-based dirty detection will only indicate changes caused by accesses via the guest address space. The hardware-based method can detect all changes, even those caused by I/O or accesses via the kernel page table. The KVM/qemu code needs to take this into account.
Signed-off-by: Martin Schwidefsky <[email protected]>
Signed-off-by: Dominik Dingel <[email protected]>
Signed-off-by: Christian Borntraeger <[email protected]>
1 parent 693ffc0 commit 0a61b22

File tree

2 files changed

+59
-82
lines changed

2 files changed

+59
-82
lines changed

arch/s390/include/asm/pgtable.h

Lines changed: 56 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -309,7 +309,8 @@ extern unsigned long MODULES_END;
309309
#define PGSTE_HC_BIT 0x00200000UL
310310
#define PGSTE_GR_BIT 0x00040000UL
311311
#define PGSTE_GC_BIT 0x00020000UL
312-
#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */
312+
#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */
313+
#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */
313314

314315
#else /* CONFIG_64BIT */
315316

@@ -391,7 +392,8 @@ extern unsigned long MODULES_END;
391392
#define PGSTE_HC_BIT 0x0020000000000000UL
392393
#define PGSTE_GR_BIT 0x0004000000000000UL
393394
#define PGSTE_GC_BIT 0x0002000000000000UL
394-
#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */
395+
#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
396+
#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
395397

396398
#endif /* CONFIG_64BIT */
397399

@@ -720,16 +722,6 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
720722
address = pte_val(*ptep) & PAGE_MASK;
721723
skey = (unsigned long) page_get_storage_key(address);
722724
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
723-
if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
724-
/* Transfer dirty + referenced bit to host bits in pgste */
725-
pgste_val(pgste) |= bits << 52;
726-
page_set_storage_key(address, skey ^ bits, 0);
727-
} else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
728-
(bits & _PAGE_REFERENCED)) {
729-
/* Transfer referenced bit to host bit in pgste */
730-
pgste_val(pgste) |= PGSTE_HR_BIT;
731-
page_reset_referenced(address);
732-
}
733725
/* Transfer page changed & referenced bit to guest bits in pgste */
734726
pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
735727
/* Copy page access key and fetch protection bit to pgste */
@@ -740,19 +732,6 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
740732

741733
}
742734

743-
static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste,
744-
struct mm_struct *mm)
745-
{
746-
#ifdef CONFIG_PGSTE
747-
if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
748-
return pgste;
749-
/* Get referenced bit from storage key */
750-
if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
751-
pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
752-
#endif
753-
return pgste;
754-
}
755-
756735
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
757736
struct mm_struct *mm)
758737
{
@@ -770,23 +749,30 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
770749
* key C/R to 0.
771750
*/
772751
nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
752+
nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
773753
page_set_storage_key(address, nkey, 0);
774754
#endif
775755
}
776756

777-
static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
757+
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
778758
{
779-
if (!MACHINE_HAS_ESOP &&
780-
(pte_val(entry) & _PAGE_PRESENT) &&
781-
(pte_val(entry) & _PAGE_WRITE)) {
782-
/*
783-
* Without enhanced suppression-on-protection force
784-
* the dirty bit on for all writable ptes.
785-
*/
786-
pte_val(entry) |= _PAGE_DIRTY;
787-
pte_val(entry) &= ~_PAGE_PROTECT;
759+
if ((pte_val(entry) & _PAGE_PRESENT) &&
760+
(pte_val(entry) & _PAGE_WRITE) &&
761+
!(pte_val(entry) & _PAGE_INVALID)) {
762+
if (!MACHINE_HAS_ESOP) {
763+
/*
764+
* Without enhanced suppression-on-protection force
765+
* the dirty bit on for all writable ptes.
766+
*/
767+
pte_val(entry) |= _PAGE_DIRTY;
768+
pte_val(entry) &= ~_PAGE_PROTECT;
769+
}
770+
if (!(pte_val(entry) & _PAGE_PROTECT))
771+
/* This pte allows write access, set user-dirty */
772+
pgste_val(pgste) |= PGSTE_UC_BIT;
788773
}
789774
*ptep = entry;
775+
return pgste;
790776
}
791777

792778
/**
@@ -884,7 +870,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
884870
pgste = pgste_get_lock(ptep);
885871
pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
886872
pgste_set_key(ptep, pgste, entry, mm);
887-
pgste_set_pte(ptep, entry);
873+
pgste = pgste_set_pte(ptep, pgste, entry);
888874
pgste_set_unlock(ptep, pgste);
889875
} else {
890876
if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
@@ -1030,45 +1016,6 @@ static inline pte_t pte_mkhuge(pte_t pte)
10301016
}
10311017
#endif
10321018

1033-
/*
1034-
* Get (and clear) the user dirty bit for a pte.
1035-
*/
1036-
static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
1037-
pte_t *ptep)
1038-
{
1039-
pgste_t pgste;
1040-
int dirty = 0;
1041-
1042-
if (mm_has_pgste(mm)) {
1043-
pgste = pgste_get_lock(ptep);
1044-
pgste = pgste_update_all(ptep, pgste, mm);
1045-
dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
1046-
pgste_val(pgste) &= ~PGSTE_HC_BIT;
1047-
pgste_set_unlock(ptep, pgste);
1048-
return dirty;
1049-
}
1050-
return dirty;
1051-
}
1052-
1053-
/*
1054-
* Get (and clear) the user referenced bit for a pte.
1055-
*/
1056-
static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
1057-
pte_t *ptep)
1058-
{
1059-
pgste_t pgste;
1060-
int young = 0;
1061-
1062-
if (mm_has_pgste(mm)) {
1063-
pgste = pgste_get_lock(ptep);
1064-
pgste = pgste_update_young(ptep, pgste, mm);
1065-
young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
1066-
pgste_val(pgste) &= ~PGSTE_HR_BIT;
1067-
pgste_set_unlock(ptep, pgste);
1068-
}
1069-
return young;
1070-
}
1071-
10721019
static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
10731020
{
10741021
unsigned long pto = (unsigned long) ptep;
@@ -1131,6 +1078,36 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
11311078
atomic_sub(0x10000, &mm->context.attach_count);
11321079
}
11331080

1081+
/*
1082+
* Get (and clear) the user dirty bit for a pte.
1083+
*/
1084+
static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
1085+
unsigned long addr,
1086+
pte_t *ptep)
1087+
{
1088+
pgste_t pgste;
1089+
pte_t pte;
1090+
int dirty;
1091+
1092+
if (!mm_has_pgste(mm))
1093+
return 0;
1094+
pgste = pgste_get_lock(ptep);
1095+
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
1096+
pgste_val(pgste) &= ~PGSTE_UC_BIT;
1097+
pte = *ptep;
1098+
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
1099+
pgste = pgste_ipte_notify(mm, ptep, pgste);
1100+
__ptep_ipte(addr, ptep);
1101+
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
1102+
pte_val(pte) |= _PAGE_PROTECT;
1103+
else
1104+
pte_val(pte) |= _PAGE_INVALID;
1105+
*ptep = pte;
1106+
}
1107+
pgste_set_unlock(ptep, pgste);
1108+
return dirty;
1109+
}
1110+
11341111
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11351112
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
11361113
unsigned long addr, pte_t *ptep)
@@ -1150,7 +1127,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
11501127
pte = pte_mkold(pte);
11511128

11521129
if (mm_has_pgste(vma->vm_mm)) {
1153-
pgste_set_pte(ptep, pte);
1130+
pgste = pgste_set_pte(ptep, pgste, pte);
11541131
pgste_set_unlock(ptep, pgste);
11551132
} else
11561133
*ptep = pte;
@@ -1233,7 +1210,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
12331210
if (mm_has_pgste(mm)) {
12341211
pgste = pgste_get(ptep);
12351212
pgste_set_key(ptep, pgste, pte, mm);
1236-
pgste_set_pte(ptep, pte);
1213+
pgste = pgste_set_pte(ptep, pgste, pte);
12371214
pgste_set_unlock(ptep, pgste);
12381215
} else
12391216
*ptep = pte;
@@ -1314,7 +1291,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
13141291
pte = pte_wrprotect(pte);
13151292

13161293
if (mm_has_pgste(mm)) {
1317-
pgste_set_pte(ptep, pte);
1294+
pgste = pgste_set_pte(ptep, pgste, pte);
13181295
pgste_set_unlock(ptep, pgste);
13191296
} else
13201297
*ptep = pte;
@@ -1339,7 +1316,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
13391316
ptep_flush_direct(vma->vm_mm, address, ptep);
13401317

13411318
if (mm_has_pgste(vma->vm_mm)) {
1342-
pgste_set_pte(ptep, entry);
1319+
pgste = pgste_set_pte(ptep, pgste, entry);
13431320
pgste_set_unlock(ptep, pgste);
13441321
} else
13451322
*ptep = entry;

arch/s390/mm/pgtable.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -832,6 +832,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
832832
}
833833
spin_unlock(&gmap_notifier_lock);
834834
}
835+
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
835836

836837
static inline int page_table_with_pgste(struct page *page)
837838
{
@@ -864,8 +865,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
864865
atomic_set(&page->_mapcount, 0);
865866
table = (unsigned long *) page_to_phys(page);
866867
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
867-
clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
868-
PAGE_SIZE/2);
868+
clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
869869
return table;
870870
}
871871

@@ -1005,7 +1005,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
10051005
/* changing the guest storage key is considered a change of the page */
10061006
if ((pgste_val(new) ^ pgste_val(old)) &
10071007
(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
1008-
pgste_val(new) |= PGSTE_HC_BIT;
1008+
pgste_val(new) |= PGSTE_UC_BIT;
10091009

10101010
pgste_set_unlock(ptep, new);
10111011
pte_unmap_unlock(*ptep, ptl);

0 commit comments

Comments
 (0)