Skip to content

Commit 828502d

Browse files
Izik Eidus authored and torvalds committed
ksm: add mmu_notifier set_pte_at_notify()
KSM is a linux driver that allows dynamicly sharing identical memory pages between one or more processes. Unlike tradtional page sharing that is made at the allocation of the memory, ksm do it dynamicly after the memory was created. Memory is periodically scanned; identical pages are identified and merged. The sharing is made in a transparent way to the processes that use it. Ksm is highly important for hypervisors (kvm), where in production enviorments there might be many copys of the same data data among the host memory. This kind of data can be: similar kernels, librarys, cache, and so on. Even that ksm was wrote for kvm, any userspace application that want to use it to share its data can try it. Ksm may be useful for any application that might have similar (page aligment) data strctures among the memory, ksm will find this data merge it to one copy, and even if it will be changed and thereforew copy on writed, ksm will merge it again as soon as it will be identical again. Another reason to consider using ksm is the fact that it might simplify alot the userspace code of application that want to use shared private data, instead that the application will mange shared area, ksm will do this for the application, and even write to this data will be allowed without any synchinization acts from the application. Ksm was designed to be a loadable module that doesn't change the VM code of linux. This patch: The set_pte_at_notify() macro allows setting a pte in the shadow page table directly, instead of flushing the shadow page table entry and then getting vmexit to set it. It uses a new change_pte() callback to do so. set_pte_at_notify() is an optimization for kvm, and other users of mmu_notifiers, for COW pages. It is useful for kvm when ksm is used, because it allows kvm not to have to receive vmexit and only then map the ksm page into the shadow page table, but instead map it directly at the same time as Linux maps the page into the host page table. 
Users of mmu_notifiers who don't implement the new mmu_notifier_change_pte() callback will just receive the mmu_notifier_invalidate_page() callback. Signed-off-by: Izik Eidus <[email protected]> Signed-off-by: Chris Wright <[email protected]> Signed-off-by: Hugh Dickins <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Wu Fengguang <[email protected]> Cc: Balbir Singh <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: KAMEZAWA Hiroyuki <[email protected]> Cc: Lee Schermerhorn <[email protected]> Cc: Avi Kivity <[email protected]> Cc: Nick Piggin <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 451ea25 commit 828502d

File tree

3 files changed

+61
-2
lines changed

3 files changed

+61
-2
lines changed

include/linux/mmu_notifier.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,15 @@ struct mmu_notifier_ops {
6161
struct mm_struct *mm,
6262
unsigned long address);
6363

64+
/*
65+
* change_pte is called in cases that pte mapping to page is changed:
66+
* for example, when ksm remaps pte to point to a new shared page.
67+
*/
68+
void (*change_pte)(struct mmu_notifier *mn,
69+
struct mm_struct *mm,
70+
unsigned long address,
71+
pte_t pte);
72+
6473
/*
6574
* Before this is invoked any secondary MMU is still ok to
6675
* read/write to the page previously pointed to by the Linux
@@ -154,6 +163,8 @@ extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
154163
extern void __mmu_notifier_release(struct mm_struct *mm);
155164
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
156165
unsigned long address);
166+
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
167+
unsigned long address, pte_t pte);
157168
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
158169
unsigned long address);
159170
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
@@ -175,6 +186,13 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
175186
return 0;
176187
}
177188

189+
/*
 * Forward a pte change at @address in @mm to any registered mmu notifiers.
 * Cheap early-out when no notifiers are registered on this mm.
 */
static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
	if (!mm_has_notifiers(mm))
		return;
	__mmu_notifier_change_pte(mm, address, pte);
}
195+
178196
static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
179197
unsigned long address)
180198
{
@@ -236,6 +254,16 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
236254
__young; \
237255
})
238256

257+
/*
 * Set the pte in the primary (Linux) page table and then invoke the
 * change_pte notifier, so that secondary MMUs (e.g. kvm shadow page
 * tables) can map the new page at the same time instead of being
 * invalidated and re-faulting later.
 *
 * The arguments are captured in locals so each is evaluated exactly
 * once; __ptep needs no local because set_pte_at() is its only use.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)	\
({								\
	struct mm_struct *___mm = __mm;				\
	unsigned long ___address = __address;			\
	pte_t ___pte = __pte;					\
								\
	set_pte_at(___mm, ___address, __ptep, ___pte);		\
	mmu_notifier_change_pte(___mm, ___address, ___pte);	\
})
266+
239267
#else /* CONFIG_MMU_NOTIFIER */
240268

241269
static inline void mmu_notifier_release(struct mm_struct *mm)
@@ -248,6 +276,11 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
248276
return 0;
249277
}
250278

279+
/*
 * Stub for !CONFIG_MMU_NOTIFIER: with no notifier support compiled in,
 * a pte change requires no secondary-MMU notification.
 */
static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
}
283+
251284
static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
252285
unsigned long address)
253286
{
@@ -273,6 +306,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
273306

274307
#define ptep_clear_flush_young_notify ptep_clear_flush_young
275308
#define ptep_clear_flush_notify ptep_clear_flush
309+
#define set_pte_at_notify set_pte_at
276310

277311
#endif /* CONFIG_MMU_NOTIFIER */
278312

mm/memory.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2115,9 +2115,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
21152115
* seen in the presence of one thread doing SMC and another
21162116
* thread doing COW.
21172117
*/
2118-
ptep_clear_flush_notify(vma, address, page_table);
2118+
ptep_clear_flush(vma, address, page_table);
21192119
page_add_new_anon_rmap(new_page, vma, address);
2120-
set_pte_at(mm, address, page_table, entry);
2120+
/*
2121+
* We call the notify macro here because, when using secondary
2122+
* mmu page tables (such as kvm shadow page tables), we want the
2123+
* new page to be mapped directly into the secondary page table.
2124+
*/
2125+
set_pte_at_notify(mm, address, page_table, entry);
21212126
update_mmu_cache(vma, address, entry);
21222127
if (old_page) {
21232128
/*

mm/mmu_notifier.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,26 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
9999
return young;
100100
}
101101

102+
/*
 * Notify every registered mmu notifier on @mm that the pte at @address
 * has been changed to @pte (e.g. ksm remapping it to a shared page).
 * Notifiers without a change_pte callback get invalidate_page instead,
 * so their stale secondary mapping is dropped rather than updated.
 */
void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
			       pte_t pte)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	/* Walk the notifier list under RCU protection. */
	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->change_pte)
			mn->ops->change_pte(mn, mm, address, pte);
		/*
		 * Some drivers don't have change_pte,
		 * so we must call invalidate_page in that case.
		 */
		else if (mn->ops->invalidate_page)
			mn->ops->invalidate_page(mn, mm, address);
	}
	rcu_read_unlock();
}
121+
102122
void __mmu_notifier_invalidate_page(struct mm_struct *mm,
103123
unsigned long address)
104124
{

0 commit comments

Comments
 (0)