Skip to content

Commit e86c59b

Browse files
Claudio Imbrenda authored and torvalds committed
mm/ksm: improve deduplication of zero pages with colouring
Some architectures have a set of zero pages (coloured zero pages) instead of only one zero page, in order to improve the cache performance. In those cases, the kernel samepage merger (KSM) would merge all the allocated pages that happen to be filled with zeroes to the same deduplicated page, thus losing all the advantages of coloured zero pages. This behaviour is noticeable when a process accesses large arrays of allocated pages containing zeroes. A test I conducted on s390 shows that there is a speed penalty when KSM merges such pages, compared to not merging them or using actual zero pages from the start without breaking the COW. This patch fixes this behaviour. When coloured zero pages are present, the checksum of a zero page is calculated during initialisation, and compared with the checksum of the current candidate during merging. In case of a match, the normal merging routine is used to merge the page with the correct coloured zero page, which ensures the candidate page is checked to be equal to the target zero page. A sysfs entry is also added to toggle this behaviour, since it can potentially introduce performance regressions, especially on architectures without coloured zero pages. The default value is disabled, for backwards compatibility. With this patch, the performance with KSM is the same as with non COW-broken actual zero pages, which is also the same as without KSM. [[email protected]: make zero_checksum and ksm_use_zero_pages __read_mostly, per Andrea] [[email protected]: documentation for coloured zero pages deduplication] Link: http://lkml.kernel.org/r/[email protected] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Claudio Imbrenda <[email protected]> Cc: Christian Borntraeger <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Andrea Arcangeli <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 8d4a017 commit e86c59b

File tree

2 files changed

+79
-3
lines changed

2 files changed

+79
-3
lines changed

Documentation/vm/ksm.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,20 @@ run - set 0 to stop ksmd from running but keep merged pages,
8080
Default: 0 (must be changed to 1 to activate KSM,
8181
except if CONFIG_SYSFS is disabled)
8282

83+
use_zero_pages - specifies whether empty pages (i.e. allocated pages
84+
that only contain zeroes) should be treated specially.
85+
When set to 1, empty pages are merged with the kernel
86+
zero page(s) instead of with each other as it would
87+
happen normally. This can improve the performance on
88+
architectures with coloured zero pages, depending on
89+
the workload. Care should be taken when enabling this
90+
setting, as it can potentially degrade the performance
91+
of KSM for some workloads, for example if the checksums
92+
of pages candidate for merging match the checksum of
93+
an empty page. This setting can be changed at any time,
94+
it is only effective for pages merged after the change.
95+
Default: 0 (normal KSM behaviour as in earlier releases)
96+
8397
The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
8498

8599
pages_shared - how many shared pages are being used

mm/ksm.c

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,12 @@ static unsigned int ksm_thread_pages_to_scan = 100;
223223
/* Milliseconds ksmd should sleep between batches */
224224
static unsigned int ksm_thread_sleep_millisecs = 20;
225225

226+
/* Checksum of an empty (zeroed) page */
227+
static unsigned int zero_checksum __read_mostly;
228+
229+
/* Whether to merge empty (zeroed) pages with actual zero pages */
230+
static bool ksm_use_zero_pages __read_mostly;
231+
226232
#ifdef CONFIG_NUMA
227233
/* Zeroed when merging across nodes is not allowed */
228234
static unsigned int ksm_merge_across_nodes = 1;
@@ -926,6 +932,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
926932
struct mm_struct *mm = vma->vm_mm;
927933
pmd_t *pmd;
928934
pte_t *ptep;
935+
pte_t newpte;
929936
spinlock_t *ptl;
930937
unsigned long addr;
931938
int err = -EFAULT;
@@ -950,12 +957,22 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
950957
goto out_mn;
951958
}
952959

953-
get_page(kpage);
954-
page_add_anon_rmap(kpage, vma, addr, false);
960+
/*
961+
* No need to check ksm_use_zero_pages here: we can only have a
962+
* zero_page here if ksm_use_zero_pages was enabled already.
963+
*/
964+
if (!is_zero_pfn(page_to_pfn(kpage))) {
965+
get_page(kpage);
966+
page_add_anon_rmap(kpage, vma, addr, false);
967+
newpte = mk_pte(kpage, vma->vm_page_prot);
968+
} else {
969+
newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
970+
vma->vm_page_prot));
971+
}
955972

956973
flush_cache_page(vma, addr, pte_pfn(*ptep));
957974
ptep_clear_flush_notify(vma, addr, ptep);
958-
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
975+
set_pte_at_notify(mm, addr, ptep, newpte);
959976

960977
page_remove_rmap(page, false);
961978
if (!page_mapped(page))
@@ -1467,6 +1484,23 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
14671484
return;
14681485
}
14691486

1487+
/*
1488+
* Same checksum as an empty page. We attempt to merge it with the
1489+
* appropriate zero page if the user enabled this via sysfs.
1490+
*/
1491+
if (ksm_use_zero_pages && (checksum == zero_checksum)) {
1492+
struct vm_area_struct *vma;
1493+
1494+
vma = find_mergeable_vma(rmap_item->mm, rmap_item->address);
1495+
err = try_to_merge_one_page(vma, page,
1496+
ZERO_PAGE(rmap_item->address));
1497+
/*
1498+
* In case of failure, the page was not really empty, so we
1499+
* need to continue. Otherwise we're done.
1500+
*/
1501+
if (!err)
1502+
return;
1503+
}
14701504
tree_rmap_item =
14711505
unstable_tree_search_insert(rmap_item, page, &tree_page);
14721506
if (tree_rmap_item) {
@@ -2233,6 +2267,28 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
22332267
KSM_ATTR(merge_across_nodes);
22342268
#endif
22352269

2270+
static ssize_t use_zero_pages_show(struct kobject *kobj,
2271+
struct kobj_attribute *attr, char *buf)
2272+
{
2273+
return sprintf(buf, "%u\n", ksm_use_zero_pages);
2274+
}
2275+
static ssize_t use_zero_pages_store(struct kobject *kobj,
2276+
struct kobj_attribute *attr,
2277+
const char *buf, size_t count)
2278+
{
2279+
int err;
2280+
bool value;
2281+
2282+
err = kstrtobool(buf, &value);
2283+
if (err)
2284+
return -EINVAL;
2285+
2286+
ksm_use_zero_pages = value;
2287+
2288+
return count;
2289+
}
2290+
KSM_ATTR(use_zero_pages);
2291+
22362292
static ssize_t pages_shared_show(struct kobject *kobj,
22372293
struct kobj_attribute *attr, char *buf)
22382294
{
@@ -2290,6 +2346,7 @@ static struct attribute *ksm_attrs[] = {
22902346
#ifdef CONFIG_NUMA
22912347
&merge_across_nodes_attr.attr,
22922348
#endif
2349+
&use_zero_pages_attr.attr,
22932350
NULL,
22942351
};
22952352

@@ -2304,6 +2361,11 @@ static int __init ksm_init(void)
23042361
struct task_struct *ksm_thread;
23052362
int err;
23062363

2364+
/* The correct value depends on page size and endianness */
2365+
zero_checksum = calc_checksum(ZERO_PAGE(0));
2366+
/* Default to false for backwards compatibility */
2367+
ksm_use_zero_pages = false;
2368+
23072369
err = ksm_slab_init();
23082370
if (err)
23092371
goto out;

0 commit comments

Comments
 (0)