
Commit 6b2dbba

walken-google authored and torvalds committed
mm: replace vma prio_tree with an interval tree
Implement an interval tree as a replacement for the VMA prio_tree.  The
algorithms are similar to lib/interval_tree.c; however, that code can't be
directly reused as the interval endpoints are not explicitly stored in the
VMA.  So instead, the common algorithm is moved into a template and the
details (node type, how to get interval endpoints from the node, etc.) are
filled in using the C preprocessor.

Once the interval tree functions are available, using them as a replacement
for the VMA prio tree is a relatively simple, mechanical job.

Signed-off-by: Michel Lespinasse <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Hillf Danton <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: David Woodhouse <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent fff3fd8 · commit 6b2dbba
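The template is used by defining the IT* parameters and then including the
header.  The actual instantiation lives in mm/interval_tree.c (referenced by
the "/* interval_tree.c */" comment in the include/linux/mm.h hunk below),
which is among the 25 changed files but not quoted in this excerpt.  The
sketch below shows the intended pattern for the VMA case, where the interval
endpoints are computed from vm_pgoff and the mapping size rather than stored
directly; the shared.linear field names are assumptions inferred from this
diff, not verbatim code from the commit.

/*
 * Illustrative sketch only: instantiating the interval tree template
 * for VMAs.  Field names under vma->shared are assumed, not verbatim.
 */
#include <linux/mm.h>
#include <linux/fs.h>

#define ITSTRUCT   struct vm_area_struct         /* node type */
#define ITRB       shared.linear.rb              /* rb_node field (assumed name) */
#define ITTYPE     unsigned long                 /* endpoints are page offsets */
#define ITSUBTREE  shared.linear.rb_subtree_last /* last-in-subtree (assumed name) */
#define ITSTART(n) ((n)->vm_pgoff)               /* first page offset mapped */
#define ITLAST(n)  ((n)->vm_pgoff + \
		    (((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1)
#define ITSTATIC                                 /* empty: functions are exported */
#define ITPREFIX   vma_interval_tree             /* yields vma_interval_tree_insert, ... */

#include <linux/interval_tree_tmpl.h>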

File tree

25 files changed: +357 −466 lines changed


arch/arm/mm/fault-armv.c

Lines changed: 1 addition & 2 deletions
@@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	unsigned long offset;
 	pgoff_t pgoff;
 	int aliases = 0;
@@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 	 * cache coherency.
 	 */
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
		/*
		 * If this VMA is not in our MM, we can ignore it.
		 * Note that we intentionally mask out the VMA

arch/arm/mm/flush.c

Lines changed: 1 addition & 2 deletions
@@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 {
 	struct mm_struct *mm = current->active_mm;
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	pgoff_t pgoff;

 	/*
@@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);

 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long offset;

		/*

arch/parisc/kernel/cache.c

Lines changed: 1 addition & 2 deletions
@@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	unsigned long offset;
 	unsigned long addr, old_addr = 0;
 	pgoff_t pgoff;
@@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page)
 	 * to flush one address here for them all to become coherent */

 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
		addr = mpnt->vm_start + offset;

arch/x86/mm/hugetlbpage.c

Lines changed: 1 addition & 2 deletions
@@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
			vma->vm_pgoff;
-	struct prio_tree_iter iter;
 	struct vm_area_struct *svma;
 	unsigned long saddr;
 	pte_t *spte = NULL;
@@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 		return (pte_t *)pmd_alloc(mm, pud, addr);

 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
		if (svma == vma)
			continue;

fs/hugetlbfs/inode.c

Lines changed: 4 additions & 5 deletions
@@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 }

 static inline void
-hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
+hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;

-	vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
+	vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) {
		unsigned long v_offset;

		/*
		 * Can the expression below overflow on 32-bit arches?
-		 * No, because the prio_tree returns us only those vmas
+		 * No, because the interval tree returns us only those vmas
		 * which overlap the truncated area starting at pgoff,
		 * and no vma on a 32-bit arch can span beyond the 4GB.
		 */
@@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)

 	i_size_write(inode, offset);
 	mutex_lock(&mapping->i_mmap_mutex);
-	if (!prio_tree_empty(&mapping->i_mmap))
+	if (!RB_EMPTY_ROOT(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
 	mutex_unlock(&mapping->i_mmap_mutex);
 	truncate_hugepages(inode, offset);

fs/inode.c

Lines changed: 1 addition & 1 deletion
@@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping)
 	mutex_init(&mapping->i_mmap_mutex);
 	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
-	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+	mapping->i_mmap = RB_ROOT;
 	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
 }
 EXPORT_SYMBOL(address_space_init_once);

include/linux/fs.h

Lines changed: 3 additions & 3 deletions
@@ -401,7 +401,7 @@ struct inodes_stat_t {
 #include <linux/cache.h>
 #include <linux/list.h>
 #include <linux/radix-tree.h>
-#include <linux/prio_tree.h>
+#include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/pid.h>
 #include <linux/bug.h>
@@ -669,7 +669,7 @@ struct address_space {
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	spinlock_t		tree_lock;	/* and lock protecting it */
 	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
-	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
+	struct rb_root		i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
 	struct mutex		i_mmap_mutex;	/* protect tree, count, list */
 	/* Protected by tree_lock together with the radix tree */
@@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag);
  */
 static inline int mapping_mapped(struct address_space *mapping)
 {
-	return	!prio_tree_empty(&mapping->i_mmap) ||
+	return	!RB_EMPTY_ROOT(&mapping->i_mmap) ||
		!list_empty(&mapping->i_mmap_nonlinear);
 }

include/linux/interval_tree_tmpl.h

Lines changed: 215 additions & 0 deletions
@@ -0,0 +1,215 @@
+/*
+  Interval Trees
+  (C) 2012  Michel Lespinasse <[email protected]>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+
+  include/linux/interval_tree_tmpl.h
+*/
+
+/*
+ * Template for implementing interval trees
+ *
+ * ITSTRUCT:   struct type of the interval tree nodes
+ * ITRB:       name of struct rb_node field within ITSTRUCT
+ * ITTYPE:     type of the interval endpoints
+ * ITSUBTREE:  name of ITTYPE field within ITSTRUCT holding last-in-subtree
+ * ITSTART(n): start endpoint of ITSTRUCT node n
+ * ITLAST(n):  last endpoint of ITSTRUCT node n
+ * ITSTATIC:   'static' or empty
+ * ITPREFIX:   prefix to use for the inline tree definitions
+ */
+
+/* IT(name) -> ITPREFIX_name */
+#define _ITNAME(prefix, name) prefix ## _ ## name
+#define ITNAME(prefix, name) _ITNAME(prefix, name)
+#define IT(name) ITNAME(ITPREFIX, name)
+
+/* Callbacks for augmented rbtree insert and remove */
+
+static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node)
+{
+	ITTYPE max = ITLAST(node), subtree_last;
+	if (node->ITRB.rb_left) {
+		subtree_last = rb_entry(node->ITRB.rb_left,
+					ITSTRUCT, ITRB)->ITSUBTREE;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	if (node->ITRB.rb_right) {
+		subtree_last = rb_entry(node->ITRB.rb_right,
+					ITSTRUCT, ITRB)->ITSUBTREE;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	return max;
+}
+
+static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
+{
+	while (rb != stop) {
+		ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB);
+		ITTYPE subtree_last = IT(compute_subtree_last)(node);
+		if (node->ITSUBTREE == subtree_last)
+			break;
+		node->ITSUBTREE = subtree_last;
+		rb = rb_parent(&node->ITRB);
+	}
+}
+
+static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
+	ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
+
+	new->ITSUBTREE = old->ITSUBTREE;
+}
+
+static void IT(augment_rotate)(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
+	ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
+
+	new->ITSUBTREE = old->ITSUBTREE;
+	old->ITSUBTREE = IT(compute_subtree_last)(old);
+}
+
+static const struct rb_augment_callbacks IT(augment_callbacks) = {
+	IT(augment_propagate), IT(augment_copy), IT(augment_rotate)
+};
+
+/* Insert / remove interval nodes from the tree */
+
+ITSTATIC void IT(insert)(ITSTRUCT *node, struct rb_root *root)
+{
+	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
+	ITTYPE start = ITSTART(node), last = ITLAST(node);
+	ITSTRUCT *parent;
+
+	while (*link) {
+		rb_parent = *link;
+		parent = rb_entry(rb_parent, ITSTRUCT, ITRB);
+		if (parent->ITSUBTREE < last)
+			parent->ITSUBTREE = last;
+		if (start < ITSTART(parent))
+			link = &parent->ITRB.rb_left;
+		else
+			link = &parent->ITRB.rb_right;
+	}
+
+	node->ITSUBTREE = last;
+	rb_link_node(&node->ITRB, rb_parent, link);
+	rb_insert_augmented(&node->ITRB, root, &IT(augment_callbacks));
+}
+
+ITSTATIC void IT(remove)(ITSTRUCT *node, struct rb_root *root)
+{
+	rb_erase_augmented(&node->ITRB, root, &IT(augment_callbacks));
+}
+
+/*
+ * Iterate over intervals intersecting [start;last]
+ *
+ * Note that a node's interval intersects [start;last] iff:
+ *   Cond1: ITSTART(node) <= last
+ * and
+ *   Cond2: start <= ITLAST(node)
+ */
+
+static ITSTRUCT *IT(subtree_search)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
+{
+	while (true) {
+		/*
+		 * Loop invariant: start <= node->ITSUBTREE
+		 * (Cond2 is satisfied by one of the subtree nodes)
+		 */
+		if (node->ITRB.rb_left) {
+			ITSTRUCT *left = rb_entry(node->ITRB.rb_left,
+						  ITSTRUCT, ITRB);
+			if (start <= left->ITSUBTREE) {
+				/*
+				 * Some nodes in left subtree satisfy Cond2.
+				 * Iterate to find the leftmost such node N.
+				 * If it also satisfies Cond1, that's the match
+				 * we are looking for. Otherwise, there is no
+				 * matching interval as nodes to the right of N
+				 * can't satisfy Cond1 either.
+				 */
+				node = left;
+				continue;
+			}
+		}
+		if (ITSTART(node) <= last) {		/* Cond1 */
+			if (start <= ITLAST(node))	/* Cond2 */
+				return node;	/* node is leftmost match */
+			if (node->ITRB.rb_right) {
+				node = rb_entry(node->ITRB.rb_right,
+						ITSTRUCT, ITRB);
+				if (start <= node->ITSUBTREE)
+					continue;
+			}
+		}
+		return NULL;	/* No match */
+	}
+}
+
+ITSTATIC ITSTRUCT *IT(iter_first)(struct rb_root *root,
+				  ITTYPE start, ITTYPE last)
+{
+	ITSTRUCT *node;
+
+	if (!root->rb_node)
+		return NULL;
+	node = rb_entry(root->rb_node, ITSTRUCT, ITRB);
+	if (node->ITSUBTREE < start)
+		return NULL;
+	return IT(subtree_search)(node, start, last);
+}
+
+ITSTATIC ITSTRUCT *IT(iter_next)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
+{
+	struct rb_node *rb = node->ITRB.rb_right, *prev;
+
+	while (true) {
+		/*
+		 * Loop invariants:
+		 *   Cond1: ITSTART(node) <= last
+		 *   rb == node->ITRB.rb_right
+		 *
+		 * First, search right subtree if suitable
+		 */
+		if (rb) {
+			ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB);
+			if (start <= right->ITSUBTREE)
+				return IT(subtree_search)(right, start, last);
+		}
+
+		/* Move up the tree until we come from a node's left child */
+		do {
+			rb = rb_parent(&node->ITRB);
+			if (!rb)
+				return NULL;
+			prev = &node->ITRB;
+			node = rb_entry(rb, ITSTRUCT, ITRB);
+			rb = node->ITRB.rb_right;
+		} while (prev == rb);
+
+		/* Check if the node intersects [start;last] */
+		if (last < ITSTART(node))	/* !Cond1 */
+			return NULL;
+		else if (start <= ITLAST(node))	/* Cond2 */
+			return node;
+	}
+}
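Given an instantiation, a stabbing query walks the generated iter_first/
iter_next pair, which together return every node overlapping the query
interval, in sorted order by start endpoint.  A hypothetical usage sketch
follows; the foo_* names, struct foo_node, root, qstart, qlast, and visit()
are all illustrative stand-ins for whatever ITPREFIX and ITSTRUCT were
defined as, not names from this commit:

/* Hypothetical usage: visit every node overlapping [qstart;qlast]. */
struct foo_node *node;

for (node = foo_iter_first(&root, qstart, qlast); node;
     node = foo_iter_next(node, qstart, qlast)) {
	/* here: ITSTART(node) <= qlast && qstart <= ITLAST(node) */
	visit(node);
}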

include/linux/mm.h

Lines changed: 17 additions & 13 deletions
@@ -10,7 +10,6 @@
 #include <linux/list.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
-#include <linux/prio_tree.h>
 #include <linux/atomic.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
@@ -1355,22 +1354,27 @@ extern void zone_pcp_reset(struct zone *zone);
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);

-/* prio_tree.c */
-void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
-void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
-void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
-struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
-					struct prio_tree_iter *iter);
-
-#define vma_prio_tree_foreach(vma, iter, root, begin, end)	\
-	for (prio_tree_iter_init(iter, root, begin, end), vma = NULL;	\
-		(vma = vma_prio_tree_next(vma, iter)); )
+/* interval_tree.c */
+void vma_interval_tree_add(struct vm_area_struct *vma,
+			   struct vm_area_struct *old,
+			   struct address_space *mapping);
+void vma_interval_tree_insert(struct vm_area_struct *node,
+			      struct rb_root *root);
+void vma_interval_tree_remove(struct vm_area_struct *node,
+			      struct rb_root *root);
+struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,
+				unsigned long start, unsigned long last);
+struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
+				unsigned long start, unsigned long last);
+
+#define vma_interval_tree_foreach(vma, root, start, last)		\
+	for (vma = vma_interval_tree_iter_first(root, start, last);	\
+	     vma; vma = vma_interval_tree_iter_next(vma, start, last))

 static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
					struct list_head *list)
 {
-	vma->shared.vm_set.parent = NULL;
-	list_add_tail(&vma->shared.vm_set.list, list);
+	list_add_tail(&vma->shared.nonlinear, list);
 }

 /* mmap.c */