Skip to content

Commit 0cd6144

Browse files
hnaz authored and
torvalds committed
mm + fs: prepare for non-page entries in page cache radix trees
shmem mappings already contain exceptional entries where swap slot information is remembered. To be able to store eviction information for regular page cache, prepare every site dealing with the radix trees directly to handle entries other than pages. The common lookup functions will filter out non-page entries and return NULL for page cache holes, just as before. But provide a raw version of the API which returns non-page entries as well, and switch shmem over to use it. Signed-off-by: Johannes Weiner <[email protected]> Reviewed-by: Rik van Riel <[email protected]> Reviewed-by: Minchan Kim <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: Bob Liu <[email protected]> Cc: Christoph Hellwig <[email protected]> Cc: Dave Chinner <[email protected]> Cc: Greg Thelen <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Jan Kara <[email protected]> Cc: KOSAKI Motohiro <[email protected]> Cc: Luigi Semenzato <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Metin Doslu <[email protected]> Cc: Michel Lespinasse <[email protected]> Cc: Ozgun Erdogan <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Roman Gushchin <[email protected]> Cc: Ryan Mallon <[email protected]> Cc: Tejun Heo <[email protected]> Cc: Vlastimil Babka <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent e7b563b commit 0cd6144

File tree

11 files changed

+349
-130
lines changed

11 files changed

+349
-130
lines changed

fs/btrfs/compression.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
472472
rcu_read_lock();
473473
page = radix_tree_lookup(&mapping->page_tree, pg_index);
474474
rcu_read_unlock();
475-
if (page) {
475+
if (page && !radix_tree_exceptional_entry(page)) {
476476
misses++;
477477
if (misses > 4)
478478
break;

include/linux/mm.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,6 +1041,14 @@ extern void show_free_areas(unsigned int flags);
10411041
extern bool skip_free_areas_node(unsigned int flags, int nid);
10421042

10431043
int shmem_zero_setup(struct vm_area_struct *);
1044+
#ifdef CONFIG_SHMEM
1045+
bool shmem_mapping(struct address_space *mapping);
1046+
#else
1047+
static inline bool shmem_mapping(struct address_space *mapping)
1048+
{
1049+
return false;
1050+
}
1051+
#endif
10441052

10451053
extern int can_do_mlock(void);
10461054
extern int user_shm_lock(size_t, struct user_struct *);

include/linux/pagemap.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -248,12 +248,15 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
248248
pgoff_t page_cache_prev_hole(struct address_space *mapping,
249249
pgoff_t index, unsigned long max_scan);
250250

251-
extern struct page * find_get_page(struct address_space *mapping,
252-
pgoff_t index);
253-
extern struct page * find_lock_page(struct address_space *mapping,
254-
pgoff_t index);
255-
extern struct page * find_or_create_page(struct address_space *mapping,
256-
pgoff_t index, gfp_t gfp_mask);
251+
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
252+
struct page *find_get_page(struct address_space *mapping, pgoff_t offset);
253+
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
254+
struct page *find_lock_page(struct address_space *mapping, pgoff_t offset);
255+
struct page *find_or_create_page(struct address_space *mapping, pgoff_t index,
256+
gfp_t gfp_mask);
257+
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
258+
unsigned int nr_entries, struct page **entries,
259+
pgoff_t *indices);
257260
unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
258261
unsigned int nr_pages, struct page **pages);
259262
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,

include/linux/pagevec.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ struct pagevec {
2222

2323
void __pagevec_release(struct pagevec *pvec);
2424
void __pagevec_lru_add(struct pagevec *pvec);
25+
unsigned pagevec_lookup_entries(struct pagevec *pvec,
26+
struct address_space *mapping,
27+
pgoff_t start, unsigned nr_entries,
28+
pgoff_t *indices);
29+
void pagevec_remove_exceptionals(struct pagevec *pvec);
2530
unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
2631
pgoff_t start, unsigned nr_pages);
2732
unsigned pagevec_lookup_tag(struct pagevec *pvec,

include/linux/shmem_fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
5151
unsigned long flags);
5252
extern int shmem_zero_setup(struct vm_area_struct *);
5353
extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
54+
extern bool shmem_mapping(struct address_space *mapping);
5455
extern void shmem_unlock_mapping(struct address_space *mapping);
5556
extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
5657
pgoff_t index, gfp_t gfp_mask);

mm/filemap.c

Lines changed: 178 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,29 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
446446
}
447447
EXPORT_SYMBOL_GPL(replace_page_cache_page);
448448

449+
static int page_cache_tree_insert(struct address_space *mapping,
450+
struct page *page)
451+
{
452+
void **slot;
453+
int error;
454+
455+
slot = radix_tree_lookup_slot(&mapping->page_tree, page->index);
456+
if (slot) {
457+
void *p;
458+
459+
p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
460+
if (!radix_tree_exceptional_entry(p))
461+
return -EEXIST;
462+
radix_tree_replace_slot(slot, page);
463+
mapping->nrpages++;
464+
return 0;
465+
}
466+
error = radix_tree_insert(&mapping->page_tree, page->index, page);
467+
if (!error)
468+
mapping->nrpages++;
469+
return error;
470+
}
471+
449472
/**
450473
* add_to_page_cache_locked - add a locked page to the pagecache
451474
* @page: page to add
@@ -480,11 +503,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
480503
page->index = offset;
481504

482505
spin_lock_irq(&mapping->tree_lock);
483-
error = radix_tree_insert(&mapping->page_tree, offset, page);
506+
error = page_cache_tree_insert(mapping, page);
484507
radix_tree_preload_end();
485508
if (unlikely(error))
486509
goto err_insert;
487-
mapping->nrpages++;
488510
__inc_zone_page_state(page, NR_FILE_PAGES);
489511
spin_unlock_irq(&mapping->tree_lock);
490512
trace_mm_filemap_add_to_page_cache(page);
@@ -712,7 +734,10 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
712734
unsigned long i;
713735

714736
for (i = 0; i < max_scan; i++) {
715-
if (!radix_tree_lookup(&mapping->page_tree, index))
737+
struct page *page;
738+
739+
page = radix_tree_lookup(&mapping->page_tree, index);
740+
if (!page || radix_tree_exceptional_entry(page))
716741
break;
717742
index++;
718743
if (index == 0)
@@ -750,7 +775,10 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
750775
unsigned long i;
751776

752777
for (i = 0; i < max_scan; i++) {
753-
if (!radix_tree_lookup(&mapping->page_tree, index))
778+
struct page *page;
779+
780+
page = radix_tree_lookup(&mapping->page_tree, index);
781+
if (!page || radix_tree_exceptional_entry(page))
754782
break;
755783
index--;
756784
if (index == ULONG_MAX)
@@ -762,14 +790,19 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
762790
EXPORT_SYMBOL(page_cache_prev_hole);
763791

764792
/**
765-
* find_get_page - find and get a page reference
793+
* find_get_entry - find and get a page cache entry
766794
* @mapping: the address_space to search
767-
* @offset: the page index
795+
* @offset: the page cache index
796+
*
797+
* Looks up the page cache slot at @mapping & @offset. If there is a
798+
* page cache page, it is returned with an increased refcount.
768799
*
769-
* Is there a pagecache struct page at the given (mapping, offset) tuple?
770-
* If yes, increment its refcount and return it; if no, return NULL.
800+
* If the slot holds a shadow entry of a previously evicted page, it
801+
* is returned.
802+
*
803+
* Otherwise, %NULL is returned.
771804
*/
772-
struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
805+
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
773806
{
774807
void **pagep;
775808
struct page *page;
@@ -810,24 +843,50 @@ struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
810843

811844
return page;
812845
}
813-
EXPORT_SYMBOL(find_get_page);
846+
EXPORT_SYMBOL(find_get_entry);
814847

815848
/**
816-
* find_lock_page - locate, pin and lock a pagecache page
849+
* find_get_page - find and get a page reference
817850
* @mapping: the address_space to search
818851
* @offset: the page index
819852
*
820-
* Locates the desired pagecache page, locks it, increments its reference
821-
* count and returns its address.
853+
* Looks up the page cache slot at @mapping & @offset. If there is a
854+
* page cache page, it is returned with an increased refcount.
822855
*
823-
* Returns zero if the page was not present. find_lock_page() may sleep.
856+
* Otherwise, %NULL is returned.
824857
*/
825-
struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
858+
struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
859+
{
860+
struct page *page = find_get_entry(mapping, offset);
861+
862+
if (radix_tree_exceptional_entry(page))
863+
page = NULL;
864+
return page;
865+
}
866+
EXPORT_SYMBOL(find_get_page);
867+
868+
/**
869+
* find_lock_entry - locate, pin and lock a page cache entry
870+
* @mapping: the address_space to search
871+
* @offset: the page cache index
872+
*
873+
* Looks up the page cache slot at @mapping & @offset. If there is a
874+
* page cache page, it is returned locked and with an increased
875+
* refcount.
876+
*
877+
* If the slot holds a shadow entry of a previously evicted page, it
878+
* is returned.
879+
*
880+
* Otherwise, %NULL is returned.
881+
*
882+
* find_lock_entry() may sleep.
883+
*/
884+
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
826885
{
827886
struct page *page;
828887

829888
repeat:
830-
page = find_get_page(mapping, offset);
889+
page = find_get_entry(mapping, offset);
831890
if (page && !radix_tree_exception(page)) {
832891
lock_page(page);
833892
/* Has the page been truncated? */
@@ -840,6 +899,29 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
840899
}
841900
return page;
842901
}
902+
EXPORT_SYMBOL(find_lock_entry);
903+
904+
/**
905+
* find_lock_page - locate, pin and lock a pagecache page
906+
* @mapping: the address_space to search
907+
* @offset: the page index
908+
*
909+
* Looks up the page cache slot at @mapping & @offset. If there is a
910+
* page cache page, it is returned locked and with an increased
911+
* refcount.
912+
*
913+
* Otherwise, %NULL is returned.
914+
*
915+
* find_lock_page() may sleep.
916+
*/
917+
struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
918+
{
919+
struct page *page = find_lock_entry(mapping, offset);
920+
921+
if (radix_tree_exceptional_entry(page))
922+
page = NULL;
923+
return page;
924+
}
843925
EXPORT_SYMBOL(find_lock_page);
844926

845927
/**
@@ -848,16 +930,18 @@ EXPORT_SYMBOL(find_lock_page);
848930
* @index: the page's index into the mapping
849931
* @gfp_mask: page allocation mode
850932
*
851-
* Locates a page in the pagecache. If the page is not present, a new page
852-
* is allocated using @gfp_mask and is added to the pagecache and to the VM's
853-
* LRU list. The returned page is locked and has its reference count
854-
* incremented.
933+
* Looks up the page cache slot at @mapping & @offset. If there is a
934+
* page cache page, it is returned locked and with an increased
935+
* refcount.
936+
*
937+
* If the page is not present, a new page is allocated using @gfp_mask
938+
* and added to the page cache and the VM's LRU list. The page is
939+
* returned locked and with an increased refcount.
855940
*
856-
* find_or_create_page() may sleep, even if @gfp_flags specifies an atomic
857-
* allocation!
941+
* On memory exhaustion, %NULL is returned.
858942
*
859-
* find_or_create_page() returns the desired page's address, or zero on
860-
* memory exhaustion.
943+
* find_or_create_page() may sleep, even if @gfp_flags specifies an
944+
* atomic allocation!
861945
*/
862946
struct page *find_or_create_page(struct address_space *mapping,
863947
pgoff_t index, gfp_t gfp_mask)
@@ -889,6 +973,76 @@ struct page *find_or_create_page(struct address_space *mapping,
889973
}
890974
EXPORT_SYMBOL(find_or_create_page);
891975

976+
/**
977+
* find_get_entries - gang pagecache lookup
978+
* @mapping: The address_space to search
979+
* @start: The starting page cache index
980+
* @nr_entries: The maximum number of entries
981+
* @entries: Where the resulting entries are placed
982+
* @indices: The cache indices corresponding to the entries in @entries
983+
*
984+
* find_get_entries() will search for and return a group of up to
985+
* @nr_entries entries in the mapping. The entries are placed at
986+
* @entries. find_get_entries() takes a reference against any actual
987+
* pages it returns.
988+
*
989+
* The search returns a group of mapping-contiguous page cache entries
990+
* with ascending indexes. There may be holes in the indices due to
991+
* not-present pages.
992+
*
993+
* Any shadow entries of evicted pages are included in the returned
994+
* array.
995+
*
996+
* find_get_entries() returns the number of pages and shadow entries
997+
* which were found.
998+
*/
999+
unsigned find_get_entries(struct address_space *mapping,
1000+
pgoff_t start, unsigned int nr_entries,
1001+
struct page **entries, pgoff_t *indices)
1002+
{
1003+
void **slot;
1004+
unsigned int ret = 0;
1005+
struct radix_tree_iter iter;
1006+
1007+
if (!nr_entries)
1008+
return 0;
1009+
1010+
rcu_read_lock();
1011+
restart:
1012+
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
1013+
struct page *page;
1014+
repeat:
1015+
page = radix_tree_deref_slot(slot);
1016+
if (unlikely(!page))
1017+
continue;
1018+
if (radix_tree_exception(page)) {
1019+
if (radix_tree_deref_retry(page))
1020+
goto restart;
1021+
/*
1022+
* Otherwise, we must be storing a swap entry
1023+
* here as an exceptional entry: so return it
1024+
* without attempting to raise page count.
1025+
*/
1026+
goto export;
1027+
}
1028+
if (!page_cache_get_speculative(page))
1029+
goto repeat;
1030+
1031+
/* Has the page moved? */
1032+
if (unlikely(page != *slot)) {
1033+
page_cache_release(page);
1034+
goto repeat;
1035+
}
1036+
export:
1037+
indices[ret] = iter.index;
1038+
entries[ret] = page;
1039+
if (++ret == nr_entries)
1040+
break;
1041+
}
1042+
rcu_read_unlock();
1043+
return ret;
1044+
}
1045+
8921046
/**
8931047
* find_get_pages - gang pagecache lookup
8941048
* @mapping: The address_space to search

mm/mincore.c

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,21 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
7070
* any other file mapping (ie. marked !present and faulted in with
7171
* tmpfs's .fault). So swapped out tmpfs mappings are tested here.
7272
*/
73-
page = find_get_page(mapping, pgoff);
7473
#ifdef CONFIG_SWAP
75-
/* shmem/tmpfs may return swap: account for swapcache page too. */
76-
if (radix_tree_exceptional_entry(page)) {
77-
swp_entry_t swap = radix_to_swp_entry(page);
78-
page = find_get_page(swap_address_space(swap), swap.val);
79-
}
74+
if (shmem_mapping(mapping)) {
75+
page = find_get_entry(mapping, pgoff);
76+
/*
77+
* shmem/tmpfs may return swap: account for swapcache
78+
* page too.
79+
*/
80+
if (radix_tree_exceptional_entry(page)) {
81+
swp_entry_t swp = radix_to_swp_entry(page);
82+
page = find_get_page(swap_address_space(swp), swp.val);
83+
}
84+
} else
85+
page = find_get_page(mapping, pgoff);
86+
#else
87+
page = find_get_page(mapping, pgoff);
8088
#endif
8189
if (page) {
8290
present = PageUptodate(page);

mm/readahead.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
179179
rcu_read_lock();
180180
page = radix_tree_lookup(&mapping->page_tree, page_offset);
181181
rcu_read_unlock();
182-
if (page)
182+
if (page && !radix_tree_exceptional_entry(page))
183183
continue;
184184

185185
page = page_cache_alloc_readahead(mapping);

0 commit comments

Comments
 (0)