
Commit 99fbb6b

Matthew Wilcox (Oracle) authored and akpm00 committed
mm: make folios_put() the basis of release_pages()
Patch series "Rearrange batched folio freeing", v3.

Other than the obvious "remove calls to compound_head" changes, the fundamental belief here is that iterating a linked list is much slower than iterating an array (5-15x slower in my testing). There's also an associated belief that since we iterate the batch of folios three times, we do better when the array is small (ie 15 entries) than we do with a batch that is hundreds of entries long, which only gives us the opportunity for the first pages to fall out of cache by the time we get to the end.

It is possible we should increase the size of folio_batch. Hopefully the bots let us know if this introduces any performance regressions.

This patch (of 3):

By making release_pages() call folios_put(), we can get rid of the calls to compound_head() for the callers that already know they have folios. We can also get rid of the lock_batch tracking as we know the size of the batch is limited by folio_batch. This does reduce the maximum number of pages for which the lruvec lock is held, from SWAP_CLUSTER_MAX (32) to PAGEVEC_SIZE (15). I do not expect this to make a significant difference, but if it does, we can increase PAGEVEC_SIZE to 31.

Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Ryan Roberts <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 5dad604 commit 99fbb6b
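
The 5-15x difference is largely a cache and dependency effect: a linked-list walk is a chain of dependent loads, while an array walk exposes every address up front to the hardware prefetcher. A minimal userspace sketch of the two access patterns being compared here (illustrative only; not code from this series):

	#include <stddef.h>

	struct node {
		struct node *next;
		long value;
	};

	/* List walk: loading n->next depends on the previous load having
	 * completed, so every cache miss stalls the whole chain. */
	static long sum_list(const struct node *n)
	{
		long sum = 0;

		for (; n; n = n->next)
			sum += n->value;
		return sum;
	}

	/* Array walk: the address of v[i + 1] is known before v[i]
	 * arrives, so the prefetcher can run ahead of the misses. */
	static long sum_array(const long *v, size_t n)
	{
		long sum = 0;

		for (size_t i = 0; i < n; i++)
			sum += v[i];
		return sum;
	}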

3 files changed: +70 −49 lines changed

include/linux/mm.h
Lines changed: 10 additions & 6 deletions

@@ -36,6 +36,7 @@ struct anon_vma;
 struct anon_vma_chain;
 struct user_struct;
 struct pt_regs;
+struct folio_batch;
 
 extern int sysctl_page_lock_unfairness;
 
@@ -1512,6 +1513,8 @@ static inline void folio_put_refs(struct folio *folio, int refs)
 		__folio_put(folio);
 }
 
+void folios_put_refs(struct folio_batch *folios, unsigned int *refs);
+
 /*
  * union release_pages_arg - an array of pages or folios
  *
@@ -1534,18 +1537,19 @@ void release_pages(release_pages_arg, int nr);
 /**
  * folios_put - Decrement the reference count on an array of folios.
  * @folios: The folios.
- * @nr: How many folios there are.
  *
- * Like folio_put(), but for an array of folios.  This is more efficient
- * than writing the loop yourself as it will optimise the locks which
- * need to be taken if the folios are freed.
+ * Like folio_put(), but for a batch of folios.  This is more efficient
+ * than writing the loop yourself as it will optimise the locks which need
+ * to be taken if the folios are freed.  The folios batch is returned
+ * empty and ready to be reused for another batch; there is no need to
+ * reinitialise it.
  *
  * Context: May be called in process or interrupt context, but not in NMI
  * context.  May be called while holding a spinlock.
  */
-static inline void folios_put(struct folio **folios, unsigned int nr)
+static inline void folios_put(struct folio_batch *folios)
 {
-	release_pages(folios, nr);
+	folios_put_refs(folios, NULL);
 }
 
 static inline void put_page(struct page *page)
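
For existing callers the conversion is mechanical. A sketch of the before/after pattern, assuming a caller-owned struct folio_batch named fbatch:

	/* Before this series: pass the raw array plus a count, then
	 * reset the batch by hand. */
	folios_put(fbatch->folios, folio_batch_count(fbatch));
	folio_batch_reinit(fbatch);

	/* After this series: one call.  folios_put_refs() reinitialises
	 * the batch, so it comes back empty and ready for reuse. */
	folios_put(fbatch);

This is exactly the shape of the mm/mlock.c and mm/swap.c hunks below.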

mm/mlock.c
Lines changed: 1 addition & 2 deletions

@@ -206,8 +206,7 @@ static void mlock_folio_batch(struct folio_batch *fbatch)
 
 	if (lruvec)
 		unlock_page_lruvec_irq(lruvec);
-	folios_put(fbatch->folios, folio_batch_count(fbatch));
-	folio_batch_reinit(fbatch);
+	folios_put(fbatch);
 }
 
 void mlock_drain_local(void)
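
Both converted call sites rely on the batch carrying its own count, which is what lets folios_put() drop its separate nr argument. Roughly, the type from <linux/pagevec.h> (a simplified sketch; bookkeeping fields elided, and PAGEVEC_SIZE is 15):

	struct folio_batch {
		unsigned char nr;	/* number of slots in use */
		/* ... */
		struct folio *folios[PAGEVEC_SIZE];
	};

folio_batch_count() reads nr, folio_batch_add() appends a folio and returns the number of free slots left, and folio_batch_reinit() resets nr to zero.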

mm/swap.c
Lines changed: 59 additions & 41 deletions

@@ -89,7 +89,7 @@ static void __page_cache_release(struct folio *folio)
 		__folio_clear_lru_flags(folio);
 		unlock_page_lruvec_irqrestore(lruvec, flags);
 	}
-	/* See comment on folio_test_mlocked in release_pages() */
+	/* See comment on folio_test_mlocked in folios_put() */
 	if (unlikely(folio_test_mlocked(folio))) {
 		long nr_pages = folio_nr_pages(folio);
 
@@ -175,7 +175,7 @@ static void lru_add_fn(struct lruvec *lruvec, struct folio *folio)
 	 * while the LRU lock is held.
 	 *
 	 * (That is not true of __page_cache_release(), and not necessarily
-	 * true of release_pages(): but those only clear the mlocked flag after
+	 * true of folios_put(): but those only clear the mlocked flag after
 	 * folio_put_testzero() has excluded any other users of the folio.)
 	 */
 	if (folio_evictable(folio)) {
@@ -221,8 +221,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
 
 	if (lruvec)
 		unlock_page_lruvec_irqrestore(lruvec, flags);
-	folios_put(fbatch->folios, folio_batch_count(fbatch));
-	folio_batch_reinit(fbatch);
+	folios_put(fbatch);
 }
 
 static void folio_batch_add_and_move(struct folio_batch *fbatch,
@@ -946,47 +945,30 @@ void lru_cache_disable(void)
 }
 
 /**
- * release_pages - batched put_page()
- * @arg: array of pages to release
- * @nr: number of pages
+ * folios_put_refs - Reduce the reference count on a batch of folios.
+ * @folios: The folios.
+ * @refs: The number of refs to subtract from each folio.
 *
- * Decrement the reference count on all the pages in @arg.  If it
- * fell to zero, remove the page from the LRU and free it.
+ * Like folio_put(), but for a batch of folios.  This is more efficient
+ * than writing the loop yourself as it will optimise the locks which need
+ * to be taken if the folios are freed.  The folios batch is returned
+ * empty and ready to be reused for another batch; there is no need
+ * to reinitialise it.  If @refs is NULL, we subtract one from each
+ * folio refcount.
 *
- * Note that the argument can be an array of pages, encoded pages,
- * or folio pointers. We ignore any encoded bits, and turn any of
- * them into just a folio that gets free'd.
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context.  May be called while holding a spinlock.
 */
-void release_pages(release_pages_arg arg, int nr)
+void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
 {
 	int i;
-	struct encoded_page **encoded = arg.encoded_pages;
 	LIST_HEAD(pages_to_free);
 	struct lruvec *lruvec = NULL;
 	unsigned long flags = 0;
-	unsigned int lock_batch;
 
-	for (i = 0; i < nr; i++) {
-		unsigned int nr_refs = 1;
-		struct folio *folio;
-
-		/* Turn any of the argument types into a folio */
-		folio = page_folio(encoded_page_ptr(encoded[i]));
-
-		/* Is our next entry actually "nr_pages" -> "nr_refs" ? */
-		if (unlikely(encoded_page_flags(encoded[i]) &
-			     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
-			nr_refs = encoded_nr_pages(encoded[++i]);
-
-		/*
-		 * Make sure the IRQ-safe lock-holding time does not get
-		 * excessive with a continuous string of pages from the
-		 * same lruvec. The lock is held only if lruvec != NULL.
-		 */
-		if (lruvec && ++lock_batch == SWAP_CLUSTER_MAX) {
-			unlock_page_lruvec_irqrestore(lruvec, flags);
-			lruvec = NULL;
-		}
+	for (i = 0; i < folios->nr; i++) {
+		struct folio *folio = folios->folios[i];
+		unsigned int nr_refs = refs ? refs[i] : 1;
 
 		if (is_huge_zero_page(&folio->page))
 			continue;
@@ -1016,13 +998,8 @@ void release_pages(release_pages_arg arg, int nr)
 		}
 
 		if (folio_test_lru(folio)) {
-			struct lruvec *prev_lruvec = lruvec;
-
 			lruvec = folio_lruvec_relock_irqsave(folio, lruvec,
 								&flags);
-			if (prev_lruvec != lruvec)
-				lock_batch = 0;
-
 			lruvec_del_folio(lruvec, folio);
 			__folio_clear_lru_flags(folio);
 		}
@@ -1046,6 +1023,47 @@ void release_pages(release_pages_arg arg, int nr)
 
 	mem_cgroup_uncharge_list(&pages_to_free);
 	free_unref_page_list(&pages_to_free);
+	folio_batch_reinit(folios);
+}
+EXPORT_SYMBOL(folios_put_refs);
+
+/**
+ * release_pages - batched put_page()
+ * @arg: array of pages to release
+ * @nr: number of pages
+ *
+ * Decrement the reference count on all the pages in @arg.  If it
+ * fell to zero, remove the page from the LRU and free it.
+ *
+ * Note that the argument can be an array of pages, encoded pages,
+ * or folio pointers. We ignore any encoded bits, and turn any of
+ * them into just a folio that gets free'd.
+ */
+void release_pages(release_pages_arg arg, int nr)
+{
+	struct folio_batch fbatch;
+	int refs[PAGEVEC_SIZE];
+	struct encoded_page **encoded = arg.encoded_pages;
+	int i;
+
+	folio_batch_init(&fbatch);
+	for (i = 0; i < nr; i++) {
+		/* Turn any of the argument types into a folio */
+		struct folio *folio = page_folio(encoded_page_ptr(encoded[i]));
+
+		/* Is our next entry actually "nr_pages" -> "nr_refs" ? */
+		refs[fbatch.nr] = 1;
+		if (unlikely(encoded_page_flags(encoded[i]) &
+			     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+			refs[fbatch.nr] = encoded_nr_pages(encoded[++i]);
+
+		if (folio_batch_add(&fbatch, folio) > 0)
+			continue;
+		folios_put_refs(&fbatch, refs);
+	}
+
+	if (fbatch.nr)
+		folios_put_refs(&fbatch, refs);
 }
 EXPORT_SYMBOL(release_pages);
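
The flush-when-full pattern that release_pages() now uses works for any caller that wants to drop a variable number of references per folio. A sketch under the new API (drop_folio_refs() and its parameters are hypothetical, not part of this patch):

	static void drop_folio_refs(struct folio **folios,
				    unsigned int *nr_refs, int n)
	{
		struct folio_batch fbatch;
		unsigned int refs[PAGEVEC_SIZE];
		int i;

		folio_batch_init(&fbatch);
		for (i = 0; i < n; i++) {
			/* refs[] is indexed by the slot this folio will
			 * occupy in the batch. */
			refs[folio_batch_count(&fbatch)] = nr_refs[i];
			/* folio_batch_add() returns the space left;
			 * 0 means the batch is now full, so flush it. */
			if (!folio_batch_add(&fbatch, folios[i]))
				folios_put_refs(&fbatch, refs);
		}
		/* Each flush reinitialised the batch, so at most a
		 * partial batch remains to be put here. */
		if (folio_batch_count(&fbatch))
			folios_put_refs(&fbatch, refs);
	}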
