Skip to content

Commit 9092c71

Browse files
Josef Bacik authored and torvalds committed
mm: use sc->priority for slab shrink targets
Previously we were using the ratio of the number of lru pages scanned to the number of eligible lru pages to determine the number of slab objects to scan. The problem with this is that these two things have nothing to do with each other, so in slab heavy work loads where there is little to no page cache we can end up with the pages scanned being a very low number. This means that we reclaim next to no slab pages and waste a lot of time reclaiming small amounts of space. Consider the following scenario, where we have the following values and the rest of the memory usage is in slab Active: 58840 kB Inactive: 46860 kB Every time we do a get_scan_count() we do this scan = size >> sc->priority where sc->priority starts at DEF_PRIORITY, which is 12. The first loop through reclaim would result in a scan target of 2 pages to 11715 total inactive pages, and 3 pages to 14710 total active pages. This is a really really small target for a system that is entirely slab pages. And this is super optimistic, this assumes we even get to scan these pages. We don't increment sc->nr_scanned unless we 1) isolate the page, which assumes it's not in use, and 2) can lock the page. Under pressure these numbers could probably go down, I'm sure there's some random pages from daemons that aren't actually in use, so the targets get even smaller. Instead use sc->priority in the same way we use it to determine scan amounts for the lru's. This generally equates to pages. Consider the following slab_pages = (nr_objects * object_size) / PAGE_SIZE What we would like to do is scan = slab_pages >> sc->priority but we don't know the number of slab pages each shrinker controls, only the objects. 
However, say that theoretically we knew how many pages a shrinker controlled, we'd still have to convert this to objects, which would look like the following scan = shrinker_pages >> sc->priority scan_objects = (PAGE_SIZE / object_size) * scan or written another way scan_objects = (shrinker_pages >> sc->priority) * (PAGE_SIZE / object_size) which can thus be written scan_objects = ((shrinker_pages * PAGE_SIZE) / object_size) >> sc->priority which is just scan_objects = nr_objects >> sc->priority We don't need to know exactly how many pages each shrinker represents; its objects are all the information we need. Making this change allows us to place an appropriate amount of pressure on the shrinker pools for their relative size. Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Josef Bacik <[email protected]> Acked-by: Johannes Weiner <[email protected]> Acked-by: Dave Chinner <[email protected]> Acked-by: Andrey Ryabinin <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Christoph Lameter <[email protected]> Cc: Pekka Enberg <[email protected]> Cc: David Rientjes <[email protected]> Cc: Joonsoo Kim <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent fcb2b0c commit 9092c71

File tree

2 files changed

+23
-47
lines changed

2 files changed

+23
-47
lines changed

include/trace/events/vmscan.h

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -192,24 +192,23 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_re
192192

193193
TRACE_EVENT(mm_shrink_slab_start,
194194
TP_PROTO(struct shrinker *shr, struct shrink_control *sc,
195-
long nr_objects_to_shrink, unsigned long pgs_scanned,
196-
unsigned long lru_pgs, unsigned long cache_items,
197-
unsigned long long delta, unsigned long total_scan),
195+
long nr_objects_to_shrink, unsigned long cache_items,
196+
unsigned long long delta, unsigned long total_scan,
197+
int priority),
198198

199-
TP_ARGS(shr, sc, nr_objects_to_shrink, pgs_scanned, lru_pgs,
200-
cache_items, delta, total_scan),
199+
TP_ARGS(shr, sc, nr_objects_to_shrink, cache_items, delta, total_scan,
200+
priority),
201201

202202
TP_STRUCT__entry(
203203
__field(struct shrinker *, shr)
204204
__field(void *, shrink)
205205
__field(int, nid)
206206
__field(long, nr_objects_to_shrink)
207207
__field(gfp_t, gfp_flags)
208-
__field(unsigned long, pgs_scanned)
209-
__field(unsigned long, lru_pgs)
210208
__field(unsigned long, cache_items)
211209
__field(unsigned long long, delta)
212210
__field(unsigned long, total_scan)
211+
__field(int, priority)
213212
),
214213

215214
TP_fast_assign(
@@ -218,24 +217,22 @@ TRACE_EVENT(mm_shrink_slab_start,
218217
__entry->nid = sc->nid;
219218
__entry->nr_objects_to_shrink = nr_objects_to_shrink;
220219
__entry->gfp_flags = sc->gfp_mask;
221-
__entry->pgs_scanned = pgs_scanned;
222-
__entry->lru_pgs = lru_pgs;
223220
__entry->cache_items = cache_items;
224221
__entry->delta = delta;
225222
__entry->total_scan = total_scan;
223+
__entry->priority = priority;
226224
),
227225

228-
TP_printk("%pF %p: nid: %d objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld",
226+
TP_printk("%pF %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d",
229227
__entry->shrink,
230228
__entry->shr,
231229
__entry->nid,
232230
__entry->nr_objects_to_shrink,
233231
show_gfp_flags(__entry->gfp_flags),
234-
__entry->pgs_scanned,
235-
__entry->lru_pgs,
236232
__entry->cache_items,
237233
__entry->delta,
238-
__entry->total_scan)
234+
__entry->total_scan,
235+
__entry->priority)
239236
);
240237

241238
TRACE_EVENT(mm_shrink_slab_end,

mm/vmscan.c

Lines changed: 13 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -310,9 +310,7 @@ EXPORT_SYMBOL(unregister_shrinker);
310310
#define SHRINK_BATCH 128
311311

312312
static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
313-
struct shrinker *shrinker,
314-
unsigned long nr_scanned,
315-
unsigned long nr_eligible)
313+
struct shrinker *shrinker, int priority)
316314
{
317315
unsigned long freed = 0;
318316
unsigned long long delta;
@@ -337,9 +335,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
337335
nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
338336

339337
total_scan = nr;
340-
delta = (4 * nr_scanned) / shrinker->seeks;
341-
delta *= freeable;
342-
do_div(delta, nr_eligible + 1);
338+
delta = freeable >> priority;
339+
delta *= 4;
340+
do_div(delta, shrinker->seeks);
343341
total_scan += delta;
344342
if (total_scan < 0) {
345343
pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
@@ -373,8 +371,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
373371
total_scan = freeable * 2;
374372

375373
trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
376-
nr_scanned, nr_eligible,
377-
freeable, delta, total_scan);
374+
freeable, delta, total_scan, priority);
378375

379376
/*
380377
* Normally, we should not scan less than batch_size objects in one
@@ -434,8 +431,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
434431
* @gfp_mask: allocation context
435432
* @nid: node whose slab caches to target
436433
* @memcg: memory cgroup whose slab caches to target
437-
* @nr_scanned: pressure numerator
438-
* @nr_eligible: pressure denominator
434+
* @priority: the reclaim priority
439435
*
440436
* Call the shrink functions to age shrinkable caches.
441437
*
@@ -447,30 +443,21 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
447443
* objects from the memory cgroup specified. Otherwise, only unaware
448444
* shrinkers are called.
449445
*
450-
* @nr_scanned and @nr_eligible form a ratio that indicate how much of
451-
* the available objects should be scanned. Page reclaim for example
452-
* passes the number of pages scanned and the number of pages on the
453-
* LRU lists that it considered on @nid, plus a bias in @nr_scanned
454-
* when it encountered mapped pages. The ratio is further biased by
455-
* the ->seeks setting of the shrink function, which indicates the
456-
* cost to recreate an object relative to that of an LRU page.
446+
* @priority is sc->priority, we take the number of objects and >> by priority
447+
* in order to get the scan target.
457448
*
458449
* Returns the number of reclaimed slab objects.
459450
*/
460451
static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
461452
struct mem_cgroup *memcg,
462-
unsigned long nr_scanned,
463-
unsigned long nr_eligible)
453+
int priority)
464454
{
465455
struct shrinker *shrinker;
466456
unsigned long freed = 0;
467457

468458
if (memcg && (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)))
469459
return 0;
470460

471-
if (nr_scanned == 0)
472-
nr_scanned = SWAP_CLUSTER_MAX;
473-
474461
if (!down_read_trylock(&shrinker_rwsem)) {
475462
/*
476463
* If we would return 0, our callers would understand that we
@@ -501,7 +488,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
501488
if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
502489
sc.nid = 0;
503490

504-
freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
491+
freed += do_shrink_slab(&sc, shrinker, priority);
505492
}
506493

507494
up_read(&shrinker_rwsem);
@@ -519,8 +506,7 @@ void drop_slab_node(int nid)
519506

520507
freed = 0;
521508
do {
522-
freed += shrink_slab(GFP_KERNEL, nid, memcg,
523-
1000, 1000);
509+
freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
524510
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
525511
} while (freed > 10);
526512
}
@@ -2615,14 +2601,12 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
26152601

26162602
reclaimed = sc->nr_reclaimed;
26172603
scanned = sc->nr_scanned;
2618-
26192604
shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
26202605
node_lru_pages += lru_pages;
26212606

26222607
if (memcg)
26232608
shrink_slab(sc->gfp_mask, pgdat->node_id,
2624-
memcg, sc->nr_scanned - scanned,
2625-
lru_pages);
2609+
memcg, sc->priority);
26262610

26272611
/* Record the group's reclaim efficiency */
26282612
vmpressure(sc->gfp_mask, memcg, false,
@@ -2646,14 +2630,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
26462630
}
26472631
} while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
26482632

2649-
/*
2650-
* Shrink the slab caches in the same proportion that
2651-
* the eligible LRU pages were scanned.
2652-
*/
26532633
if (global_reclaim(sc))
26542634
shrink_slab(sc->gfp_mask, pgdat->node_id, NULL,
2655-
sc->nr_scanned - nr_scanned,
2656-
node_lru_pages);
2635+
sc->priority);
26572636

26582637
if (reclaim_state) {
26592638
sc->nr_reclaimed += reclaim_state->reclaimed_slab;

0 commit comments

Comments
 (0)