
Commit 443c2ac

vianplakpm00 authored and committed
mm/page_alloc: remotely drain per-cpu lists
Some setups, notably NOHZ_FULL CPUs, are too busy to handle the per-cpu drain
work queued by __drain_all_pages().  So introduce a new mechanism to remotely
drain the per-cpu lists.  It is made possible by remotely locking 'struct
per_cpu_pages' new per-cpu spinlocks.  A benefit of this new scheme is that
drain operations are now migration safe.

There was no observed performance degradation vs. the previous scheme.  Both
netperf and hackbench were run in parallel to triggering the
__drain_all_pages(NULL, true) code path around ~100 times per second.  The
new scheme performs a bit better (~5%), although the important point here is
there are no performance regressions vs. the previous mechanism.  Per-cpu
lists draining happens only in slow paths.

Minchan Kim tested an earlier version and reported;

	My workload is not NOHZ CPUs but run apps under heavy memory
	pressure so they goes to direct reclaim and be stuck on
	drain_all_pages until work on workqueue run.

	unit: nanosecond
	max(dur)	avg(dur)		count(dur)
	166713013	487511.77786438033	1283

	From traces, system encountered the drain_all_pages 1283 times and
	worst case was 166ms and avg was 487us.

	The other problem was alloc_contig_range in CMA.  The PCP draining
	takes several hundred millisecond sometimes though there is no
	memory pressure or a few of pages to be migrated out but CPU were
	fully booked.

	Your patch perfectly removed those wasted time.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Nicolas Saenz Julienne <[email protected]>
Signed-off-by: Mel Gorman <[email protected]>
Tested-by: Yu Zhao <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Marcelo Tosatti <[email protected]>
Cc: Marek Szyprowski <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Minchan Kim <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 4b23a68 commit 443c2ac
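
To make the mechanism concrete, below is a minimal userspace model of the new
scheme (a sketch only, not the kernel code; NR_MODEL_CPUS, struct model_pcp,
model_free_page() and drain_all_model() are invented for illustration; builds
with gcc -pthread).  The point it demonstrates: once each per-CPU list is
guarded by its own spinlock, any context can empty a remote CPU's list
directly, instead of queueing work on that CPU and waiting for its workqueue
to run.

/* Userspace model only: spinlock-protected per-CPU page counts. */
#include <pthread.h>
#include <stdio.h>

#define NR_MODEL_CPUS 4

struct model_pcp {
	pthread_spinlock_t lock;   /* stands in for the new pcp lock */
	int count;                 /* pages cached on this "CPU" */
};

static struct model_pcp pcp[NR_MODEL_CPUS];

/* A "CPU" caching a freed page on its local list. */
static void model_free_page(int cpu)
{
	pthread_spin_lock(&pcp[cpu].lock);
	pcp[cpu].count++;
	pthread_spin_unlock(&pcp[cpu].lock);
}

/*
 * Remote drain: take each CPU's lock and return its pages to the
 * "buddy allocator" (here just a running total).  No per-CPU work item,
 * no flush_work(), no dependency on the remote CPU being responsive.
 */
static int drain_all_model(void)
{
	int cpu, drained = 0;

	for (cpu = 0; cpu < NR_MODEL_CPUS; cpu++) {
		pthread_spin_lock(&pcp[cpu].lock);
		drained += pcp[cpu].count;
		pcp[cpu].count = 0;
		pthread_spin_unlock(&pcp[cpu].lock);
	}
	return drained;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_MODEL_CPUS; cpu++) {
		pthread_spin_init(&pcp[cpu].lock, PTHREAD_PROCESS_PRIVATE);
		model_free_page(cpu);
	}
	printf("drained %d pages remotely\n", drain_all_model());
	return 0;
}

In the patch itself the equivalent role is played by the new
'struct per_cpu_pages' spinlocks referenced above, which is why
__drain_all_pages() in the diff below can call drain_pages_zone() or
drain_pages() for remote CPUs directly and the mm_percpu_wq round trip
disappears.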

1 file changed (+4, -54 lines)

mm/page_alloc.c

Lines changed: 4 additions & 54 deletions
@@ -165,13 +165,7 @@ DEFINE_PER_CPU(int, _numa_mem_);		/* Kernel "local memory" node */
 EXPORT_PER_CPU_SYMBOL(_numa_mem_);
 #endif
 
-/* work_structs for global per-cpu drains */
-struct pcpu_drain {
-	struct zone *zone;
-	struct work_struct work;
-};
 static DEFINE_MUTEX(pcpu_drain_mutex);
-static DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
 
 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
 volatile unsigned long latent_entropy __latent_entropy;
@@ -3109,9 +3103,6 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
  * Called from the vmstat counter updater to drain pagesets of this
  * currently executing processor on remote nodes after they have
  * expired.
- *
- * Note that this function must be called with the thread pinned to
- * a single processor.
  */
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
@@ -3136,10 +3127,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 
 /*
  * Drain pcplists of the indicated processor and zone.
- *
- * The processor must either be the current processor and the
- * thread pinned to the current processor or a processor that
- * is not online.
  */
 static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 {
@@ -3158,10 +3145,6 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 
 /*
  * Drain pcplists of all zones on the indicated processor.
- *
- * The processor must either be the current processor and the
- * thread pinned to the current processor or a processor that
- * is not online.
  */
 static void drain_pages(unsigned int cpu)
 {
@@ -3174,9 +3157,6 @@ static void drain_pages(unsigned int cpu)
 
 /*
  * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- *
- * The CPU has to be pinned. When zone parameter is non-NULL, spill just
- * the single zone's pages.
  */
 void drain_local_pages(struct zone *zone)
 {
@@ -3188,24 +3168,6 @@ void drain_local_pages(struct zone *zone)
 	drain_pages(cpu);
 }
 
-static void drain_local_pages_wq(struct work_struct *work)
-{
-	struct pcpu_drain *drain;
-
-	drain = container_of(work, struct pcpu_drain, work);
-
-	/*
-	 * drain_all_pages doesn't use proper cpu hotplug protection so
-	 * we can race with cpu offline when the WQ can move this from
-	 * a cpu pinned worker to an unbound one. We can operate on a different
-	 * cpu which is alright but we also have to make sure to not move to
-	 * a different one.
-	 */
-	migrate_disable();
-	drain_local_pages(drain->zone);
-	migrate_enable();
-}
-
 /*
  * The implementation of drain_all_pages(), exposing an extra parameter to
  * drain on all cpus.
@@ -3226,13 +3188,6 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
 	 */
 	static cpumask_t cpus_with_pcps;
 
-	/*
-	 * Make sure nobody triggers this path before mm_percpu_wq is fully
-	 * initialized.
-	 */
-	if (WARN_ON_ONCE(!mm_percpu_wq))
-		return;
-
 	/*
 	 * Do not drain if one is already in progress unless it's specific to
 	 * a zone. Such callers are primarily CMA and memory hotplug and need
@@ -3282,14 +3237,11 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
 	}
 
 	for_each_cpu(cpu, &cpus_with_pcps) {
-		struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
-
-		drain->zone = zone;
-		INIT_WORK(&drain->work, drain_local_pages_wq);
-		queue_work_on(cpu, mm_percpu_wq, &drain->work);
+		if (zone)
+			drain_pages_zone(cpu, zone);
+		else
+			drain_pages(cpu);
 	}
-	for_each_cpu(cpu, &cpus_with_pcps)
-		flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
 
 	mutex_unlock(&pcpu_drain_mutex);
 }
@@ -3298,8 +3250,6 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
  * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
  *
  * When zone parameter is non-NULL, spill just the single zone's pages.
- *
- * Note that this can be extremely slow as the draining happens in a workqueue.
  */
 void drain_all_pages(struct zone *zone)
 {
