Skip to content

Commit f19298b

Browse files
gormanm authored and torvalds committed
mm/vmstat: convert NUMA statistics to basic NUMA counters
NUMA statistics are maintained on the zone level for hits, misses, foreign, etc., but nothing relies on them being perfectly accurate for functional correctness. The counters are used by userspace to get a general overview of a workload's NUMA behaviour, but the page allocator incurs a high cost to maintain perfect accuracy similar to what is required for a vmstat like NR_FREE_PAGES. There is even a sysctl vm.numa_stat to allow userspace to turn off the collection of NUMA statistics like NUMA_HIT. This patch converts NUMA_HIT and friends to be NUMA events with similar accuracy to VM events. There is a possibility that slight errors will be introduced, but the overall trend as seen by userspace will be similar. The counters are no longer updated from vmstat_refresh context as it is unnecessary overhead for counters that may never be read by userspace. Note that counters could be maintained at the node level to save space, but it would have a user-visible impact due to /proc/zoneinfo. [[email protected]: Fix misplaced closing brace for !CONFIG_NUMA] Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Mel Gorman <[email protected]> Acked-by: Vlastimil Babka <[email protected]> Acked-by: Peter Zijlstra (Intel) <[email protected]> Cc: Chuck Lever <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Jesper Dangaard Brouer <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Sebastian Andrzej Siewior <[email protected]> Cc: Thomas Gleixner <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent dbbee9d commit f19298b

File tree

6 files changed

+113
-148
lines changed

6 files changed

+113
-148
lines changed

drivers/base/node.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -482,19 +482,20 @@ static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL);
482482
static ssize_t node_read_numastat(struct device *dev,
483483
struct device_attribute *attr, char *buf)
484484
{
485+
fold_vm_numa_events();
485486
return sysfs_emit(buf,
486487
"numa_hit %lu\n"
487488
"numa_miss %lu\n"
488489
"numa_foreign %lu\n"
489490
"interleave_hit %lu\n"
490491
"local_node %lu\n"
491492
"other_node %lu\n",
492-
sum_zone_numa_state(dev->id, NUMA_HIT),
493-
sum_zone_numa_state(dev->id, NUMA_MISS),
494-
sum_zone_numa_state(dev->id, NUMA_FOREIGN),
495-
sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
496-
sum_zone_numa_state(dev->id, NUMA_LOCAL),
497-
sum_zone_numa_state(dev->id, NUMA_OTHER));
493+
sum_zone_numa_event_state(dev->id, NUMA_HIT),
494+
sum_zone_numa_event_state(dev->id, NUMA_MISS),
495+
sum_zone_numa_event_state(dev->id, NUMA_FOREIGN),
496+
sum_zone_numa_event_state(dev->id, NUMA_INTERLEAVE_HIT),
497+
sum_zone_numa_event_state(dev->id, NUMA_LOCAL),
498+
sum_zone_numa_event_state(dev->id, NUMA_OTHER));
498499
}
499500
static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL);
500501

@@ -512,10 +513,11 @@ static ssize_t node_read_vmstat(struct device *dev,
512513
sum_zone_node_page_state(nid, i));
513514

514515
#ifdef CONFIG_NUMA
515-
for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
516+
fold_vm_numa_events();
517+
for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
516518
len += sysfs_emit_at(buf, len, "%s %lu\n",
517519
numa_stat_name(i),
518-
sum_zone_numa_state(nid, i));
520+
sum_zone_numa_event_state(nid, i));
519521

520522
#endif
521523
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {

include/linux/mmzone.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,10 @@ enum numa_stat_item {
135135
NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
136136
NUMA_LOCAL, /* allocation from local node */
137137
NUMA_OTHER, /* allocation from other node */
138-
NR_VM_NUMA_STAT_ITEMS
138+
NR_VM_NUMA_EVENT_ITEMS
139139
};
140140
#else
141-
#define NR_VM_NUMA_STAT_ITEMS 0
141+
#define NR_VM_NUMA_EVENT_ITEMS 0
142142
#endif
143143

144144
enum zone_stat_item {
@@ -357,7 +357,12 @@ struct per_cpu_zonestat {
357357
s8 stat_threshold;
358358
#endif
359359
#ifdef CONFIG_NUMA
360-
u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
360+
/*
361+
* Low priority inaccurate counters that are only folded
362+
* on demand. Use a large type to avoid the overhead of
363+
* folding during refresh_cpu_vm_stats.
364+
*/
365+
unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
361366
#endif
362367
};
363368

@@ -623,7 +628,7 @@ struct zone {
623628
ZONE_PADDING(_pad3_)
624629
/* Zone statistics */
625630
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
626-
atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
631+
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
627632
} ____cacheline_internodealigned_in_smp;
628633

629634
enum pgdat_flags {

include/linux/vmstat.h

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -138,34 +138,27 @@ static inline void vm_events_fold_cpu(int cpu)
138138
* Zone and node-based page accounting with per cpu differentials.
139139
*/
140140
extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
141-
extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
142141
extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
142+
extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
143143

144144
#ifdef CONFIG_NUMA
145-
static inline void zone_numa_state_add(long x, struct zone *zone,
146-
enum numa_stat_item item)
145+
static inline void zone_numa_event_add(long x, struct zone *zone,
146+
enum numa_stat_item item)
147147
{
148-
atomic_long_add(x, &zone->vm_numa_stat[item]);
149-
atomic_long_add(x, &vm_numa_stat[item]);
148+
atomic_long_add(x, &zone->vm_numa_event[item]);
149+
atomic_long_add(x, &vm_numa_event[item]);
150150
}
151151

152-
static inline unsigned long global_numa_state(enum numa_stat_item item)
152+
static inline unsigned long zone_numa_event_state(struct zone *zone,
153+
enum numa_stat_item item)
153154
{
154-
long x = atomic_long_read(&vm_numa_stat[item]);
155-
156-
return x;
155+
return atomic_long_read(&zone->vm_numa_event[item]);
157156
}
158157

159-
static inline unsigned long zone_numa_state_snapshot(struct zone *zone,
160-
enum numa_stat_item item)
158+
static inline unsigned long
159+
global_numa_event_state(enum numa_stat_item item)
161160
{
162-
long x = atomic_long_read(&zone->vm_numa_stat[item]);
163-
int cpu;
164-
165-
for_each_online_cpu(cpu)
166-
x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_stat_diff[item];
167-
168-
return x;
161+
return atomic_long_read(&vm_numa_event[item]);
169162
}
170163
#endif /* CONFIG_NUMA */
171164

@@ -245,18 +238,22 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
245238
}
246239

247240
#ifdef CONFIG_NUMA
248-
extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item);
241+
extern void __count_numa_event(struct zone *zone, enum numa_stat_item item);
249242
extern unsigned long sum_zone_node_page_state(int node,
250243
enum zone_stat_item item);
251-
extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item);
244+
extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
252245
extern unsigned long node_page_state(struct pglist_data *pgdat,
253246
enum node_stat_item item);
254247
extern unsigned long node_page_state_pages(struct pglist_data *pgdat,
255248
enum node_stat_item item);
249+
extern void fold_vm_numa_events(void);
256250
#else
257251
#define sum_zone_node_page_state(node, item) global_zone_page_state(item)
258252
#define node_page_state(node, item) global_node_page_state(item)
259253
#define node_page_state_pages(node, item) global_node_page_state_pages(item)
254+
static inline void fold_vm_numa_events(void)
255+
{
256+
}
260257
#endif /* CONFIG_NUMA */
261258

262259
#ifdef CONFIG_SMP
@@ -428,7 +425,7 @@ static inline const char *numa_stat_name(enum numa_stat_item item)
428425
static inline const char *node_stat_name(enum node_stat_item item)
429426
{
430427
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
431-
NR_VM_NUMA_STAT_ITEMS +
428+
NR_VM_NUMA_EVENT_ITEMS +
432429
item];
433430
}
434431

@@ -440,7 +437,7 @@ static inline const char *lru_list_name(enum lru_list lru)
440437
static inline const char *writeback_stat_name(enum writeback_stat_item item)
441438
{
442439
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
443-
NR_VM_NUMA_STAT_ITEMS +
440+
NR_VM_NUMA_EVENT_ITEMS +
444441
NR_VM_NODE_STAT_ITEMS +
445442
item];
446443
}
@@ -449,7 +446,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item)
449446
static inline const char *vm_event_name(enum vm_event_item item)
450447
{
451448
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
452-
NR_VM_NUMA_STAT_ITEMS +
449+
NR_VM_NUMA_EVENT_ITEMS +
453450
NR_VM_NODE_STAT_ITEMS +
454451
NR_VM_WRITEBACK_STAT_ITEMS +
455452
item];

mm/mempolicy.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2150,7 +2150,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
21502150
return page;
21512151
if (page && page_to_nid(page) == nid) {
21522152
preempt_disable();
2153-
__inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
2153+
__count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT);
21542154
preempt_enable();
21552155
}
21562156
return page;

mm/page_alloc.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3480,12 +3480,12 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
34803480
local_stat = NUMA_OTHER;
34813481

34823482
if (zone_to_nid(z) == zone_to_nid(preferred_zone))
3483-
__inc_numa_state(z, NUMA_HIT);
3483+
__count_numa_event(z, NUMA_HIT);
34843484
else {
3485-
__inc_numa_state(z, NUMA_MISS);
3486-
__inc_numa_state(preferred_zone, NUMA_FOREIGN);
3485+
__count_numa_event(z, NUMA_MISS);
3486+
__count_numa_event(preferred_zone, NUMA_FOREIGN);
34873487
}
3488-
__inc_numa_state(z, local_stat);
3488+
__count_numa_event(z, local_stat);
34893489
#endif
34903490
}
34913491

@@ -6785,8 +6785,8 @@ void __init setup_per_cpu_pageset(void)
67856785
*/
67866786
for_each_possible_cpu(cpu) {
67876787
struct per_cpu_zonestat *pzstats = &per_cpu(boot_zonestats, cpu);
6788-
memset(pzstats->vm_numa_stat_diff, 0,
6789-
sizeof(pzstats->vm_numa_stat_diff));
6788+
memset(pzstats->vm_numa_event, 0,
6789+
sizeof(pzstats->vm_numa_event));
67906790
}
67916791
#endif
67926792

0 commit comments

Comments
 (0)