Skip to content

Commit fc6daaf

Browse files
aegl authored and torvalds committed
mm/memblock: add extra "flags" to memblock to allow selection of memory based on attribute
Some high end Intel Xeon systems report uncorrectable memory errors as a recoverable machine check. Linux has included code for some time to process these and just signal the affected processes (or even recover completely if the error was in a read only page that can be replaced by reading from disk). But we have no recovery path for errors encountered during kernel code execution. Except for some very specific cases, we are unlikely to ever be able to recover. Enter memory mirroring. Actually 3rd generation of memory mirroring. Gen1: All memory is mirrored Pro: No s/w enabling - h/w just gets good data from other side of the mirror Con: Halves effective memory capacity available to OS/applications Gen2: Partial memory mirror - just mirror memory behind some memory controllers Pro: Keep more of the capacity Con: Nightmare to enable. Have to choose between allocating from mirrored memory for safety vs. NUMA local memory for performance Gen3: Address range partial memory mirror - some mirror on each memory controller Pro: Can tune the amount of mirror and keep NUMA performance Con: I have to write memory management code to implement The current plan is just to use mirrored memory for kernel allocations. This has been broken into two phases: 1) This patch series - find the mirrored memory, use it for boot time allocations 2) Wade into mm/page_alloc.c and define a ZONE_MIRROR to pick up the unused mirrored memory from mm/memblock.c and only give it out to select kernel allocations (this is still being scoped because page_alloc.c is scary). This patch (of 3): Add extra "flags" to memblock to allow selection of memory based on attribute. No functional changes. Signed-off-by: Tony Luck <[email protected]> Cc: Xishi Qiu <[email protected]> Cc: Hanjun Guo <[email protected]> Cc: Xiexiuqi <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: "H. Peter Anvin" <[email protected]> Cc: Yinghai Lu <[email protected]> Cc: Naoya Horiguchi <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 6afdb85 commit fc6daaf

File tree

10 files changed

+83
-47
lines changed

10 files changed

+83
-47
lines changed

arch/s390/kernel/crash_dump.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,12 @@ static struct memblock_type oldmem_type = {
3333
};
3434

3535
#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \
36-
for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \
36+
for (i = 0, __next_mem_range(&i, nid, MEMBLOCK_NONE, \
37+
&memblock.physmem, \
3738
&oldmem_type, p_start, \
3839
p_end, p_nid); \
3940
i != (u64)ULLONG_MAX; \
40-
__next_mem_range(&i, nid, &memblock.physmem, \
41+
__next_mem_range(&i, nid, MEMBLOCK_NONE, &memblock.physmem,\
4142
&oldmem_type, \
4243
p_start, p_end, p_nid))
4344

arch/sparc/mm/init_64.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1966,7 +1966,8 @@ static phys_addr_t __init available_memory(void)
19661966
phys_addr_t pa_start, pa_end;
19671967
u64 i;
19681968

1969-
for_each_free_mem_range(i, NUMA_NO_NODE, &pa_start, &pa_end, NULL)
1969+
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
1970+
&pa_end, NULL)
19701971
available = available + (pa_end - pa_start);
19711972

19721973
return available;
@@ -1992,7 +1993,8 @@ static void __init reduce_memory(phys_addr_t limit_ram)
19921993
if (limit_ram >= avail_ram)
19931994
return;
19941995

1995-
for_each_free_mem_range(i, NUMA_NO_NODE, &pa_start, &pa_end, NULL) {
1996+
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
1997+
&pa_end, NULL) {
19961998
phys_addr_t region_size = pa_end - pa_start;
19971999
phys_addr_t clip_start = pa_start;
19982000

arch/x86/kernel/check.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ void __init setup_bios_corruption_check(void)
9191

9292
corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
9393

94-
for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) {
94+
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
95+
NULL) {
9596
start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
9697
PAGE_SIZE, corruption_check_size);
9798
end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),

arch/x86/kernel/e820.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1123,7 +1123,8 @@ void __init memblock_find_dma_reserve(void)
11231123
nr_pages += end_pfn - start_pfn;
11241124
}
11251125

1126-
for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) {
1126+
for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
1127+
NULL) {
11271128
start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
11281129
end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
11291130
if (start_pfn < end_pfn)

arch/x86/mm/init_32.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ void __init add_highpages_with_active_regions(int nid,
433433
phys_addr_t start, end;
434434
u64 i;
435435

436-
for_each_free_mem_range(i, nid, &start, &end, NULL) {
436+
for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) {
437437
unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
438438
start_pfn, end_pfn);
439439
unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),

include/linux/memblock.h

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121
#define INIT_PHYSMEM_REGIONS 4
2222

2323
/* Definition of memblock flags. */
24-
#define MEMBLOCK_HOTPLUG 0x1 /* hotpluggable region */
24+
enum {
25+
MEMBLOCK_NONE = 0x0, /* No special request */
26+
MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */
27+
};
2528

2629
struct memblock_region {
2730
phys_addr_t base;
@@ -61,7 +64,7 @@ extern bool movable_node_enabled;
6164

6265
phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
6366
phys_addr_t start, phys_addr_t end,
64-
int nid);
67+
int nid, ulong flags);
6568
phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
6669
phys_addr_t size, phys_addr_t align);
6770
phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
@@ -85,11 +88,13 @@ int memblock_remove_range(struct memblock_type *type,
8588
phys_addr_t base,
8689
phys_addr_t size);
8790

88-
void __next_mem_range(u64 *idx, int nid, struct memblock_type *type_a,
91+
void __next_mem_range(u64 *idx, int nid, ulong flags,
92+
struct memblock_type *type_a,
8993
struct memblock_type *type_b, phys_addr_t *out_start,
9094
phys_addr_t *out_end, int *out_nid);
9195

92-
void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
96+
void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
97+
struct memblock_type *type_a,
9398
struct memblock_type *type_b, phys_addr_t *out_start,
9499
phys_addr_t *out_end, int *out_nid);
95100

@@ -100,16 +105,17 @@ void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
100105
* @type_a: ptr to memblock_type to iterate
101106
* @type_b: ptr to memblock_type which excludes from the iteration
102107
* @nid: node selector, %NUMA_NO_NODE for all nodes
108+
* @flags: pick from blocks based on memory attributes
103109
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
104110
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
105111
* @p_nid: ptr to int for nid of the range, can be %NULL
106112
*/
107-
#define for_each_mem_range(i, type_a, type_b, nid, \
113+
#define for_each_mem_range(i, type_a, type_b, nid, flags, \
108114
p_start, p_end, p_nid) \
109-
for (i = 0, __next_mem_range(&i, nid, type_a, type_b, \
115+
for (i = 0, __next_mem_range(&i, nid, flags, type_a, type_b, \
110116
p_start, p_end, p_nid); \
111117
i != (u64)ULLONG_MAX; \
112-
__next_mem_range(&i, nid, type_a, type_b, \
118+
__next_mem_range(&i, nid, flags, type_a, type_b, \
113119
p_start, p_end, p_nid))
114120

115121
/**
@@ -119,17 +125,18 @@ void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
119125
* @type_a: ptr to memblock_type to iterate
120126
* @type_b: ptr to memblock_type which excludes from the iteration
121127
* @nid: node selector, %NUMA_NO_NODE for all nodes
128+
* @flags: pick from blocks based on memory attributes
122129
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
123130
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
124131
* @p_nid: ptr to int for nid of the range, can be %NULL
125132
*/
126-
#define for_each_mem_range_rev(i, type_a, type_b, nid, \
133+
#define for_each_mem_range_rev(i, type_a, type_b, nid, flags, \
127134
p_start, p_end, p_nid) \
128135
for (i = (u64)ULLONG_MAX, \
129-
__next_mem_range_rev(&i, nid, type_a, type_b, \
136+
__next_mem_range_rev(&i, nid, flags, type_a, type_b,\
130137
p_start, p_end, p_nid); \
131138
i != (u64)ULLONG_MAX; \
132-
__next_mem_range_rev(&i, nid, type_a, type_b, \
139+
__next_mem_range_rev(&i, nid, flags, type_a, type_b, \
133140
p_start, p_end, p_nid))
134141

135142
#ifdef CONFIG_MOVABLE_NODE
@@ -181,13 +188,14 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
181188
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
182189
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
183190
* @p_nid: ptr to int for nid of the range, can be %NULL
191+
* @flags: pick from blocks based on memory attributes
184192
*
185193
* Walks over free (memory && !reserved) areas of memblock. Available as
186194
* soon as memblock is initialized.
187195
*/
188-
#define for_each_free_mem_range(i, nid, p_start, p_end, p_nid) \
196+
#define for_each_free_mem_range(i, nid, flags, p_start, p_end, p_nid) \
189197
for_each_mem_range(i, &memblock.memory, &memblock.reserved, \
190-
nid, p_start, p_end, p_nid)
198+
nid, flags, p_start, p_end, p_nid)
191199

192200
/**
193201
* for_each_free_mem_range_reverse - rev-iterate through free memblock areas
@@ -196,13 +204,15 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
196204
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
197205
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
198206
* @p_nid: ptr to int for nid of the range, can be %NULL
207+
* @flags: pick from blocks based on memory attributes
199208
*
200209
* Walks over free (memory && !reserved) areas of memblock in reverse
201210
* order. Available as soon as memblock is initialized.
202211
*/
203-
#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid) \
212+
#define for_each_free_mem_range_reverse(i, nid, flags, p_start, p_end, \
213+
p_nid) \
204214
for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \
205-
nid, p_start, p_end, p_nid)
215+
nid, flags, p_start, p_end, p_nid)
206216

207217
static inline void memblock_set_region_flags(struct memblock_region *r,
208218
unsigned long flags)
@@ -273,7 +283,8 @@ static inline bool memblock_bottom_up(void) { return false; }
273283
#define MEMBLOCK_ALLOC_ACCESSIBLE 0
274284

275285
phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
276-
phys_addr_t start, phys_addr_t end);
286+
phys_addr_t start, phys_addr_t end,
287+
ulong flags);
277288
phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
278289
phys_addr_t max_addr);
279290
phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,

mm/cma.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,13 +316,15 @@ int __init cma_declare_contiguous(phys_addr_t base,
316316
*/
317317
if (base < highmem_start && limit > highmem_start) {
318318
addr = memblock_alloc_range(size, alignment,
319-
highmem_start, limit);
319+
highmem_start, limit,
320+
MEMBLOCK_NONE);
320321
limit = highmem_start;
321322
}
322323

323324
if (!addr) {
324325
addr = memblock_alloc_range(size, alignment, base,
325-
limit);
326+
limit,
327+
MEMBLOCK_NONE);
326328
if (!addr) {
327329
ret = -ENOMEM;
328330
goto err;

0 commit comments

Comments
 (0)