Skip to content

Commit eb39c88

Browse files
maheshsalozbenh
authored andcommitted
fadump: Reserve the memory for firmware assisted dump.
Reserve the memory during early boot to preserve CPU state data, HPTE region and RMA (real mode area) region data in case of kernel crash. At the time of crash, powerpc firmware will store CPU state data, HPTE region data and move RMA region data to the reserved memory area. If the firmware-assisted dump fails to reserve the memory, then fall back to the existing kexec-based kdump. Most of the code implementation to reserve memory has been adapted from the phyp assisted dump implementation written by Linas Vepstas and Manish Ahuja. This patch also introduces a config option CONFIG_FA_DUMP for the firmware-assisted dump feature on the Powerpc (ppc64) architecture. Signed-off-by: Mahesh Salgaonkar <[email protected]> Signed-off-by: Benjamin Herrenschmidt <[email protected]>
1 parent 8e0aa6d commit eb39c88

File tree

5 files changed

+345
-1
lines changed

5 files changed

+345
-1
lines changed

arch/powerpc/Kconfig

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,19 @@ config PHYP_DUMP
386386

387387
If unsure, say "N"
388388

389+
config FA_DUMP
390+
bool "Firmware-assisted dump"
391+
depends on PPC64 && PPC_RTAS && CRASH_DUMP
392+
help
393+
A robust mechanism to get reliable kernel crash dump with
394+
assistance from firmware. This approach does not use kexec,
395+
instead firmware assists in booting the kdump kernel
396+
while preserving memory contents. Firmware-assisted dump
397+
is meant to be a kdump replacement offering robustness and
398+
speed not possible without system firmware assistance.
399+
400+
If unsure, say "N"
401+
389402
config IRQ_ALL_CPUS
390403
bool "Distribute interrupts on all CPUs by default"
391404
depends on SMP && !MV64360

arch/powerpc/include/asm/fadump.h

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Firmware Assisted dump header file.
3+
*
4+
* This program is free software; you can redistribute it and/or modify
5+
* it under the terms of the GNU General Public License as published by
6+
* the Free Software Foundation; either version 2 of the License, or
7+
* (at your option) any later version.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
* GNU General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU General Public License
15+
* along with this program; if not, write to the Free Software
16+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17+
*
18+
* Copyright 2011 IBM Corporation
19+
* Author: Mahesh Salgaonkar <[email protected]>
20+
*/
21+
22+
#ifndef __PPC64_FA_DUMP_H__
23+
#define __PPC64_FA_DUMP_H__
24+
25+
#ifdef CONFIG_FA_DUMP
26+
27+
/*
28+
* The RMA region will be saved for later dumping when kernel crashes.
29+
* RMA is Real Mode Area, the first block of logical memory address owned
30+
* by logical partition, containing the storage that may be accessed with
31+
* translate off.
32+
*/
33+
#define RMA_START 0x0
34+
#define RMA_END (ppc64_rma_size)
35+
36+
/*
37+
* On some Power systems where RMO is 128MB, it still requires minimum of
38+
* 256MB for kernel to boot successfully. When kdump infrastructure is
39+
* configured to save vmcore over network, we run into OOM issue while
40+
* loading modules related to network setup. Hence we need additional 64M
41+
* of memory to avoid OOM issue.
42+
*/
43+
#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
44+
+ (0x1UL << 26))
45+
46+
/* Firmware provided dump sections */
47+
#define FADUMP_CPU_STATE_DATA 0x0001
48+
#define FADUMP_HPTE_REGION 0x0002
49+
#define FADUMP_REAL_MODE_REGION 0x0011
50+
51+
/*
 * Bookkeeping for firmware-assisted dump: sizes read from the device tree,
 * the boot-time reservation, command line settings and state flags.
 */
struct fw_dump {
52+
/* Size read from the FADUMP_CPU_STATE_DATA entry of "ibm,configure-kernel-dump-sizes" */
unsigned long cpu_state_data_size;
53+
/* Size read from the FADUMP_HPTE_REGION entry of "ibm,configure-kernel-dump-sizes" */
unsigned long hpte_region_size;
54+
/* Result of fadump_calculate_reserve_size(), set in fadump_reserve_mem() */
unsigned long boot_memory_size;
55+
/* Base address of the region passed to memblock_reserve() */
unsigned long reserve_dump_area_start;
56+
/* Size of the region passed to memblock_reserve() */
unsigned long reserve_dump_area_size;
57+
/* cmd line option during boot */
58+
/* Value parsed from fadump_reserve_mem=; 0 means "not specified" */
unsigned long reserve_bootvar;
59+
60+
/* Value of the rtas node's "ibm,configure-kernel-dump" property (presumably the RTAS call token - confirm) */
int ibm_configure_kernel_dump;
61+
62+
/* Set to 1 by fadump=on, cleared by fadump=off or when reservation is refused */
unsigned long fadump_enabled:1;
63+
/* Set when the rtas node carries "ibm,configure-kernel-dump" */
unsigned long fadump_supported:1;
64+
/* Set when the rtas node carries "ibm,kernel-dump" (dump data is waiting) */
unsigned long dump_active:1;
65+
};
66+
67+
extern int early_init_dt_scan_fw_dump(unsigned long node,
68+
const char *uname, int depth, void *data);
69+
extern int fadump_reserve_mem(void);
70+
#endif
71+
#endif

arch/powerpc/kernel/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ obj-$(CONFIG_IBMVIO) += vio.o
6060
obj-$(CONFIG_IBMEBUS) += ibmebus.o
6161
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
6262
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
63+
obj-$(CONFIG_FA_DUMP) += fadump.o
6364
ifeq ($(CONFIG_PPC32),y)
6465
obj-$(CONFIG_E500) += idle_e500.o
6566
endif

arch/powerpc/kernel/fadump.c

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
/*
2+
* Firmware Assisted dump: A robust mechanism to get reliable kernel crash
3+
* dump with assistance from firmware. This approach does not use kexec,
4+
* instead firmware assists in booting the kdump kernel while preserving
5+
* memory contents. Most of the code implementation has been adapted
6+
* from phyp assisted dump implementation written by Linas Vepstas and
7+
* Manish Ahuja
8+
*
9+
* This program is free software; you can redistribute it and/or modify
10+
* it under the terms of the GNU General Public License as published by
11+
* the Free Software Foundation; either version 2 of the License, or
12+
* (at your option) any later version.
13+
*
14+
* This program is distributed in the hope that it will be useful,
15+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17+
* GNU General Public License for more details.
18+
*
19+
* You should have received a copy of the GNU General Public License
20+
* along with this program; if not, write to the Free Software
21+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22+
*
23+
* Copyright 2011 IBM Corporation
24+
* Author: Mahesh Salgaonkar <[email protected]>
25+
*/
26+
27+
#undef DEBUG
28+
#define pr_fmt(fmt) "fadump: " fmt
29+
30+
#include <linux/string.h>
31+
#include <linux/memblock.h>
32+
33+
#include <asm/page.h>
34+
#include <asm/prom.h>
35+
#include <asm/rtas.h>
36+
#include <asm/fadump.h>
37+
38+
static struct fw_dump fw_dump;
39+
40+
/* Scan the Firmware Assisted dump configuration details. */
41+
int __init early_init_dt_scan_fw_dump(unsigned long node,
42+
const char *uname, int depth, void *data)
43+
{
44+
__be32 *sections;
45+
int i, num_sections;
46+
unsigned long size;
47+
const int *token;
48+
49+
if (depth != 1 || strcmp(uname, "rtas") != 0)
50+
return 0;
51+
52+
/*
53+
* Check if Firmware Assisted dump is supported. if yes, check
54+
* if dump has been initiated on last reboot.
55+
*/
56+
token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
57+
if (!token)
58+
return 0;
59+
60+
fw_dump.fadump_supported = 1;
61+
fw_dump.ibm_configure_kernel_dump = *token;
62+
63+
/*
64+
* The 'ibm,kernel-dump' rtas node is present only if there is
65+
* dump data waiting for us.
66+
*/
67+
if (of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL))
68+
fw_dump.dump_active = 1;
69+
70+
/* Get the sizes required to store dump data for the firmware provided
71+
* dump sections.
72+
* For each dump section type supported, a 32bit cell which defines
73+
* the ID of a supported section followed by two 32 bit cells which
74+
* gives teh size of the section in bytes.
75+
*/
76+
sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
77+
&size);
78+
79+
if (!sections)
80+
return 0;
81+
82+
num_sections = size / (3 * sizeof(u32));
83+
84+
for (i = 0; i < num_sections; i++, sections += 3) {
85+
u32 type = (u32)of_read_number(sections, 1);
86+
87+
switch (type) {
88+
case FADUMP_CPU_STATE_DATA:
89+
fw_dump.cpu_state_data_size =
90+
of_read_ulong(&sections[1], 2);
91+
break;
92+
case FADUMP_HPTE_REGION:
93+
fw_dump.hpte_region_size =
94+
of_read_ulong(&sections[1], 2);
95+
break;
96+
}
97+
}
98+
return 1;
99+
}
100+
101+
/**
102+
* fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
103+
*
104+
* Function to find the largest memory size we need to reserve during early
105+
* boot process. This will be the size of the memory that is required for a
106+
* kernel to boot successfully.
107+
*
108+
* This function has been taken from phyp-assisted dump feature implementation.
109+
*
110+
* returns larger of 256MB or 5% rounded down to multiples of 256MB.
111+
*
112+
* TODO: Come up with better approach to find out more accurate memory size
113+
* that is required for a kernel to boot successfully.
114+
*
115+
*/
116+
static inline unsigned long fadump_calculate_reserve_size(void)
117+
{
118+
unsigned long size;
119+
120+
/*
121+
* Check if the size is specified through fadump_reserve_mem= cmdline
122+
* option. If yes, then use that.
123+
*/
124+
if (fw_dump.reserve_bootvar)
125+
return fw_dump.reserve_bootvar;
126+
127+
/* divide by 20 to get 5% of value */
128+
size = memblock_end_of_DRAM() / 20;
129+
130+
/* round it down in multiples of 256 */
131+
size = size & ~0x0FFFFFFFUL;
132+
133+
/* Truncate to memory_limit. We don't want to over reserve the memory.*/
134+
if (memory_limit && size > memory_limit)
135+
size = memory_limit;
136+
137+
return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
138+
}
139+
140+
/*
141+
* Calculate the total memory size required to be reserved for
142+
* firmware-assisted dump registration.
143+
*/
144+
static unsigned long get_fadump_area_size(void)
145+
{
146+
unsigned long size = 0;
147+
148+
size += fw_dump.cpu_state_data_size;
149+
size += fw_dump.hpte_region_size;
150+
size += fw_dump.boot_memory_size;
151+
152+
size = PAGE_ALIGN(size);
153+
return size;
154+
}
155+
156+
/*
 * Reserve memory for firmware-assisted dump during early boot.
 *
 * When no dump is active, an area of get_fadump_area_size() bytes is
 * reserved at the top of memory (below memory_limit, which is raised to make
 * room when it is set below the end of DRAM). When a dump from the previous
 * boot is active, everything above boot_memory_size is reserved instead so
 * that the dump data is left untouched.
 *
 * Returns 1 if the reservation was made. Returns 0 if fadump is disabled,
 * unsupported, or the reservation cannot be made; the caller then falls back
 * to the kexec-based kdump reservation.
 */
int __init fadump_reserve_mem(void)
{
	unsigned long base, size, needed, memory_boundary;

	if (!fw_dump.fadump_enabled)
		return 0;

	if (!fw_dump.fadump_supported) {
		printk(KERN_INFO "Firmware-assisted dump is not supported on"
				" this hardware\n");
		fw_dump.fadump_enabled = 0;
		return 0;
	}
	/* Initialize boot memory size */
	fw_dump.boot_memory_size = fadump_calculate_reserve_size();

	/*
	 * The base/size arithmetic below subtracts from the memory boundary.
	 * Refuse up front, before memory_limit is touched, if the request
	 * cannot fit in memory (e.g. an oversized fadump_reserve_mem=),
	 * otherwise the subtraction would wrap around.
	 */
	needed = fw_dump.dump_active ? fw_dump.boot_memory_size :
				       get_fadump_area_size();
	if (needed > memblock_end_of_DRAM()) {
		printk(KERN_ERR "Firmware-assisted dump needs %luMB of memory"
				" but only %luMB is present\n",
				needed >> 20,
				(unsigned long)(memblock_end_of_DRAM() >> 20));
		goto out_disable;
	}

	/*
	 * Calculate the memory boundary.
	 * If memory_limit is less than actual memory boundary then reserve
	 * the memory for fadump beyond the memory_limit and adjust the
	 * memory_limit accordingly, so that the running kernel can run with
	 * specified memory_limit.
	 */
	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
		size = get_fadump_area_size();
		if ((memory_limit + size) < memblock_end_of_DRAM())
			memory_limit += size;
		else
			memory_limit = memblock_end_of_DRAM();
		printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
				" dump, now %#016llx\n",
				(unsigned long long)memory_limit);
	}
	if (memory_limit)
		memory_boundary = memory_limit;
	else
		memory_boundary = memblock_end_of_DRAM();

	if (fw_dump.dump_active) {
		printk(KERN_INFO "Firmware-assisted dump is active.\n");
		/*
		 * If last boot has crashed then reserve all the memory
		 * above boot_memory_size so that we don't touch it until
		 * dump is written to disk by userspace tool. This memory
		 * will be released for general use once the dump is saved.
		 */
		base = fw_dump.boot_memory_size;
		size = memory_boundary - base;
		if (memblock_reserve(base, size))
			goto out_reserve_failed;
		printk(KERN_INFO "Reserved %luMB of memory at %luMB "
				"for saving crash dump\n",
				(unsigned long)(size >> 20),
				(unsigned long)(base >> 20));
	} else {
		/* Reserve the memory at the top of memory. */
		size = get_fadump_area_size();
		base = memory_boundary - size;
		if (memblock_reserve(base, size))
			goto out_reserve_failed;
		printk(KERN_INFO "Reserved %luMB of memory at %luMB "
				"for firmware-assisted dump\n",
				(unsigned long)(size >> 20),
				(unsigned long)(base >> 20));
	}
	fw_dump.reserve_dump_area_start = base;
	fw_dump.reserve_dump_area_size = size;
	return 1;

out_reserve_failed:
	printk(KERN_ERR "Failed to reserve memory for firmware-assisted"
			" dump\n");
out_disable:
	/* Returning 0 lets the caller fall back to kexec-based kdump. */
	fw_dump.fadump_enabled = 0;
	return 0;
}
223+
224+
/* Look for fadump= cmdline option. */
225+
static int __init early_fadump_param(char *p)
226+
{
227+
if (!p)
228+
return 1;
229+
230+
if (strncmp(p, "on", 2) == 0)
231+
fw_dump.fadump_enabled = 1;
232+
else if (strncmp(p, "off", 3) == 0)
233+
fw_dump.fadump_enabled = 0;
234+
235+
return 0;
236+
}
237+
early_param("fadump", early_fadump_param);
238+
239+
/* Look for fadump_reserve_mem= cmdline option */
240+
static int __init early_fadump_reserve_mem(char *p)
241+
{
242+
if (p)
243+
fw_dump.reserve_bootvar = memparse(p, &p);
244+
return 0;
245+
}
246+
early_param("fadump_reserve_mem", early_fadump_reserve_mem);

arch/powerpc/kernel/prom.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
#include <asm/phyp_dump.h>
5656
#include <asm/kexec.h>
5757
#include <asm/opal.h>
58+
#include <asm/fadump.h>
5859

5960
#include <mm/mmu_decl.h>
6061

@@ -719,6 +720,11 @@ void __init early_init_devtree(void *params)
719720
of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL);
720721
#endif
721722

723+
#ifdef CONFIG_FA_DUMP
724+
/* scan tree to see if dump is active during last boot */
725+
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
726+
#endif
727+
722728
/* Pre-initialize the cmd_line with the content of boot_commmand_line,
723729
* which will be empty except when the content of the variable has
724730
* been overriden by a bootloading mechanism. This happens typically
@@ -750,7 +756,14 @@ void __init early_init_devtree(void *params)
750756
if (PHYSICAL_START > MEMORY_START)
751757
memblock_reserve(MEMORY_START, 0x8000);
752758
reserve_kdump_trampoline();
753-
reserve_crashkernel();
759+
#ifdef CONFIG_FA_DUMP
760+
/*
761+
* If we fail to reserve memory for firmware-assisted dump then
762+
* fallback to kexec based kdump.
763+
*/
764+
if (fadump_reserve_mem() == 0)
765+
#endif
766+
reserve_crashkernel();
754767
early_reserve_mem();
755768
phyp_dump_reserve_mem();
756769

0 commit comments

Comments
 (0)