Skip to content

Commit 27f4488

Browse files
committed
powerpc/powernv: Add OPAL takeover from PowerVM
On machines supporting the OPAL firmware version 1, the system is initially booted under pHyp. We then use a special hypercall to verify if OPAL is available and if it is, we then trigger a "takeover" which disables pHyp and loads the OPAL runtime firmware, giving control to the kernel in hypervisor mode. This patch adds the necessary code to detect that the OPAL takeover capability is present when running under PowerVM (aka pHyp) and perform said takeover to get hypervisor control of the processor. To perform the takeover, we must first use RTAS (within Open Firmware runtime environment) to start all processors & threads, in order to give control to OPAL on all of them. We then call the takeover hypercall on everybody, OPAL will re-enter the kernel main entry point passing it a flat device-tree. Signed-off-by: Benjamin Herrenschmidt <[email protected]>
1 parent 344eb01 commit 27f4488

File tree

6 files changed

+419
-19
lines changed

6 files changed

+419
-19
lines changed

arch/powerpc/include/asm/opal.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* PowerNV OPAL definitions.
3+
*
4+
* Copyright 2011 IBM Corp.
5+
*
6+
* This program is free software; you can redistribute it and/or
7+
* modify it under the terms of the GNU General Public License
8+
* as published by the Free Software Foundation; either version
9+
* 2 of the License, or (at your option) any later version.
10+
*/
11+
12+
#ifndef __OPAL_H
13+
#define __OPAL_H
14+
15+
/****** Takeover interface ********/
16+
17+
/* PAPR H-Call used to query the HAL existence and/or instantiate
18+
* it from within pHyp (tech preview only).
19+
*
20+
* This is exclusively used in prom_init.c
21+
*/
22+
23+
#ifndef __ASSEMBLY__
24+
25+
struct opal_takeover_args {
26+
u64 k_image; /* r4 */
27+
u64 k_size; /* r5 */
28+
u64 k_entry; /* r6 */
29+
u64 k_entry2; /* r7 */
30+
u64 hal_addr; /* r8 */
31+
u64 rd_image; /* r9 */
32+
u64 rd_size; /* r10 */
33+
u64 rd_loc; /* r11 */
34+
};
35+
36+
extern long opal_query_takeover(u64 *hal_size, u64 *hal_align);
37+
38+
extern long opal_do_takeover(struct opal_takeover_args *args);
39+
40+
extern int opal_enter_rtas(struct rtas_args *args,
41+
unsigned long data,
42+
unsigned long entry);
43+
44+
45+
#endif /* __ASSEMBLY__ */
46+
47+
/****** OPAL APIs ******/
48+
49+
50+
#endif /* __OPAL_H */

arch/powerpc/kernel/head_64.S

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@
5151
* For pSeries or server processors:
5252
* 1. The MMU is off & open firmware is running in real mode.
5353
* 2. The kernel is entered at __start
54+
* -or- For OPAL entry:
55+
* 1. The MMU is off, processor in HV mode, primary CPU enters at 0
56+
* with device-tree in gpr3
57+
* 2. Secondary processors enter at 0x60 with PIR in gpr3
5458
*
5559
* For iSeries:
5660
* 1. The MMU is on (as it always is for iSeries)

arch/powerpc/kernel/prom_init.c

Lines changed: 222 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include <asm/btext.h>
4444
#include <asm/sections.h>
4545
#include <asm/machdep.h>
46+
#include <asm/opal.h>
4647

4748
#include <linux/linux_logo.h>
4849

@@ -185,6 +186,7 @@ static unsigned long __initdata prom_tce_alloc_end;
185186
#define PLATFORM_LPAR 0x0001
186187
#define PLATFORM_POWERMAC 0x0400
187188
#define PLATFORM_GENERIC 0x0500
189+
#define PLATFORM_OPAL 0x0600
188190

189191
static int __initdata of_platform;
190192

@@ -644,7 +646,7 @@ static void __init early_cmdline_parse(void)
644646
}
645647
}
646648

647-
#ifdef CONFIG_PPC_PSERIES
649+
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
648650
/*
649651
* There are two methods for telling firmware what our capabilities are.
650652
* Newer machines have an "ibm,client-architecture-support" method on the
@@ -1274,6 +1276,195 @@ static void __init prom_init_mem(void)
12741276
prom_printf(" ram_top : %x\n", RELOC(ram_top));
12751277
}
12761278

1279+
static void __init prom_close_stdin(void)
1280+
{
1281+
struct prom_t *_prom = &RELOC(prom);
1282+
ihandle val;
1283+
1284+
if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0)
1285+
call_prom("close", 1, 0, val);
1286+
}
1287+
1288+
#ifdef CONFIG_PPC_POWERNV
1289+
1290+
static u64 __initdata prom_opal_size;
1291+
static u64 __initdata prom_opal_align;
1292+
static int __initdata prom_rtas_start_cpu;
1293+
static u64 __initdata prom_rtas_data;
1294+
static u64 __initdata prom_rtas_entry;
1295+
1296+
/* XXX Don't change this structure without updating opal-takeover.S */
1297+
static struct opal_secondary_data {
1298+
s64 ack; /* 0 */
1299+
u64 go; /* 8 */
1300+
struct opal_takeover_args args; /* 16 */
1301+
} opal_secondary_data;
1302+
1303+
extern char opal_secondary_entry;
1304+
1305+
static void prom_query_opal(void)
1306+
{
1307+
long rc;
1308+
1309+
prom_printf("Querying for OPAL presence... ");
1310+
rc = opal_query_takeover(&RELOC(prom_opal_size),
1311+
&RELOC(prom_opal_align));
1312+
prom_debug("(rc = %ld) ", rc);
1313+
if (rc != 0) {
1314+
prom_printf("not there.\n");
1315+
return;
1316+
}
1317+
RELOC(of_platform) = PLATFORM_OPAL;
1318+
prom_printf(" there !\n");
1319+
prom_debug(" opal_size = 0x%lx\n", RELOC(prom_opal_size));
1320+
prom_debug(" opal_align = 0x%lx\n", RELOC(prom_opal_align));
1321+
if (RELOC(prom_opal_align) < 0x10000)
1322+
RELOC(prom_opal_align) = 0x10000;
1323+
}
1324+
1325+
static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...)
1326+
{
1327+
struct rtas_args rtas_args;
1328+
va_list list;
1329+
int i;
1330+
1331+
rtas_args.token = token;
1332+
rtas_args.nargs = nargs;
1333+
rtas_args.nret = nret;
1334+
rtas_args.rets = (rtas_arg_t *)&(rtas_args.args[nargs]);
1335+
va_start(list, outputs);
1336+
for (i = 0; i < nargs; ++i)
1337+
rtas_args.args[i] = va_arg(list, rtas_arg_t);
1338+
va_end(list);
1339+
1340+
for (i = 0; i < nret; ++i)
1341+
rtas_args.rets[i] = 0;
1342+
1343+
opal_enter_rtas(&rtas_args, RELOC(prom_rtas_data),
1344+
RELOC(prom_rtas_entry));
1345+
1346+
if (nret > 1 && outputs != NULL)
1347+
for (i = 0; i < nret-1; ++i)
1348+
outputs[i] = rtas_args.rets[i+1];
1349+
return (nret > 0)? rtas_args.rets[0]: 0;
1350+
}
1351+
1352+
static void __init prom_opal_hold_cpus(void)
1353+
{
1354+
int i, cnt, cpu, rc;
1355+
long j;
1356+
phandle node;
1357+
char type[64];
1358+
u32 servers[8];
1359+
struct prom_t *_prom = &RELOC(prom);
1360+
void *entry = (unsigned long *)&RELOC(opal_secondary_entry);
1361+
struct opal_secondary_data *data = &RELOC(opal_secondary_data);
1362+
1363+
prom_debug("prom_opal_hold_cpus: start...\n");
1364+
prom_debug(" - entry = 0x%x\n", entry);
1365+
prom_debug(" - data = 0x%x\n", data);
1366+
1367+
data->ack = -1;
1368+
data->go = 0;
1369+
1370+
/* look for cpus */
1371+
for (node = 0; prom_next_node(&node); ) {
1372+
type[0] = 0;
1373+
prom_getprop(node, "device_type", type, sizeof(type));
1374+
if (strcmp(type, RELOC("cpu")) != 0)
1375+
continue;
1376+
1377+
/* Skip non-configured cpus. */
1378+
if (prom_getprop(node, "status", type, sizeof(type)) > 0)
1379+
if (strcmp(type, RELOC("okay")) != 0)
1380+
continue;
1381+
1382+
cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers,
1383+
sizeof(servers));
1384+
if (cnt == PROM_ERROR)
1385+
break;
1386+
cnt >>= 2;
1387+
for (i = 0; i < cnt; i++) {
1388+
cpu = servers[i];
1389+
prom_debug("CPU %d ... ", cpu);
1390+
if (cpu == _prom->cpu) {
1391+
prom_debug("booted !\n");
1392+
continue;
1393+
}
1394+
prom_debug("starting ... ");
1395+
1396+
/* Init the acknowledge var which will be reset by
1397+
* the secondary cpu when it awakens from its OF
1398+
* spinloop.
1399+
*/
1400+
data->ack = -1;
1401+
rc = prom_rtas_call(RELOC(prom_rtas_start_cpu), 3, 1,
1402+
NULL, cpu, entry, data);
1403+
prom_debug("rtas rc=%d ...", rc);
1404+
1405+
for (j = 0; j < 100000000 && data->ack == -1; j++) {
1406+
HMT_low();
1407+
mb();
1408+
}
1409+
HMT_medium();
1410+
if (data->ack != -1)
1411+
prom_debug("done, PIR=0x%x\n", data->ack);
1412+
else
1413+
prom_debug("timeout !\n");
1414+
}
1415+
}
1416+
prom_debug("prom_opal_hold_cpus: end...\n");
1417+
}
1418+
1419+
static void prom_opal_takeover(void)
1420+
{
1421+
struct opal_secondary_data *data = &RELOC(opal_secondary_data);
1422+
struct opal_takeover_args *args = &data->args;
1423+
u64 align = RELOC(prom_opal_align);
1424+
u64 top_addr, opal_addr;
1425+
1426+
args->k_image = (u64)RELOC(_stext);
1427+
args->k_size = _end - _stext;
1428+
args->k_entry = 0;
1429+
args->k_entry2 = 0x60;
1430+
1431+
top_addr = _ALIGN_UP(args->k_size, align);
1432+
1433+
if (RELOC(prom_initrd_start) != 0) {
1434+
args->rd_image = RELOC(prom_initrd_start);
1435+
args->rd_size = RELOC(prom_initrd_end) - args->rd_image;
1436+
args->rd_loc = top_addr;
1437+
top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align);
1438+
}
1439+
1440+
/* Pickup an address for the HAL. We want to go really high
1441+
* up to avoid problems with future kexecs. On the other hand
1442+
* we don't want to be all over the TCEs on P5IOC2 machines
1443+
* which are going to be up there too. We assume the machine
1444+
* has plenty of memory, and we ask for the HAL for now to
1445+
* be just below the 1G point, or above the initrd
1446+
*/
1447+
opal_addr = _ALIGN_DOWN(0x40000000 - RELOC(prom_opal_size), align);
1448+
if (opal_addr < top_addr)
1449+
opal_addr = top_addr;
1450+
args->hal_addr = opal_addr;
1451+
1452+
prom_debug(" k_image = 0x%lx\n", args->k_image);
1453+
prom_debug(" k_size = 0x%lx\n", args->k_size);
1454+
prom_debug(" k_entry = 0x%lx\n", args->k_entry);
1455+
prom_debug(" k_entry2 = 0x%lx\n", args->k_entry2);
1456+
prom_debug(" hal_addr = 0x%lx\n", args->hal_addr);
1457+
prom_debug(" rd_image = 0x%lx\n", args->rd_image);
1458+
prom_debug(" rd_size = 0x%lx\n", args->rd_size);
1459+
prom_debug(" rd_loc = 0x%lx\n", args->rd_loc);
1460+
prom_printf("Performing OPAL takeover,this can take a few minutes..\n");
1461+
prom_close_stdin();
1462+
mb();
1463+
data->go = 1;
1464+
for (;;)
1465+
opal_do_takeover(args);
1466+
}
1467+
#endif /* CONFIG_PPC_POWERNV */
12771468

12781469
/*
12791470
* Allocate room for and instantiate RTAS
@@ -1326,6 +1517,12 @@ static void __init prom_instantiate_rtas(void)
13261517
prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
13271518
&entry, sizeof(entry));
13281519

1520+
#ifdef CONFIG_PPC_POWERNV
1521+
/* PowerNV takeover hack */
1522+
RELOC(prom_rtas_data) = base;
1523+
RELOC(prom_rtas_entry) = entry;
1524+
prom_getprop(rtas_node, "start-cpu", &RELOC(prom_rtas_start_cpu), 4);
1525+
#endif
13291526
prom_debug("rtas base = 0x%x\n", base);
13301527
prom_debug("rtas entry = 0x%x\n", entry);
13311528
prom_debug("rtas size = 0x%x\n", (long)size);
@@ -1543,7 +1740,7 @@ static void __init prom_hold_cpus(void)
15431740
*acknowledge = (unsigned long)-1;
15441741

15451742
if (reg != _prom->cpu) {
1546-
/* Primary Thread of non-boot cpu */
1743+
/* Primary Thread of non-boot cpu or any thread */
15471744
prom_printf("starting cpu hw idx %lu... ", reg);
15481745
call_prom("start-cpu", 3, 0, node,
15491746
secondary_hold, reg);
@@ -1652,15 +1849,6 @@ static void __init prom_init_stdout(void)
16521849
prom_setprop(val, path, "linux,boot-display", NULL, 0);
16531850
}
16541851

1655-
static void __init prom_close_stdin(void)
1656-
{
1657-
struct prom_t *_prom = &RELOC(prom);
1658-
ihandle val;
1659-
1660-
if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0)
1661-
call_prom("close", 1, 0, val);
1662-
}
1663-
16641852
static int __init prom_find_machine_type(void)
16651853
{
16661854
struct prom_t *_prom = &RELOC(prom);
@@ -2504,6 +2692,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
25042692
#endif /* CONFIG_BLK_DEV_INITRD */
25052693
}
25062694

2695+
25072696
/*
25082697
* We enter here early on, when the Open Firmware prom is still
25092698
* handling exceptions and the MMU hash table for us.
@@ -2565,7 +2754,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
25652754
*/
25662755
prom_check_initrd(r3, r4);
25672756

2568-
#ifdef CONFIG_PPC_PSERIES
2757+
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
25692758
/*
25702759
* On pSeries, inform the firmware about our capabilities
25712760
*/
@@ -2611,14 +2800,30 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
26112800
#endif
26122801

26132802
/*
2614-
* On non-powermacs, try to instantiate RTAS and puts all CPUs
2615-
* in spin-loops. PowerMacs don't have a working RTAS and use
2616-
* a different way to spin CPUs
2803+
* On non-powermacs, try to instantiate RTAS. PowerMacs don't
2804+
* have a usable RTAS implementation.
26172805
*/
2618-
if (RELOC(of_platform) != PLATFORM_POWERMAC) {
2806+
if (RELOC(of_platform) != PLATFORM_POWERMAC)
26192807
prom_instantiate_rtas();
2620-
prom_hold_cpus();
2808+
2809+
#ifdef CONFIG_PPC_POWERNV
2810+
/* Detect HAL and try instantiating it & doing takeover */
2811+
if (RELOC(of_platform) == PLATFORM_PSERIES_LPAR) {
2812+
prom_query_opal();
2813+
if (RELOC(of_platform) == PLATFORM_OPAL) {
2814+
prom_opal_hold_cpus();
2815+
prom_opal_takeover();
2816+
}
26212817
}
2818+
#endif
2819+
2820+
/*
2821+
* On non-powermacs, put all CPUs in spin-loops.
2822+
*
2823+
* PowerMacs use a different mechanism to spin CPUs
2824+
*/
2825+
if (RELOC(of_platform) != PLATFORM_POWERMAC)
2826+
prom_hold_cpus();
26222827

26232828
/*
26242829
* Fill in some infos for use by the kernel later on

arch/powerpc/kernel/prom_init_check.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
2020
_end enter_prom memcpy memset reloc_offset __secondary_hold
2121
__secondary_hold_acknowledge __secondary_hold_spinloop __start
2222
strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
23-
reloc_got2 kernstart_addr memstart_addr linux_banner"
23+
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
24+
opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry"
2425

2526
NM="$1"
2627
OBJ="$2"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
obj-y += setup.o
1+
obj-y += setup.o opal-takeover.o
22
obj-$(CONFIG_SMP) += smp.o

0 commit comments

Comments
 (0)