Skip to content

Commit 5a090f7

Browse files
wxiong38mpe
authored and committed
powerpc/pseries: PCIE PHB reset
Several device drivers hit EEH (Extended Error Handling) when triggering kdump on Pseries PowerVM. This patch implements a reset of the PHBs in the PCI general code when triggering kdump. The PHB reset stops all PCI transactions from the normal kernel. We have tested the patch in several environments: - direct slot adapters - adapters under the switch - a VF adapter in PowerVM - a VF adapter/adapter in KVM guest. Signed-off-by: Wen Xiong <[email protected]> [mpe: Fix broken whitespace, subject & SOB formatting] Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 2384b36 commit 5a090f7

File tree

1 file changed

+169
-63
lines changed

1 file changed

+169
-63
lines changed

arch/powerpc/platforms/pseries/eeh_pseries.c

Lines changed: 169 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/sched.h>
2525
#include <linux/seq_file.h>
2626
#include <linux/spinlock.h>
27+
#include <linux/crash_dump.h>
2728

2829
#include <asm/eeh.h>
2930
#include <asm/eeh_event.h>
@@ -80,6 +81,152 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
8081
eeh_probe_device(pdev);
8182
}
8283

84+
85+
/**
86+
* pseries_eeh_get_config_addr - Retrieve config address
87+
*
88+
* Retrieve the assocated config address. Actually, there're 2 RTAS
89+
* function calls dedicated for the purpose. We need implement
90+
* it through the new function and then the old one. Besides,
91+
* you should make sure the config address is figured out from
92+
* FDT node before calling the function.
93+
*
94+
* It's notable that zero'ed return value means invalid PE config
95+
* address.
96+
*/
97+
static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr)
98+
{
99+
int ret = 0;
100+
int rets[3];
101+
102+
if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
103+
/*
104+
* First of all, we need to make sure there has one PE
105+
* associated with the device. Otherwise, PE address is
106+
* meaningless.
107+
*/
108+
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
109+
config_addr, BUID_HI(phb->buid),
110+
BUID_LO(phb->buid), 1);
111+
if (ret || (rets[0] == 0))
112+
return 0;
113+
114+
/* Retrieve the associated PE config address */
115+
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
116+
config_addr, BUID_HI(phb->buid),
117+
BUID_LO(phb->buid), 0);
118+
if (ret) {
119+
pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
120+
__func__, phb->global_number, config_addr);
121+
return 0;
122+
}
123+
124+
return rets[0];
125+
}
126+
127+
if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
128+
ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
129+
config_addr, BUID_HI(phb->buid),
130+
BUID_LO(phb->buid), 0);
131+
if (ret) {
132+
pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
133+
__func__, phb->global_number, config_addr);
134+
return 0;
135+
}
136+
137+
return rets[0];
138+
}
139+
140+
return ret;
141+
}
142+
143+
/**
144+
* pseries_eeh_phb_reset - Reset the specified PHB
145+
* @phb: PCI controller
146+
* @config_adddr: the associated config address
147+
* @option: reset option
148+
*
149+
* Reset the specified PHB/PE
150+
*/
151+
static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option)
152+
{
153+
int ret;
154+
155+
/* Reset PE through RTAS call */
156+
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
157+
config_addr, BUID_HI(phb->buid),
158+
BUID_LO(phb->buid), option);
159+
160+
/* If fundamental-reset not supported, try hot-reset */
161+
if (option == EEH_RESET_FUNDAMENTAL &&
162+
ret == -8) {
163+
option = EEH_RESET_HOT;
164+
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
165+
config_addr, BUID_HI(phb->buid),
166+
BUID_LO(phb->buid), option);
167+
}
168+
169+
/* We need reset hold or settlement delay */
170+
if (option == EEH_RESET_FUNDAMENTAL ||
171+
option == EEH_RESET_HOT)
172+
msleep(EEH_PE_RST_HOLD_TIME);
173+
else
174+
msleep(EEH_PE_RST_SETTLE_TIME);
175+
176+
return ret;
177+
}
178+
179+
/**
180+
* pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE
181+
* @phb: PCI controller
182+
* @config_adddr: the associated config address
183+
*
184+
* The function will be called to reconfigure the bridges included
185+
* in the specified PE so that the mulfunctional PE would be recovered
186+
* again.
187+
*/
188+
static int pseries_eeh_phb_configure_bridge(struct pci_controller *phb, int config_addr)
189+
{
190+
int ret;
191+
/* Waiting 0.2s maximum before skipping configuration */
192+
int max_wait = 200;
193+
194+
while (max_wait > 0) {
195+
ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
196+
config_addr, BUID_HI(phb->buid),
197+
BUID_LO(phb->buid));
198+
199+
if (!ret)
200+
return ret;
201+
if (ret < 0)
202+
break;
203+
204+
/*
205+
* If RTAS returns a delay value that's above 100ms, cut it
206+
* down to 100ms in case firmware made a mistake. For more
207+
* on how these delay values work see rtas_busy_delay_time
208+
*/
209+
if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
210+
ret <= RTAS_EXTENDED_DELAY_MAX)
211+
ret = RTAS_EXTENDED_DELAY_MIN+2;
212+
213+
max_wait -= rtas_busy_delay_time(ret);
214+
215+
if (max_wait < 0)
216+
break;
217+
218+
rtas_busy_delay(ret);
219+
}
220+
221+
pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
222+
__func__, phb->global_number, config_addr, ret);
223+
/* PAPR defines -3 as "Parameter Error" for this function: */
224+
if (ret == -3)
225+
return -EINVAL;
226+
else
227+
return -EIO;
228+
}
229+
83230
/*
84231
* Buffer for reporting slot-error-detail rtas calls. Its here
85232
* in BSS, and not dynamically alloced, so that it ends up in
@@ -96,6 +243,10 @@ static int eeh_error_buf_size;
96243
*/
97244
static int pseries_eeh_init(void)
98245
{
246+
struct pci_controller *phb;
247+
struct pci_dn *pdn;
248+
int addr, config_addr;
249+
99250
/* figure out EEH RTAS function call tokens */
100251
ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
101252
ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
@@ -148,6 +299,22 @@ static int pseries_eeh_init(void)
148299
/* Set EEH machine dependent code */
149300
ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
150301

302+
/*
 * When booting as a kdump kernel (or when reset_devices was requested),
 * reset every PHB so that PCI transactions started by the previous
 * kernel are stopped before drivers probe their devices.
 */
if (is_kdump_kernel() || reset_devices) {
	pr_info("Issue PHB reset ...\n");
	list_for_each_entry(phb, &hose_list, list_node) {
		/*
		 * list_first_entry() must not be used on an empty list:
		 * it would return a pointer computed from the list head
		 * itself, not a real pci_dn. Skip PHBs without children.
		 */
		if (list_empty(&PCI_DN(phb->dn)->child_list))
			continue;

		pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
		addr = (pdn->busno << 16) | (pdn->devfn << 8);
		config_addr = pseries_eeh_get_config_addr(phb, addr);
		/* invalid PE config addr */
		if (config_addr == 0)
			continue;

		/* Assert the reset, release it, then reconfigure the bridges */
		pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
		pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
		pseries_eeh_phb_configure_bridge(phb, config_addr);
	}
}
317+
151318
return 0;
152319
}
153320

@@ -569,35 +736,13 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
569736
static int pseries_eeh_reset(struct eeh_pe *pe, int option)
570737
{
571738
int config_addr;
572-
int ret;
573739

574740
/* Figure out PE address */
575741
config_addr = pe->config_addr;
576742
if (pe->addr)
577743
config_addr = pe->addr;
578744

579-
/* Reset PE through RTAS call */
580-
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
581-
config_addr, BUID_HI(pe->phb->buid),
582-
BUID_LO(pe->phb->buid), option);
583-
584-
/* If fundamental-reset not supported, try hot-reset */
585-
if (option == EEH_RESET_FUNDAMENTAL &&
586-
ret == -8) {
587-
option = EEH_RESET_HOT;
588-
ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
589-
config_addr, BUID_HI(pe->phb->buid),
590-
BUID_LO(pe->phb->buid), option);
591-
}
592-
593-
/* We need reset hold or settlement delay */
594-
if (option == EEH_RESET_FUNDAMENTAL ||
595-
option == EEH_RESET_HOT)
596-
msleep(EEH_PE_RST_HOLD_TIME);
597-
else
598-
msleep(EEH_PE_RST_SETTLE_TIME);
599-
600-
return ret;
745+
return pseries_eeh_phb_reset(pe->phb, config_addr, option);
601746
}
602747

603748
/**
@@ -641,56 +786,17 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u
641786
* pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
642787
* @pe: EEH PE
643788
*
644-
* The function will be called to reconfigure the bridges included
645-
* in the specified PE so that the mulfunctional PE would be recovered
646-
* again.
647789
*/
648790
static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
649791
{
650792
int config_addr;
651-
int ret;
652-
/* Waiting 0.2s maximum before skipping configuration */
653-
int max_wait = 200;
654793

655794
/* Figure out the PE address */
656795
config_addr = pe->config_addr;
657796
if (pe->addr)
658797
config_addr = pe->addr;
659798

660-
while (max_wait > 0) {
661-
ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
662-
config_addr, BUID_HI(pe->phb->buid),
663-
BUID_LO(pe->phb->buid));
664-
665-
if (!ret)
666-
return ret;
667-
if (ret < 0)
668-
break;
669-
670-
/*
671-
* If RTAS returns a delay value that's above 100ms, cut it
672-
* down to 100ms in case firmware made a mistake. For more
673-
* on how these delay values work see rtas_busy_delay_time
674-
*/
675-
if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
676-
ret <= RTAS_EXTENDED_DELAY_MAX)
677-
ret = RTAS_EXTENDED_DELAY_MIN+2;
678-
679-
max_wait -= rtas_busy_delay_time(ret);
680-
681-
if (max_wait < 0)
682-
break;
683-
684-
rtas_busy_delay(ret);
685-
}
686-
687-
pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
688-
__func__, pe->phb->global_number, pe->addr, ret);
689-
/* PAPR defines -3 as "Parameter Error" for this function: */
690-
if (ret == -3)
691-
return -EINVAL;
692-
else
693-
return -EIO;
799+
return pseries_eeh_phb_configure_bridge(pe->phb, config_addr);
694800
}
695801

696802
/**

0 commit comments

Comments
 (0)