Skip to content

Commit bd6461c

Browse files
oohalmpe
authored and committed
powerpc/eeh: Add a eeh_dev_break debugfs interface
Add an interface to debugfs for generating an EEH event on a given device. This works by disabling memory accesses to and from the device by setting the PCI_COMMAND register (or the VF Memory Space Enable on the parent PF). This is a somewhat portable alternative to using the platform specific error injection mechanisms since those tend to be either hard to use, or straight up broken. For pseries the interface also requires the use of /dev/mem which is probably going to go away in a post-LOCKDOWN world (and it's a horrific hack to begin with) so moving to a kernel-provided interface makes sense and provides a sane, cross-platform interface for userspace so we can write more generic testing scripts. Signed-off-by: Oliver O'Halloran <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 22cda7c commit bd6461c

File tree

1 file changed

+138
-1
lines changed

1 file changed

+138
-1
lines changed

arch/powerpc/kernel/eeh.c

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1892,7 +1892,8 @@ static ssize_t eeh_dev_check_write(struct file *filp,
18921892
char buf[20];
18931893
int ret;
18941894

1895-
ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
1895+
memset(buf, 0, sizeof(buf));
1896+
ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
18961897
if (!ret)
18971898
return -EFAULT;
18981899

@@ -1929,6 +1930,139 @@ static const struct file_operations eeh_dev_check_fops = {
19291930
.read = eeh_debugfs_dev_usage,
19301931
};
19311932

1933+
static int eeh_debugfs_break_device(struct pci_dev *pdev)
1934+
{
1935+
struct resource *bar = NULL;
1936+
void __iomem *mapped;
1937+
u16 old, bit;
1938+
int i, pos;
1939+
1940+
/* Do we have an MMIO BAR to disable? */
1941+
for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
1942+
struct resource *r = &pdev->resource[i];
1943+
1944+
if (!r->flags || !r->start)
1945+
continue;
1946+
if (r->flags & IORESOURCE_IO)
1947+
continue;
1948+
if (r->flags & IORESOURCE_UNSET)
1949+
continue;
1950+
1951+
bar = r;
1952+
break;
1953+
}
1954+
1955+
if (!bar) {
1956+
pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
1957+
return -ENXIO;
1958+
}
1959+
1960+
pci_err(pdev, "Going to break: %pR\n", bar);
1961+
1962+
if (pdev->is_virtfn) {
1963+
#ifndef CONFIG_IOV
1964+
return -ENXIO;
1965+
#else
1966+
/*
1967+
* VFs don't have a per-function COMMAND register, so the best
1968+
* we can do is clear the Memory Space Enable bit in the PF's
1969+
* SRIOV control reg.
1970+
*
1971+
* Unfortunately, this requires that we have a PF (i.e doesn't
1972+
* work for a passed-through VF) and it has the potential side
1973+
* effect of also causing an EEH on every other VF under the
1974+
* PF. Oh well.
1975+
*/
1976+
pdev = pdev->physfn;
1977+
if (!pdev)
1978+
return -ENXIO; /* passed through VFs have no PF */
1979+
1980+
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
1981+
pos += PCI_SRIOV_CTRL;
1982+
bit = PCI_SRIOV_CTRL_MSE;
1983+
#endif /* !CONFIG_IOV */
1984+
} else {
1985+
bit = PCI_COMMAND_MEMORY;
1986+
pos = PCI_COMMAND;
1987+
}
1988+
1989+
/*
1990+
* Process here is:
1991+
*
1992+
* 1. Disable Memory space.
1993+
*
1994+
* 2. Perform an MMIO to the device. This should result in an error
1995+
* (CA / UR) being raised by the device which results in an EEH
1996+
* PE freeze. Using the in_8() accessor skips the eeh detection hook
1997+
* so the freeze hook so the EEH Detection machinery won't be
1998+
* triggered here. This is to match the usual behaviour of EEH
1999+
* where the HW will asyncronously freeze a PE and it's up to
2000+
* the kernel to notice and deal with it.
2001+
*
2002+
* 3. Turn Memory space back on. This is more important for VFs
2003+
* since recovery will probably fail if we don't. For normal
2004+
* the COMMAND register is reset as a part of re-initialising
2005+
* the device.
2006+
*
2007+
* Breaking stuff is the point so who cares if it's racy ;)
2008+
*/
2009+
pci_read_config_word(pdev, pos, &old);
2010+
2011+
mapped = ioremap(bar->start, PAGE_SIZE);
2012+
if (!mapped) {
2013+
pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
2014+
return -ENXIO;
2015+
}
2016+
2017+
pci_write_config_word(pdev, pos, old & ~bit);
2018+
in_8(mapped);
2019+
pci_write_config_word(pdev, pos, old);
2020+
2021+
iounmap(mapped);
2022+
2023+
return 0;
2024+
}
2025+
2026+
static ssize_t eeh_dev_break_write(struct file *filp,
2027+
const char __user *user_buf,
2028+
size_t count, loff_t *ppos)
2029+
{
2030+
uint32_t domain, bus, dev, fn;
2031+
struct pci_dev *pdev;
2032+
char buf[20];
2033+
int ret;
2034+
2035+
memset(buf, 0, sizeof(buf));
2036+
ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
2037+
if (!ret)
2038+
return -EFAULT;
2039+
2040+
ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
2041+
if (ret != 4) {
2042+
pr_err("%s: expected 4 args, got %d\n", __func__, ret);
2043+
return -EINVAL;
2044+
}
2045+
2046+
pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
2047+
if (!pdev)
2048+
return -ENODEV;
2049+
2050+
ret = eeh_debugfs_break_device(pdev);
2051+
pci_dev_put(pdev);
2052+
2053+
if (ret < 0)
2054+
return ret;
2055+
2056+
return count;
2057+
}
2058+
2059+
static const struct file_operations eeh_dev_break_fops = {
2060+
.open = simple_open,
2061+
.llseek = no_llseek,
2062+
.write = eeh_dev_break_write,
2063+
.read = eeh_debugfs_dev_usage,
2064+
};
2065+
19322066
#endif
19332067

19342068
static int __init eeh_init_proc(void)
@@ -1947,6 +2081,9 @@ static int __init eeh_init_proc(void)
19472081
debugfs_create_file_unsafe("eeh_dev_check", 0600,
19482082
powerpc_debugfs_root, NULL,
19492083
&eeh_dev_check_fops);
2084+
debugfs_create_file_unsafe("eeh_dev_break", 0600,
2085+
powerpc_debugfs_root, NULL,
2086+
&eeh_dev_break_fops);
19502087
debugfs_create_file_unsafe("eeh_force_recover", 0600,
19512088
powerpc_debugfs_root, NULL,
19522089
&eeh_force_recover_fops);

0 commit comments

Comments
 (0)