Skip to content

Commit a940904

Browse files
aik authored and mpe committed
powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains
Up until now PPC64 managed to avoid using iommu_ops. The VFIO driver uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in the Type1 VFIO driver. Recent development added 2 uses of iommu_ops to the generic VFIO which broke POWER:
- a coherency capability check;
- blocking IOMMU domain - iommu_group_dma_owner_claimed()/...

This adds a simple iommu_ops which reports support for cache coherency and provides a basic support for blocking domains. No other domain types are implemented so the default domain is NULL.

Since now iommu_ops controls the group ownership, this takes it out of VFIO.

This adds an IOMMU device into a pci_controller (=PHB) and registers it in the IOMMU subsystem, iommu_ops is registered at this point. This setup is done in postcore_initcall_sync.

This replaces iommu_group_add_device() with iommu_probe_device() as the former misses necessary steps in connecting PCI devices to IOMMU devices. This adds a comment about why explicit iommu_probe_device() is still needed.

The previous discussion is here:
https://lore.kernel.org/r/[email protected]/
https://lore.kernel.org/r/[email protected]/

Fixes: e8ae0e1 ("vfio: Require that devices support DMA cache coherence")
Fixes: 70693f4 ("vfio: Set DMA ownership for VFIO devices")
Signed-off-by: Alexey Kardashevskiy <[email protected]>
Signed-off-by: Timothy Pearson <[email protected]>
Acked-by: Alex Williamson <[email protected]>
[mpe: Fix CONFIG_IOMMU_API=n build]
Signed-off-by: Michael Ellerman <[email protected]>
Link: https://msgid.link/2000135730.16998523.1678123860135.JavaMail.zimbra@raptorengineeringinc.com
1 parent 76f3510 commit a940904

File tree

7 files changed

+218
-10
lines changed

7 files changed

+218
-10
lines changed

arch/powerpc/include/asm/pci-bridge.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/list.h>
99
#include <linux/ioport.h>
1010
#include <linux/numa.h>
11+
#include <linux/iommu.h>
1112

1213
struct device_node;
1314

@@ -44,6 +45,9 @@ struct pci_controller_ops {
4445
#endif
4546

4647
void (*shutdown)(struct pci_controller *hose);
48+
49+
struct iommu_group *(*device_group)(struct pci_controller *hose,
50+
struct pci_dev *pdev);
4751
};
4852

4953
/*
@@ -131,6 +135,9 @@ struct pci_controller {
131135
struct irq_domain *dev_domain;
132136
struct irq_domain *msi_domain;
133137
struct fwnode_handle *fwnode;
138+
139+
/* iommu_ops support */
140+
struct iommu_device iommu;
134141
};
135142

136143
/* These are used for config access before all the PCI probing

arch/powerpc/kernel/iommu.c

Lines changed: 146 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <asm/vio.h>
3636
#include <asm/tce.h>
3737
#include <asm/mmu_context.h>
38+
#include <asm/ppc-pci.h>
3839

3940
#define DBG(...)
4041

@@ -1156,8 +1157,14 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
11561157

11571158
pr_debug("%s: Adding %s to iommu group %d\n",
11581159
__func__, dev_name(dev), iommu_group_id(table_group->group));
1159-
1160-
return iommu_group_add_device(table_group->group, dev);
1160+
/*
1161+
* This is still not adding devices via the IOMMU bus notifier because
1162+
* of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls
1163+
* pcibios_scan_phb() first (and this guy adds devices and triggers
1164+
* the notifier) and only then it calls pci_bus_add_devices() which
1165+
* configures DMA for buses which also creates PEs and IOMMU groups.
1166+
*/
1167+
return iommu_probe_device(dev);
11611168
}
11621169
EXPORT_SYMBOL_GPL(iommu_add_device);
11631170

@@ -1237,6 +1244,7 @@ static long spapr_tce_take_ownership(struct iommu_table_group *table_group)
12371244
rc = iommu_take_ownership(tbl);
12381245
if (!rc)
12391246
continue;
1247+
12401248
for (j = 0; j < i; ++j)
12411249
iommu_release_ownership(table_group->tables[j]);
12421250
return rc;
@@ -1269,4 +1277,140 @@ struct iommu_table_group_ops spapr_tce_table_group_ops = {
12691277
.release_ownership = spapr_tce_release_ownership,
12701278
};
12711279

1280+
/*
1281+
* A simple iommu_ops to allow less cruft in generic VFIO code.
1282+
*/
1283+
static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom,
1284+
struct device *dev)
1285+
{
1286+
struct iommu_group *grp = iommu_group_get(dev);
1287+
struct iommu_table_group *table_group;
1288+
int ret = -EINVAL;
1289+
1290+
if (!grp)
1291+
return -ENODEV;
1292+
1293+
table_group = iommu_group_get_iommudata(grp);
1294+
ret = table_group->ops->take_ownership(table_group);
1295+
iommu_group_put(grp);
1296+
1297+
return ret;
1298+
}
1299+
1300+
static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev)
1301+
{
1302+
struct iommu_group *grp = iommu_group_get(dev);
1303+
struct iommu_table_group *table_group;
1304+
1305+
table_group = iommu_group_get_iommudata(grp);
1306+
table_group->ops->release_ownership(table_group);
1307+
}
1308+
1309+
static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = {
1310+
.attach_dev = spapr_tce_blocking_iommu_attach_dev,
1311+
};
1312+
1313+
static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
1314+
{
1315+
switch (cap) {
1316+
case IOMMU_CAP_CACHE_COHERENCY:
1317+
return true;
1318+
default:
1319+
break;
1320+
}
1321+
1322+
return false;
1323+
}
1324+
1325+
static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
1326+
{
1327+
struct iommu_domain *dom;
1328+
1329+
if (type != IOMMU_DOMAIN_BLOCKED)
1330+
return NULL;
1331+
1332+
dom = kzalloc(sizeof(*dom), GFP_KERNEL);
1333+
if (!dom)
1334+
return NULL;
1335+
1336+
dom->ops = &spapr_tce_blocking_domain_ops;
1337+
1338+
return dom;
1339+
}
1340+
1341+
static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
1342+
{
1343+
struct pci_dev *pdev;
1344+
struct pci_controller *hose;
1345+
1346+
if (!dev_is_pci(dev))
1347+
return ERR_PTR(-EPERM);
1348+
1349+
pdev = to_pci_dev(dev);
1350+
hose = pdev->bus->sysdata;
1351+
1352+
return &hose->iommu;
1353+
}
1354+
1355+
static void spapr_tce_iommu_release_device(struct device *dev)
1356+
{
1357+
}
1358+
1359+
static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
1360+
{
1361+
struct pci_controller *hose;
1362+
struct pci_dev *pdev;
1363+
1364+
pdev = to_pci_dev(dev);
1365+
hose = pdev->bus->sysdata;
1366+
1367+
if (!hose->controller_ops.device_group)
1368+
return ERR_PTR(-ENOENT);
1369+
1370+
return hose->controller_ops.device_group(hose, pdev);
1371+
}
1372+
1373+
static const struct iommu_ops spapr_tce_iommu_ops = {
1374+
.capable = spapr_tce_iommu_capable,
1375+
.domain_alloc = spapr_tce_iommu_domain_alloc,
1376+
.probe_device = spapr_tce_iommu_probe_device,
1377+
.release_device = spapr_tce_iommu_release_device,
1378+
.device_group = spapr_tce_iommu_device_group,
1379+
.set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma,
1380+
};
1381+
1382+
static struct attribute *spapr_tce_iommu_attrs[] = {
1383+
NULL,
1384+
};
1385+
1386+
static struct attribute_group spapr_tce_iommu_group = {
1387+
.name = "spapr-tce-iommu",
1388+
.attrs = spapr_tce_iommu_attrs,
1389+
};
1390+
1391+
static const struct attribute_group *spapr_tce_iommu_groups[] = {
1392+
&spapr_tce_iommu_group,
1393+
NULL,
1394+
};
1395+
1396+
/*
1397+
* This registers IOMMU devices of PHBs. This needs to happen
1398+
* after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
1399+
* before subsys_initcall(iommu_subsys_init).
1400+
*/
1401+
static int __init spapr_tce_setup_phb_iommus_initcall(void)
1402+
{
1403+
struct pci_controller *hose;
1404+
1405+
list_for_each_entry(hose, &hose_list, list_node) {
1406+
iommu_device_sysfs_add(&hose->iommu, hose->parent,
1407+
spapr_tce_iommu_groups, "iommu-phb%04x",
1408+
hose->global_number);
1409+
iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops,
1410+
hose->parent);
1411+
}
1412+
return 0;
1413+
}
1414+
postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall);
1415+
12721416
#endif /* CONFIG_IOMMU_API */

arch/powerpc/platforms/powernv/pci-ioda.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,6 +1899,13 @@ static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
18991899
/* Store @tbl as pnv_pci_ioda2_unset_window() resets it */
19001900
struct iommu_table *tbl = pe->table_group.tables[0];
19011901

1902+
/*
1903+
* iommu_ops transfers the ownership per a device and we model
1904+
* the group ownership with the first device in the group.
1905+
*/
1906+
if (!tbl)
1907+
return 0;
1908+
19021909
pnv_pci_ioda2_set_bypass(pe, false);
19031910
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
19041911
if (pe->pbus)
@@ -1915,6 +1922,9 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
19151922
struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
19161923
table_group);
19171924

1925+
/* See the comment about iommu_ops above */
1926+
if (pe->table_group.tables[0])
1927+
return;
19181928
pnv_pci_ioda2_setup_default_config(pe);
19191929
if (pe->pbus)
19201930
pnv_ioda_setup_bus_dma(pe, pe->pbus);
@@ -2921,6 +2931,27 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
29212931
}
29222932
}
29232933

2934+
#ifdef CONFIG_IOMMU_API
2935+
static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose,
2936+
struct pci_dev *pdev)
2937+
{
2938+
struct pnv_phb *phb = hose->private_data;
2939+
struct pnv_ioda_pe *pe;
2940+
2941+
if (WARN_ON(!phb))
2942+
return ERR_PTR(-ENODEV);
2943+
2944+
pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8));
2945+
if (!pe)
2946+
return ERR_PTR(-ENODEV);
2947+
2948+
if (!pe->table_group.group)
2949+
return ERR_PTR(-ENODEV);
2950+
2951+
return iommu_group_ref_get(pe->table_group.group);
2952+
}
2953+
#endif
2954+
29242955
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
29252956
.dma_dev_setup = pnv_pci_ioda_dma_dev_setup,
29262957
.dma_bus_setup = pnv_pci_ioda_dma_bus_setup,
@@ -2931,6 +2962,9 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
29312962
.setup_bridge = pnv_pci_fixup_bridge_resources,
29322963
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
29332964
.shutdown = pnv_pci_ioda_shutdown,
2965+
#ifdef CONFIG_IOMMU_API
2966+
.device_group = pnv_pci_device_group,
2967+
#endif
29342968
};
29352969

29362970
static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {

arch/powerpc/platforms/pseries/iommu.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,3 +1729,27 @@ static int __init tce_iommu_bus_notifier_init(void)
17291729
return 0;
17301730
}
17311731
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
1732+
1733+
#ifdef CONFIG_SPAPR_TCE_IOMMU
1734+
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
1735+
struct pci_dev *pdev)
1736+
{
1737+
struct device_node *pdn, *dn = pdev->dev.of_node;
1738+
struct iommu_group *grp;
1739+
struct pci_dn *pci;
1740+
1741+
pdn = pci_dma_find(dn, NULL);
1742+
if (!pdn || !PCI_DN(pdn))
1743+
return ERR_PTR(-ENODEV);
1744+
1745+
pci = PCI_DN(pdn);
1746+
if (!pci->table_group)
1747+
return ERR_PTR(-ENODEV);
1748+
1749+
grp = pci->table_group->group;
1750+
if (!grp)
1751+
return ERR_PTR(-ENODEV);
1752+
1753+
return iommu_group_ref_get(grp);
1754+
}
1755+
#endif

arch/powerpc/platforms/pseries/pseries.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,5 +123,9 @@ static inline void pseries_lpar_read_hblkrm_characteristics(void) { }
123123
#endif
124124

125125
void pseries_rng_init(void);
126+
#ifdef CONFIG_SPAPR_TCE_IOMMU
127+
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
128+
struct pci_dev *pdev);
129+
#endif
126130

127131
#endif /* _PSERIES_PSERIES_H */

arch/powerpc/platforms/pseries/setup.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,9 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
11181118

11191119
struct pci_controller_ops pseries_pci_controller_ops = {
11201120
.probe_mode = pSeries_pci_probe_mode,
1121+
#ifdef CONFIG_SPAPR_TCE_IOMMU
1122+
.device_group = pSeries_pci_device_group,
1123+
#endif
11211124
};
11221125

11231126
define_machine(pseries) {

drivers/vfio/vfio_iommu_spapr_tce.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,19 +1200,13 @@ static void tce_iommu_release_ownership(struct tce_container *container,
12001200
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
12011201
if (container->tables[i])
12021202
table_group->ops->unset_window(table_group, i);
1203-
1204-
table_group->ops->release_ownership(table_group);
12051203
}
12061204

12071205
static long tce_iommu_take_ownership(struct tce_container *container,
12081206
struct iommu_table_group *table_group)
12091207
{
12101208
long i, ret = 0;
12111209

1212-
ret = table_group->ops->take_ownership(table_group);
1213-
if (ret)
1214-
return ret;
1215-
12161210
/* Set all windows to the new group */
12171211
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
12181212
struct iommu_table *tbl = container->tables[i];
@@ -1231,8 +1225,6 @@ static long tce_iommu_take_ownership(struct tce_container *container,
12311225
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
12321226
table_group->ops->unset_window(table_group, i);
12331227

1234-
table_group->ops->release_ownership(table_group);
1235-
12361228
return ret;
12371229
}
12381230

0 commit comments

Comments
 (0)