Skip to content

Commit 9d67c94

Browse files
aikmpe
authored and committed
powerpc/iommu: Add "borrowing" iommu_table_group_ops
The PPC64 IOMMU API defines iommu_table_group_ops, which handles DMA windows for PEs: controlling the ownership and creating/setting/unsetting a table in the hardware for dynamic DMA windows (DDW). VFIO uses the API to implement support on POWER. So far only PowerNV IODA2 (POWER8 and newer machines) implemented this; the other cases (POWER7 or nested KVM) did not and instead reused existing iommu_table structs. This means 1) no DDW, and 2) ownership transfer is done directly in the VFIO SPAPR TCE driver. Soon POWER is going to get its own iommu_ops and ownership control is going to move there. This implements spapr_tce_table_group_ops, which borrows iommu_table tables. The upside is that VFIO needs to know less about POWER. The new ops return the existing table from create_table() and only check if the same window is already set. This is only going to work if the default DMA window starts at table_group.tce32_start and is as big as pe->table_group.tce32_size (not the case for IODA2+ PowerNV). This changes iommu_table_group_ops::take_ownership() to return an error if borrowing a table failed. This should not cause any visible change in behavior for PowerNV. pSeries was not that well tested/supported anyway. Signed-off-by: Alexey Kardashevskiy <[email protected]> Signed-off-by: Timothy Pearson <[email protected]> Acked-by: Alex Williamson <[email protected]> [mpe: Fix CONFIG_IOMMU_API=n build (skiroot_defconfig), & formatting] Signed-off-by: Michael Ellerman <[email protected]> Link: https://msgid.link/525438831.16998517.1678123820075.JavaMail.zimbra@raptorengineeringinc.com
1 parent eeac8ed commit 9d67c94

File tree

5 files changed

+125
-86
lines changed

5 files changed

+125
-86
lines changed

arch/powerpc/include/asm/iommu.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,7 @@ struct iommu_table_group_ops {
175175
long (*unset_window)(struct iommu_table_group *table_group,
176176
int num);
177177
/* Switch ownership from platform code to external user (e.g. VFIO) */
178-
void (*take_ownership)(struct iommu_table_group *table_group);
178+
long (*take_ownership)(struct iommu_table_group *table_group);
179179
/* Switch ownership from external user (e.g. VFIO) back to core */
180180
void (*release_ownership)(struct iommu_table_group *table_group);
181181
};
@@ -215,6 +215,8 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
215215
enum dma_data_direction *direction);
216216
extern void iommu_tce_kill(struct iommu_table *tbl,
217217
unsigned long entry, unsigned long pages);
218+
219+
extern struct iommu_table_group_ops spapr_tce_table_group_ops;
218220
#else
219221
static inline void iommu_register_group(struct iommu_table_group *table_group,
220222
int pci_domain_number,
@@ -303,8 +305,6 @@ extern int iommu_tce_check_gpa(unsigned long page_shift,
303305
iommu_tce_check_gpa((tbl)->it_page_shift, (gpa)))
304306

305307
extern void iommu_flush_tce(struct iommu_table *tbl);
306-
extern int iommu_take_ownership(struct iommu_table *tbl);
307-
extern void iommu_release_ownership(struct iommu_table *tbl);
308308

309309
extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
310310
extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir);

arch/powerpc/kernel/iommu.c

Lines changed: 94 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1086,7 +1086,7 @@ void iommu_tce_kill(struct iommu_table *tbl,
10861086
}
10871087
EXPORT_SYMBOL_GPL(iommu_tce_kill);
10881088

1089-
int iommu_take_ownership(struct iommu_table *tbl)
1089+
static int iommu_take_ownership(struct iommu_table *tbl)
10901090
{
10911091
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
10921092
int ret = 0;
@@ -1118,9 +1118,8 @@ int iommu_take_ownership(struct iommu_table *tbl)
11181118

11191119
return ret;
11201120
}
1121-
EXPORT_SYMBOL_GPL(iommu_take_ownership);
11221121

1123-
void iommu_release_ownership(struct iommu_table *tbl)
1122+
static void iommu_release_ownership(struct iommu_table *tbl)
11241123
{
11251124
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
11261125

@@ -1137,7 +1136,6 @@ void iommu_release_ownership(struct iommu_table *tbl)
11371136
spin_unlock(&tbl->pools[i].lock);
11381137
spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
11391138
}
1140-
EXPORT_SYMBOL_GPL(iommu_release_ownership);
11411139

11421140
int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
11431141
{
@@ -1179,4 +1177,96 @@ void iommu_del_device(struct device *dev)
11791177
iommu_group_remove_device(dev);
11801178
}
11811179
EXPORT_SYMBOL_GPL(iommu_del_device);
1180+
1181+
/*
1182+
* A simple iommu_table_group_ops which only allows reusing the existing
1183+
* iommu_table. This handles VFIO for POWER7 or the nested KVM.
1184+
* The ops does not allow creating windows and only allows reusing the existing
1185+
* one if it matches table_group->tce32_start/tce32_size/page_shift.
1186+
*/
1187+
static unsigned long spapr_tce_get_table_size(__u32 page_shift,
1188+
__u64 window_size, __u32 levels)
1189+
{
1190+
unsigned long size;
1191+
1192+
if (levels > 1)
1193+
return ~0U;
1194+
size = window_size >> (page_shift - 3);
1195+
return size;
1196+
}
1197+
1198+
/*
 * "Create" a table by handing back the group's existing table 0.
 *
 * @table_group: group whose tables[0] is borrowed
 * @num:         window number; anything above 0 is refused with -EPERM
 * @page_shift:  requested IOMMU page shift
 * @window_size: requested window size, in bytes
 * @levels:      requested number of table levels
 * @ptbl:        receives the result of iommu_tce_table_get() on the table
 *
 * No new table is allocated. The request must describe the existing
 * table exactly (page shift, number of entries, indirect levels),
 * otherwise -EINVAL is returned. Returns 0 on success.
 */
static long spapr_tce_create_table(struct iommu_table_group *table_group, int num,
				   __u32 page_shift, __u64 window_size, __u32 levels,
				   struct iommu_table **ptbl)
{
	struct iommu_table *existing = table_group->tables[0];

	if (num > 0)
		return -EPERM;

	/* Each mismatch below means the caller asked for a different window. */
	if (existing->it_page_shift != page_shift)
		return -EINVAL;
	if (existing->it_size != (window_size >> page_shift))
		return -EINVAL;
	if (existing->it_indirect_levels != levels - 1)
		return -EINVAL;

	*ptbl = iommu_tce_table_get(existing);
	return 0;
}
1215+
1216+
static long spapr_tce_set_window(struct iommu_table_group *table_group,
1217+
int num, struct iommu_table *tbl)
1218+
{
1219+
return tbl == table_group->tables[num] ? 0 : -EPERM;
1220+
}
1221+
1222+
/*
 * Unsetting a window is a no-op for these ops: the function touches
 * neither @table_group nor slot @num and always reports success (0).
 */
static long spapr_tce_unset_window(struct iommu_table_group *table_group,
				   int num)
{
	return 0;
}
1226+
1227+
static long spapr_tce_take_ownership(struct iommu_table_group *table_group)
1228+
{
1229+
int i, j, rc = 0;
1230+
1231+
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1232+
struct iommu_table *tbl = table_group->tables[i];
1233+
1234+
if (!tbl || !tbl->it_map)
1235+
continue;
1236+
1237+
rc = iommu_take_ownership(tbl);
1238+
if (!rc)
1239+
continue;
1240+
for (j = 0; j < i; ++j)
1241+
iommu_release_ownership(table_group->tables[j]);
1242+
return rc;
1243+
}
1244+
return 0;
1245+
}
1246+
1247+
static void spapr_tce_release_ownership(struct iommu_table_group *table_group)
1248+
{
1249+
int i;
1250+
1251+
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1252+
struct iommu_table *tbl = table_group->tables[i];
1253+
1254+
if (!tbl)
1255+
continue;
1256+
1257+
iommu_table_clear(tbl);
1258+
if (tbl->it_map)
1259+
iommu_release_ownership(tbl);
1260+
}
1261+
}
1262+
1263+
struct iommu_table_group_ops spapr_tce_table_group_ops = {
1264+
.get_table_size = spapr_tce_get_table_size,
1265+
.create_table = spapr_tce_create_table,
1266+
.set_window = spapr_tce_set_window,
1267+
.unset_window = spapr_tce_unset_window,
1268+
.take_ownership = spapr_tce_take_ownership,
1269+
.release_ownership = spapr_tce_release_ownership,
1270+
};
1271+
11821272
#endif /* CONFIG_IOMMU_API */

arch/powerpc/platforms/powernv/pci-ioda.c

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1554,6 +1554,10 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
15541554
if (WARN_ON(!tbl))
15551555
return;
15561556

1557+
#ifdef CONFIG_IOMMU_API
1558+
pe->table_group.ops = &spapr_tce_table_group_ops;
1559+
pe->table_group.pgsizes = SZ_4K;
1560+
#endif
15571561
iommu_register_group(&pe->table_group, phb->hose->global_number,
15581562
pe->pe_number);
15591563
pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
@@ -1888,7 +1892,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
18881892
}
18891893
}
18901894

1891-
static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
1895+
static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
18921896
{
18931897
struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
18941898
table_group);
@@ -1902,6 +1906,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
19021906
else if (pe->pdev)
19031907
set_iommu_table_base(&pe->pdev->dev, NULL);
19041908
iommu_tce_table_put(tbl);
1909+
1910+
return 0;
19051911
}
19061912

19071913
static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)

arch/powerpc/platforms/pseries/iommu.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,11 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
7474
if (!table_group)
7575
return NULL;
7676

77+
#ifdef CONFIG_IOMMU_API
78+
table_group->ops = &spapr_tce_table_group_ops;
79+
table_group->pgsizes = SZ_4K;
80+
#endif
81+
7782
table_group->tables[0] = iommu_pseries_alloc_table(node);
7883
if (table_group->tables[0])
7984
return table_group;

drivers/vfio/vfio_iommu_spapr_tce.c

Lines changed: 16 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -1189,52 +1189,6 @@ static long tce_iommu_ioctl(void *iommu_data,
11891189

11901190
static void tce_iommu_release_ownership(struct tce_container *container,
11911191
struct iommu_table_group *table_group)
1192-
{
1193-
int i;
1194-
1195-
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1196-
struct iommu_table *tbl = container->tables[i];
1197-
1198-
if (!tbl)
1199-
continue;
1200-
1201-
tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
1202-
if (tbl->it_map)
1203-
iommu_release_ownership(tbl);
1204-
1205-
container->tables[i] = NULL;
1206-
}
1207-
}
1208-
1209-
static int tce_iommu_take_ownership(struct tce_container *container,
1210-
struct iommu_table_group *table_group)
1211-
{
1212-
int i, j, rc = 0;
1213-
1214-
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
1215-
struct iommu_table *tbl = table_group->tables[i];
1216-
1217-
if (!tbl || !tbl->it_map)
1218-
continue;
1219-
1220-
rc = iommu_take_ownership(tbl);
1221-
if (rc) {
1222-
for (j = 0; j < i; ++j)
1223-
iommu_release_ownership(
1224-
table_group->tables[j]);
1225-
1226-
return rc;
1227-
}
1228-
}
1229-
1230-
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
1231-
container->tables[i] = table_group->tables[i];
1232-
1233-
return 0;
1234-
}
1235-
1236-
static void tce_iommu_release_ownership_ddw(struct tce_container *container,
1237-
struct iommu_table_group *table_group)
12381192
{
12391193
long i;
12401194

@@ -1250,18 +1204,14 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
12501204
table_group->ops->release_ownership(table_group);
12511205
}
12521206

1253-
static long tce_iommu_take_ownership_ddw(struct tce_container *container,
1207+
static long tce_iommu_take_ownership(struct tce_container *container,
12541208
struct iommu_table_group *table_group)
12551209
{
12561210
long i, ret = 0;
12571211

1258-
if (!table_group->ops->create_table || !table_group->ops->set_window ||
1259-
!table_group->ops->release_ownership) {
1260-
WARN_ON_ONCE(1);
1261-
return -EFAULT;
1262-
}
1263-
1264-
table_group->ops->take_ownership(table_group);
1212+
ret = table_group->ops->take_ownership(table_group);
1213+
if (ret)
1214+
return ret;
12651215

12661216
/* Set all windows to the new group */
12671217
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
@@ -1307,9 +1257,14 @@ static int tce_iommu_attach_group(void *iommu_data,
13071257
goto unlock_exit;
13081258
}
13091259

1310-
if (tce_groups_attached(container) && (!table_group->ops ||
1311-
!table_group->ops->take_ownership ||
1312-
!table_group->ops->release_ownership)) {
1260+
/* v2 requires full support of dynamic DMA windows */
1261+
if (container->v2 && table_group->max_dynamic_windows_supported == 0) {
1262+
ret = -EINVAL;
1263+
goto unlock_exit;
1264+
}
1265+
1266+
/* v1 reuses TCE tables and does not share them among PEs */
1267+
if (!container->v2 && tce_groups_attached(container)) {
13131268
ret = -EBUSY;
13141269
goto unlock_exit;
13151270
}
@@ -1344,29 +1299,15 @@ static int tce_iommu_attach_group(void *iommu_data,
13441299
goto unlock_exit;
13451300
}
13461301

1347-
if (!table_group->ops || !table_group->ops->take_ownership ||
1348-
!table_group->ops->release_ownership) {
1349-
if (container->v2) {
1350-
ret = -EPERM;
1351-
goto free_exit;
1352-
}
1353-
ret = tce_iommu_take_ownership(container, table_group);
1354-
} else {
1355-
if (!container->v2) {
1356-
ret = -EPERM;
1357-
goto free_exit;
1358-
}
1359-
ret = tce_iommu_take_ownership_ddw(container, table_group);
1360-
if (!tce_groups_attached(container) && !container->tables[0])
1361-
container->def_window_pending = true;
1362-
}
1302+
ret = tce_iommu_take_ownership(container, table_group);
1303+
if (!tce_groups_attached(container) && !container->tables[0])
1304+
container->def_window_pending = true;
13631305

13641306
if (!ret) {
13651307
tcegrp->grp = iommu_group;
13661308
list_add(&tcegrp->next, &container->group_list);
13671309
}
13681310

1369-
free_exit:
13701311
if (ret && tcegrp)
13711312
kfree(tcegrp);
13721313

@@ -1405,10 +1346,7 @@ static void tce_iommu_detach_group(void *iommu_data,
14051346
table_group = iommu_group_get_iommudata(iommu_group);
14061347
BUG_ON(!table_group);
14071348

1408-
if (!table_group->ops || !table_group->ops->release_ownership)
1409-
tce_iommu_release_ownership(container, table_group);
1410-
else
1411-
tce_iommu_release_ownership_ddw(container, table_group);
1349+
tce_iommu_release_ownership(container, table_group);
14121350

14131351
unlock_exit:
14141352
mutex_unlock(&container->lock);

0 commit comments

Comments
 (0)