Skip to content

Commit 8698cb9

Browse files
shayshyi authored and Saeed Mahameed committed
net/mlx5: Perform DMA operations in the right locations
The cited patch changed the mlx5 driver so that during probe DMA operations were performed before pci_enable_device(), and during teardown DMA operations were performed after pci_disable_device(). DMA operations require PCI to be enabled. Hence, the above leads to the following oops on PPC systems [1]. On s390x systems, as reported by Niklas Schnelle, this is a problem because mlx5_pci_init() is where the DMA and coherent masks are set, but mlx5_cmd_init() already does a dma_alloc_coherent(). Thus a DMA allocation is done during probe before the correct mask is set. This causes probe to fail initialization of the cmdif SW structs on s390x once that architecture is converted to the common dma-iommu code. This is because on s390x DMA addresses below 4 GiB are reserved on current machines and, unlike the old s390x-specific DMA API implementation, common code enforces DMA masks. Fix it by performing the DMA operations during probe after pci_enable_device() and after the DMA mask is set, and during teardown before pci_disable_device().
[1] Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user iptable_nat xt_addrtype xt_conntrack nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 netconsole rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser ib_umad rdma_cm ib_ipoib iw_cm libiscsi scsi_transport_iscsi ib_cm ib_uverbs ib_core mlx5_core(-) ptp pps_core fuse vmx_crypto crc32c_vpmsum [last unloaded: mlx5_ib] CPU: 1 PID: 8937 Comm: modprobe Not tainted 6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02 #1 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries NIP: c000000000423388 LR: c0000000001e733c CTR: c0000000001e4720 REGS: c0000000055636d0 TRAP: 0380 Not tainted (6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02) MSR: 8000000000009033 CR: 24008884 XER: 20040000 CFAR: c0000000001e7338 IRQMASK: 0 NIP [c000000000423388] __free_pages+0x28/0x160 LR [c0000000001e733c] dma_direct_free+0xac/0x190 Call Trace: [c000000005563970] [5deadbeef0000100] 0x5deadbeef0000100 (unreliable) [c0000000055639b0] [c0000000003d46cc] kfree+0x7c/0x150 [c000000005563a40] [c0000000001e47c8] dma_free_attrs+0xa8/0x1a0 [c000000005563aa0] [c008000000d0064c] mlx5_cmd_cleanup+0xa4/0x100 [mlx5_core] [c000000005563ad0] [c008000000cf629c] mlx5_mdev_uninit+0xf4/0x140 [mlx5_core] [c000000005563b00] [c008000000cf6448] remove_one+0x160/0x1d0 [mlx5_core] [c000000005563b40] [c000000000958540] pci_device_remove+0x60/0x110 [c000000005563b80] [c000000000a35e80] device_remove+0x70/0xd0 [c000000005563bb0] [c000000000a37a38] device_release_driver_internal+0x2a8/0x330 [c000000005563c00] [c000000000a37b8c] driver_detach+0x8c/0x160 [c000000005563c40] [c000000000a35350] bus_remove_driver+0x90/0x110 [c000000005563c80] [c000000000a38948] driver_unregister+0x48/0x90 [c000000005563cf0] [c000000000957e38] pci_unregister_driver+0x38/0x150 [c000000005563d40] 
[c008000000eb6140] mlx5_cleanup+0x38/0x90 [mlx5_core] Fixes: 06cd555 ("net/mlx5: split mlx5_cmd_init() to probe and reload routines") Signed-off-by: Shay Drory <[email protected]> Reviewed-by: Moshe Shemesh <[email protected]> Reviewed-by: Tariq Toukan <[email protected]> Reviewed-by: Leon Romanovsky <[email protected]> Reviewed-by: Niklas Schnelle <[email protected]> Tested-by: Niklas Schnelle <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]>
1 parent b91e840 commit 8698cb9

File tree

1 file changed

+28
-36
lines changed
  • drivers/net/ethernet/mellanox/mlx5/core

1 file changed

+28
-36
lines changed

drivers/net/ethernet/mellanox/mlx5/core/cmd.c

Lines changed: 28 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -2186,52 +2186,23 @@ static u16 cmdif_rev(struct mlx5_core_dev *dev)
21862186

21872187
int mlx5_cmd_init(struct mlx5_core_dev *dev)
21882188
{
2189-
int size = sizeof(struct mlx5_cmd_prot_block);
2190-
int align = roundup_pow_of_two(size);
21912189
struct mlx5_cmd *cmd = &dev->cmd;
2192-
u32 cmd_l;
2193-
int err;
2194-
2195-
cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
2196-
if (!cmd->pool)
2197-
return -ENOMEM;
21982190

2199-
err = alloc_cmd_page(dev, cmd);
2200-
if (err)
2201-
goto err_free_pool;
2202-
2203-
cmd_l = (u32)(cmd->dma);
2204-
if (cmd_l & 0xfff) {
2205-
mlx5_core_err(dev, "invalid command queue address\n");
2206-
err = -ENOMEM;
2207-
goto err_cmd_page;
2208-
}
22092191
cmd->checksum_disabled = 1;
22102192

22112193
spin_lock_init(&cmd->alloc_lock);
22122194
spin_lock_init(&cmd->token_lock);
22132195

2214-
create_msg_cache(dev);
2215-
22162196
set_wqname(dev);
22172197
cmd->wq = create_singlethread_workqueue(cmd->wq_name);
22182198
if (!cmd->wq) {
22192199
mlx5_core_err(dev, "failed to create command workqueue\n");
2220-
err = -ENOMEM;
2221-
goto err_cache;
2200+
return -ENOMEM;
22222201
}
22232202

22242203
mlx5_cmdif_debugfs_init(dev);
22252204

22262205
return 0;
2227-
2228-
err_cache:
2229-
destroy_msg_cache(dev);
2230-
err_cmd_page:
2231-
free_cmd_page(dev, cmd);
2232-
err_free_pool:
2233-
dma_pool_destroy(cmd->pool);
2234-
return err;
22352206
}
22362207

22372208
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
@@ -2240,15 +2211,15 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
22402211

22412212
mlx5_cmdif_debugfs_cleanup(dev);
22422213
destroy_workqueue(cmd->wq);
2243-
destroy_msg_cache(dev);
2244-
free_cmd_page(dev, cmd);
2245-
dma_pool_destroy(cmd->pool);
22462214
}
22472215

22482216
int mlx5_cmd_enable(struct mlx5_core_dev *dev)
22492217
{
2218+
int size = sizeof(struct mlx5_cmd_prot_block);
2219+
int align = roundup_pow_of_two(size);
22502220
struct mlx5_cmd *cmd = &dev->cmd;
22512221
u32 cmd_h, cmd_l;
2222+
int err;
22522223

22532224
memset(&cmd->vars, 0, sizeof(cmd->vars));
22542225
cmd->vars.cmdif_rev = cmdif_rev(dev);
@@ -2281,10 +2252,21 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev)
22812252
sema_init(&cmd->vars.pages_sem, 1);
22822253
sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2));
22832254

2255+
cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
2256+
if (!cmd->pool)
2257+
return -ENOMEM;
2258+
2259+
err = alloc_cmd_page(dev, cmd);
2260+
if (err)
2261+
goto err_free_pool;
2262+
22842263
cmd_h = (u32)((u64)(cmd->dma) >> 32);
22852264
cmd_l = (u32)(cmd->dma);
2286-
if (WARN_ON(cmd_l & 0xfff))
2287-
return -EINVAL;
2265+
if (cmd_l & 0xfff) {
2266+
mlx5_core_err(dev, "invalid command queue address\n");
2267+
err = -ENOMEM;
2268+
goto err_cmd_page;
2269+
}
22882270

22892271
iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
22902272
iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz);
@@ -2297,17 +2279,27 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev)
22972279
cmd->mode = CMD_MODE_POLLING;
22982280
cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
22992281

2282+
create_msg_cache(dev);
23002283
create_debugfs_files(dev);
23012284

23022285
return 0;
2286+
2287+
err_cmd_page:
2288+
free_cmd_page(dev, cmd);
2289+
err_free_pool:
2290+
dma_pool_destroy(cmd->pool);
2291+
return err;
23032292
}
23042293

23052294
void mlx5_cmd_disable(struct mlx5_core_dev *dev)
23062295
{
23072296
struct mlx5_cmd *cmd = &dev->cmd;
23082297

2309-
clean_debug_files(dev);
23102298
flush_workqueue(cmd->wq);
2299+
clean_debug_files(dev);
2300+
destroy_msg_cache(dev);
2301+
free_cmd_page(dev, cmd);
2302+
dma_pool_destroy(cmd->pool);
23112303
}
23122304

23132305
void mlx5_cmd_set_state(struct mlx5_core_dev *dev,

0 commit comments

Comments
 (0)