Skip to content

Commit 90b5c1d

Browse files
yhuang-intelbjorn-helgaas
authored andcommitted
PCI/PM: Fix deadlock when unbinding device if parent in D3cold
If a PCI device and its parents are put into D3cold, unbinding the device will trigger deadlock as follow: - driver_unbind - device_release_driver - device_lock(dev) <--- previous lock here - __device_release_driver - pm_runtime_get_sync ... - rpm_resume(dev) - rpm_resume(dev->parent) ... - pci_pm_runtime_resume ... - pci_set_power_state - __pci_start_power_transition - pci_wakeup_bus(dev->parent->subordinate) - pci_walk_bus - device_lock(dev) <--- deadlock here If we do not do device_lock in pci_walk_bus, we can avoid deadlock. Device_lock in pci_walk_bus is introduced in commit: d71374d, corresponding email thread is: https://lkml.org/lkml/2006/5/26/38. The patch author Zhang Yanmin said device_lock is added to pci_walk_bus because: Some error handling functions call pci_walk_bus. For example, PCIe aer. Here we lock the device, so the driver wouldn't detach from the device, as the cb might call driver's callback function. So I fixed the deadlock as follows: - remove device_lock from pci_walk_bus - add device_lock into callback if callback will call driver's callback I checked pci_walk_bus users one by one, and found only PCIe aer needs device lock. Signed-off-by: Huang Ying <[email protected]> Signed-off-by: Bjorn Helgaas <[email protected]> Acked-by: Rafael J. Wysocki <[email protected]> CC: [email protected] # v3.6+ CC: Zhang Yanmin <[email protected]>
1 parent 8f0d816 commit 90b5c1d

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

drivers/pci/bus.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,10 +320,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
320320
} else
321321
next = dev->bus_list.next;
322322

323-
/* Run device routines with the device locked */
324-
device_lock(&dev->dev);
325323
retval = cb(dev, userdata);
326-
device_unlock(&dev->dev);
327324
if (retval)
328325
break;
329326
}

drivers/pci/pcie/aer/aerdrv_core.c

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ static int report_error_detected(struct pci_dev *dev, void *data)
213213
struct aer_broadcast_data *result_data;
214214
result_data = (struct aer_broadcast_data *) data;
215215

216+
device_lock(&dev->dev);
216217
dev->error_state = result_data->state;
217218

218219
if (!dev->driver ||
@@ -231,12 +232,14 @@ static int report_error_detected(struct pci_dev *dev, void *data)
231232
dev->driver ?
232233
"no AER-aware driver" : "no driver");
233234
}
234-
return 0;
235+
goto out;
235236
}
236237

237238
err_handler = dev->driver->err_handler;
238239
vote = err_handler->error_detected(dev, result_data->state);
239240
result_data->result = merge_result(result_data->result, vote);
241+
out:
242+
device_unlock(&dev->dev);
240243
return 0;
241244
}
242245

@@ -247,14 +250,17 @@ static int report_mmio_enabled(struct pci_dev *dev, void *data)
247250
struct aer_broadcast_data *result_data;
248251
result_data = (struct aer_broadcast_data *) data;
249252

253+
device_lock(&dev->dev);
250254
if (!dev->driver ||
251255
!dev->driver->err_handler ||
252256
!dev->driver->err_handler->mmio_enabled)
253-
return 0;
257+
goto out;
254258

255259
err_handler = dev->driver->err_handler;
256260
vote = err_handler->mmio_enabled(dev);
257261
result_data->result = merge_result(result_data->result, vote);
262+
out:
263+
device_unlock(&dev->dev);
258264
return 0;
259265
}
260266

@@ -265,30 +271,36 @@ static int report_slot_reset(struct pci_dev *dev, void *data)
265271
struct aer_broadcast_data *result_data;
266272
result_data = (struct aer_broadcast_data *) data;
267273

274+
device_lock(&dev->dev);
268275
if (!dev->driver ||
269276
!dev->driver->err_handler ||
270277
!dev->driver->err_handler->slot_reset)
271-
return 0;
278+
goto out;
272279

273280
err_handler = dev->driver->err_handler;
274281
vote = err_handler->slot_reset(dev);
275282
result_data->result = merge_result(result_data->result, vote);
283+
out:
284+
device_unlock(&dev->dev);
276285
return 0;
277286
}
278287

279288
static int report_resume(struct pci_dev *dev, void *data)
280289
{
281290
const struct pci_error_handlers *err_handler;
282291

292+
device_lock(&dev->dev);
283293
dev->error_state = pci_channel_io_normal;
284294

285295
if (!dev->driver ||
286296
!dev->driver->err_handler ||
287297
!dev->driver->err_handler->resume)
288-
return 0;
298+
goto out;
289299

290300
err_handler = dev->driver->err_handler;
291301
err_handler->resume(dev);
302+
out:
303+
device_unlock(&dev->dev);
292304
return 0;
293305
}
294306

0 commit comments

Comments
 (0)