Skip to content

Commit 7dabb1b

Browse files
brettcreeley authored and awilliam committed
vfio/pds: Add support for firmware recovery
It's possible for the device firmware to crash, due to some configuration and/or other issue, and then recover. If a live migration is in progress when the firmware crashes, the live migration will fail. However, the VF PCI device should still be functional after crash recovery, and subsequent migrations should go through as expected. When the pds_core device notices that the firmware has crashed, it sends an event to all its client drivers. When the pds_vfio driver receives this event while a migration is in progress, it requests a deferred reset that takes effect on the next migration state transition. That state transition will report failure, as will any subsequent state transition requests from the VMM/VFIO. Based on uapi/vfio.h, the only way out of VFIO_DEVICE_STATE_ERROR is by issuing VFIO_DEVICE_RESET. Once this reset is done, the migration state will be reset to VFIO_DEVICE_STATE_RUNNING and migration can be performed. If the event is received while no migration is in progress (i.e. the VM is in normal operating mode), then no action is taken and the migration state remains VFIO_DEVICE_STATE_RUNNING. Signed-off-by: Brett Creeley <[email protected]> Signed-off-by: Shannon Nelson <[email protected]> Reviewed-by: Simon Horman <[email protected]> Reviewed-by: Kevin Tian <[email protected]> Reviewed-by: Shameer Kolothum <[email protected]> Reviewed-by: Jason Gunthorpe <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alex Williamson <[email protected]>
1 parent f232836 commit 7dabb1b

File tree

3 files changed

+131
-2
lines changed

3 files changed

+131
-2
lines changed

drivers/vfio/pci/pds/pci_drv.c

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,113 @@
1919
#define PDS_VFIO_DRV_DESCRIPTION "AMD/Pensando VFIO Device Driver"
2020
#define PCI_VENDOR_ID_PENSANDO 0x1dd8
2121

22+
static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio)
23+
{
24+
bool deferred_reset_needed = false;
25+
26+
/*
27+
* Documentation states that the kernel migration driver must not
28+
* generate asynchronous device state transitions outside of
29+
* manipulation by the user or the VFIO_DEVICE_RESET ioctl.
30+
*
31+
* Since recovery is an asynchronous event received from the device,
32+
* initiate a deferred reset. Issue a deferred reset in the following
33+
* situations:
34+
* 1. Migration is in progress, which will cause the next step of
35+
* the migration to fail.
36+
* 2. If the device is in a state that will be set to
37+
* VFIO_DEVICE_STATE_RUNNING on the next action (i.e. VM is
38+
* shutdown and device is in VFIO_DEVICE_STATE_STOP).
39+
*/
40+
mutex_lock(&pds_vfio->state_mutex);
41+
if ((pds_vfio->state != VFIO_DEVICE_STATE_RUNNING &&
42+
pds_vfio->state != VFIO_DEVICE_STATE_ERROR) ||
43+
(pds_vfio->state == VFIO_DEVICE_STATE_RUNNING &&
44+
pds_vfio_dirty_is_enabled(pds_vfio)))
45+
deferred_reset_needed = true;
46+
mutex_unlock(&pds_vfio->state_mutex);
47+
48+
/*
49+
* On the next user initiated state transition, the device will
50+
* transition to the VFIO_DEVICE_STATE_ERROR. At this point it's the user's
51+
* responsibility to reset the device.
52+
*
53+
* If a VFIO_DEVICE_RESET is requested post recovery and before the next
54+
* state transition, then the deferred reset state will be set to
55+
* VFIO_DEVICE_STATE_RUNNING.
56+
*/
57+
if (deferred_reset_needed) {
58+
spin_lock(&pds_vfio->reset_lock);
59+
pds_vfio->deferred_reset = true;
60+
pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR;
61+
spin_unlock(&pds_vfio->reset_lock);
62+
}
63+
}
64+
65+
static int pds_vfio_pci_notify_handler(struct notifier_block *nb,
66+
unsigned long ecode, void *data)
67+
{
68+
struct pds_vfio_pci_device *pds_vfio =
69+
container_of(nb, struct pds_vfio_pci_device, nb);
70+
struct device *dev = pds_vfio_to_dev(pds_vfio);
71+
union pds_core_notifyq_comp *event = data;
72+
73+
dev_dbg(dev, "%s: event code %lu\n", __func__, ecode);
74+
75+
/*
76+
* We don't need to do anything for RESET state==0 as there is no notify
77+
* or feedback mechanism available, and it is possible that we won't
78+
* even see a state==0 event since the pds_core recovery is pending.
79+
*
80+
* Any requests from VFIO while state==0 will fail, which will return
81+
* error and may cause migration to fail.
82+
*/
83+
if (ecode == PDS_EVENT_RESET) {
84+
dev_info(dev, "%s: PDS_EVENT_RESET event received, state==%d\n",
85+
__func__, event->reset.state);
86+
/*
87+
* pds_core device finished recovery and sent us the
88+
* notification (state == 1) to allow us to recover
89+
*/
90+
if (event->reset.state == 1)
91+
pds_vfio_recovery(pds_vfio);
92+
}
93+
94+
return 0;
95+
}
96+
97+
static int
98+
pds_vfio_pci_register_event_handler(struct pds_vfio_pci_device *pds_vfio)
99+
{
100+
struct device *dev = pds_vfio_to_dev(pds_vfio);
101+
struct notifier_block *nb = &pds_vfio->nb;
102+
int err;
103+
104+
if (!nb->notifier_call) {
105+
nb->notifier_call = pds_vfio_pci_notify_handler;
106+
err = pdsc_register_notify(nb);
107+
if (err) {
108+
nb->notifier_call = NULL;
109+
dev_err(dev,
110+
"failed to register pds event handler: %pe\n",
111+
ERR_PTR(err));
112+
return -EINVAL;
113+
}
114+
dev_dbg(dev, "pds event handler registered\n");
115+
}
116+
117+
return 0;
118+
}
119+
120+
static void
121+
pds_vfio_pci_unregister_event_handler(struct pds_vfio_pci_device *pds_vfio)
122+
{
123+
if (pds_vfio->nb.notifier_call) {
124+
pdsc_unregister_notify(&pds_vfio->nb);
125+
pds_vfio->nb.notifier_call = NULL;
126+
}
127+
}
128+
22129
static int pds_vfio_pci_probe(struct pci_dev *pdev,
23130
const struct pci_device_id *id)
24131
{
@@ -43,8 +150,14 @@ static int pds_vfio_pci_probe(struct pci_dev *pdev,
43150
goto out_unregister_coredev;
44151
}
45152

153+
err = pds_vfio_pci_register_event_handler(pds_vfio);
154+
if (err)
155+
goto out_unregister_client;
156+
46157
return 0;
47158

159+
out_unregister_client:
160+
pds_vfio_unregister_client_cmd(pds_vfio);
48161
out_unregister_coredev:
49162
vfio_pci_core_unregister_device(&pds_vfio->vfio_coredev);
50163
out_put_vdev:
@@ -56,6 +169,7 @@ static void pds_vfio_pci_remove(struct pci_dev *pdev)
56169
{
57170
struct pds_vfio_pci_device *pds_vfio = pds_vfio_pci_drvdata(pdev);
58171

172+
pds_vfio_pci_unregister_event_handler(pds_vfio);
59173
pds_vfio_unregister_client_cmd(pds_vfio);
60174
vfio_pci_core_unregister_device(&pds_vfio->vfio_coredev);
61175
vfio_put_device(&pds_vfio->vfio_coredev.vdev);

drivers/vfio/pci/pds/vfio_dev.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,12 @@ void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio)
3333
if (pds_vfio->deferred_reset) {
3434
pds_vfio->deferred_reset = false;
3535
if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) {
36-
pds_vfio->state = VFIO_DEVICE_STATE_RUNNING;
3736
pds_vfio_put_restore_file(pds_vfio);
3837
pds_vfio_put_save_file(pds_vfio);
3938
pds_vfio_dirty_disable(pds_vfio, false);
4039
}
40+
pds_vfio->state = pds_vfio->deferred_reset_state;
41+
pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
4142
spin_unlock(&pds_vfio->reset_lock);
4243
goto again;
4344
}
@@ -49,6 +50,7 @@ void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio)
4950
{
5051
spin_lock(&pds_vfio->reset_lock);
5152
pds_vfio->deferred_reset = true;
53+
pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
5254
if (!mutex_trylock(&pds_vfio->state_mutex)) {
5355
spin_unlock(&pds_vfio->reset_lock);
5456
return;
@@ -67,7 +69,14 @@ pds_vfio_set_device_state(struct vfio_device *vdev,
6769
struct file *res = NULL;
6870

6971
mutex_lock(&pds_vfio->state_mutex);
70-
while (new_state != pds_vfio->state) {
72+
/*
73+
* only way to transition out of VFIO_DEVICE_STATE_ERROR is via
74+
* VFIO_DEVICE_RESET, so prevent the state machine from running since
75+
* vfio_mig_get_next_state() will throw a WARN_ON() when transitioning
76+
* from VFIO_DEVICE_STATE_ERROR to any other state
77+
*/
78+
while (pds_vfio->state != VFIO_DEVICE_STATE_ERROR &&
79+
new_state != pds_vfio->state) {
7180
enum vfio_device_mig_state next_state;
7281

7382
int err = vfio_mig_get_next_state(vdev, pds_vfio->state,
@@ -89,6 +98,9 @@ pds_vfio_set_device_state(struct vfio_device *vdev,
8998
}
9099
}
91100
pds_vfio_state_mutex_unlock(pds_vfio);
101+
/* still waiting on a deferred_reset */
102+
if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR)
103+
res = ERR_PTR(-EIO);
92104

93105
return res;
94106
}
@@ -169,6 +181,7 @@ static int pds_vfio_open_device(struct vfio_device *vdev)
169181

170182
mutex_init(&pds_vfio->state_mutex);
171183
pds_vfio->state = VFIO_DEVICE_STATE_RUNNING;
184+
pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
172185

173186
vfio_pci_core_finish_enable(&pds_vfio->vfio_coredev);
174187

drivers/vfio/pci/pds/vfio_dev.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ struct pds_vfio_pci_device {
2020
enum vfio_device_mig_state state;
2121
spinlock_t reset_lock; /* protect reset_done flow */
2222
u8 deferred_reset;
23+
enum vfio_device_mig_state deferred_reset_state;
24+
struct notifier_block nb;
2325

2426
int vf_id;
2527
u16 client_id;

0 commit comments

Comments
 (0)