
Commit 16f7c60

Author: Gustavo Santos (committed)
Reattach mdevs to guest on resume
When suspending a VM in OpenStack, Nova detaches all the mediated devices from the guest machine, but it does not reattach them on the resume operation. This patch makes Nova reattach the mdevs that were detached when the guest was suspended.

The original behavior is due to libvirt not supporting hot-unplug of mediated devices at the time the suspend feature was developed. That limitation has since been lifted, so the resume function can now be amended to reattach the mediated devices that were detached on suspension.

Closes-bug: #1948705
Signed-off-by: Gustavo Santos <[email protected]>
Change-Id: I083929f36d9e78bf7713a87cae6d581e0d946867
1 parent 00452a4 commit 16f7c60
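Editor's note: the core idea of the patch is that the mdev <hostdev> entries survive in the guest's saved XML after suspend, so resume can read them back and hot-plug each one, tolerating devices that have meanwhile vanished from the host. Below is a minimal standalone sketch of that flow using libvirt-python directly. This is not Nova's code; the connection URI and domain name are illustrative assumptions.

# Minimal sketch of the reattach-on-resume idea, using libvirt-python
# directly. The URI and domain name are hypothetical, not from the patch.
import xml.etree.ElementTree as ET

import libvirt

conn = libvirt.open('qemu:///system')
dom = conn.lookupByName('instance-00000001')  # hypothetical domain name

# The mdev hostdevs survive in the persistent config even though they
# were hot-unplugged from the live guest on suspend.
tree = ET.fromstring(dom.XMLDesc(libvirt.VIR_DOMAIN_XML_INACTIVE))
for hostdev in tree.findall("./devices/hostdev[@type='mdev']"):
    try:
        # Re-plug the device into the running guest.
        dom.attachDeviceFlags(ET.tostring(hostdev, encoding='unicode'),
                              libvirt.VIR_DOMAIN_AFFECT_LIVE)
    except libvirt.libvirtError as ex:
        # Mirror the patch: tolerate a device that no longer exists.
        if ex.get_error_code() != libvirt.VIR_ERR_DEVICE_MISSING:
            raise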

File tree: 4 files changed (+107, -7 lines)

4 files changed

+107
-7
lines changed

doc/source/admin/virtual-gpu.rst

Lines changed: 5 additions & 0 deletions
@@ -301,6 +301,10 @@ Caveats
   that will cause the instance to be set back to ACTIVE. The ``suspend`` action
   in the ``os-instance-actions`` API will have an *Error* state.
 
+  .. versionchanged:: 25.0.0
+
+     This has been resolved in the Yoga release. See `bug 1948705`_.
+
 * Resizing an instance with a new flavor that has vGPU resources doesn't
   allocate those vGPUs to the instance (the instance is created without
   vGPU resources). The proposed workaround is to rebuild the instance after
@@ -350,6 +354,7 @@ For nested vGPUs:
 
 .. _bug 1778563: https://bugs.launchpad.net/nova/+bug/1778563
 .. _bug 1762688: https://bugs.launchpad.net/nova/+bug/1762688
+.. _bug 1948705: https://bugs.launchpad.net/nova/+bug/1948705
 
 .. Links
 .. _Intel GVT-g: https://01.org/igvt-g

nova/tests/unit/virt/libvirt/test_driver.py

Lines changed: 59 additions & 1 deletion
@@ -16563,9 +16563,15 @@ def test_resume(self):
             mock.patch.object(guest, 'sync_guest_time'),
             mock.patch.object(drvr, '_wait_for_running',
                               side_effect=loopingcall.LoopingCallDone()),
+            mock.patch.object(drvr,
+                              '_get_mdevs_from_guest_config',
+                              return_value='fake_mdevs'),
+            mock.patch.object(drvr, '_attach_mediated_devices'),
         ) as (_get_existing_domain_xml, _create_guest_with_network,
               _attach_pci_devices, get_instance_pci_devs, get_image_metadata,
-              mock_sync_time, mock_wait):
+              mock_sync_time, mock_wait,
+              _get_mdevs_from_guest_config,
+              _attach_mediated_devices):
             get_image_metadata.return_value = {'bar': 234}
 
             drvr.resume(self.context, instance, network_info,
@@ -16580,6 +16586,9 @@ def test_resume(self):
             self.assertTrue(mock_sync_time.called)
             _attach_pci_devices.assert_has_calls([mock.call(guest,
                                                  'fake_pci_devs')])
+            _attach_mediated_devices.assert_has_calls(
+                [mock.call(guest, 'fake_mdevs')]
+            )
 
     @mock.patch.object(host.Host, '_get_domain')
     @mock.patch.object(libvirt_driver.LibvirtDriver, 'get_info')
@@ -26073,6 +26082,55 @@ def test_detach_mediated_devices_raises_exc(self):
         self.assertRaises(test.TestingException,
                           self._test_detach_mediated_devices, exc)
 
+    @mock.patch.object(libvirt_guest.Guest, 'attach_device')
+    def _test_attach_mediated_devices(self, side_effect, attach_device):
+        dom_without_vgpu = (
+            """<domain> <devices>
+            <disk type='file' device='disk'>
+            <driver name='qemu' type='qcow2' cache='none'/>
+            <source file='xxx'/>
+            <target dev='vda' bus='virtio'/>
+            <alias name='virtio-disk0'/>
+            <address type='pci' domain='0x0000' bus='0x00'
+            slot='0x04' function='0x0'/>
+            </disk>
+            </devices></domain>""")
+
+        vgpu_xml = (
+            """<domain> <devices>
+            <hostdev mode='subsystem' type='mdev' managed='no'
+            model='vfio-pci'>
+            <source>
+            <address uuid='81db53c6-6659-42a0-a34c-1507fdc72983'/>
+            </source>
+            <alias name='hostdev0'/>
+            <address type='pci' domain='0x0000' bus='0x00' slot='0x05'
+            function='0x0'/>
+            </hostdev>
+            </devices></domain>""")
+
+        attach_device.side_effect = side_effect
+
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
+        guest = libvirt_guest.Guest(FakeVirtDomain(fake_xml=dom_without_vgpu))
+        mdevs = drvr._get_mdevs_from_guest_config(vgpu_xml)
+        drvr._attach_mediated_devices(guest, mdevs)
+        return attach_device
+
+    def test_attach_mediated_devices(self):
+        def fake_attach_device(cfg_obj, **kwargs):
+            self.assertIsInstance(cfg_obj,
+                                  vconfig.LibvirtConfigGuestHostdevMDEV)
+
+        attach_mock = self._test_attach_mediated_devices(fake_attach_device)
+        attach_mock.assert_called_once_with(mock.ANY, live=True)
+
+    def test_attach_mediated_devices_raises_exc(self):
+        exc = test.TestingException()
+
+        self.assertRaises(test.TestingException,
+                          self._test_attach_mediated_devices, exc)
+
     def test_storage_bus_traits__qemu_kvm(self):
         """Test getting storage bus traits per virt type.
         """

nova/virt/libvirt/driver.py

Lines changed: 37 additions & 6 deletions
@@ -3966,6 +3966,10 @@ def resume(self, context, instance, network_info, block_device_info=None):
         """resume the specified instance."""
         xml = self._get_existing_domain_xml(instance, network_info,
                                             block_device_info)
+        # NOTE(gsantos): The mediated devices that were removed on suspension
+        # are still present in the xml. Let's take their references from it
+        # and re-attach them.
+        mdevs = self._get_mdevs_from_guest_config(xml)
         # NOTE(efried): The instance should already have a vtpm_secret_uuid
         # registered if appropriate.
         guest = self._create_guest_with_network(
@@ -3975,6 +3979,7 @@ def resume(self, context, instance, network_info, block_device_info=None):
                                  pci_manager.get_instance_pci_devs(instance))
         self._attach_direct_passthrough_ports(
             context, instance, guest, network_info)
+        self._attach_mediated_devices(guest, mdevs)
         timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_running,
                                                      instance)
         timer.start(interval=0.5).wait()
@@ -8000,12 +8005,6 @@ def _detach_mediated_devices(self, guest):
                 guest.detach_device(mdev_cfg, live=True)
             except libvirt.libvirtError as ex:
                 error_code = ex.get_error_code()
-                # NOTE(sbauza): There is a pending issue with libvirt that
-                # doesn't allow to hot-unplug mediated devices. Let's
-                # short-circuit the suspend action and set the instance back
-                # to ACTIVE.
-                # TODO(sbauza): Once libvirt supports this, amend the resume()
-                # operation to support reallocating mediated devices.
                 if error_code == libvirt.VIR_ERR_CONFIG_UNSUPPORTED:
                     reason = _("Suspend is not supported for instances having "
                                "attached mediated devices.")
@@ -8014,6 +8013,38 @@ def _detach_mediated_devices(self, guest):
                 else:
                     raise
 
+    def _attach_mediated_devices(self, guest, devs):
+        for mdev_cfg in devs:
+            try:
+                guest.attach_device(mdev_cfg, live=True)
+            except libvirt.libvirtError as ex:
+                error_code = ex.get_error_code()
+                if error_code == libvirt.VIR_ERR_DEVICE_MISSING:
+                    LOG.warning("The mediated device %s was not found and "
+                                "won't be reattached to %s.", mdev_cfg, guest)
+                else:
+                    raise
+
+    def _get_mdevs_from_guest_config(self, xml):
+        """Get all libvirt's mediated devices from a guest's config (XML) file.
+
+        We don't have to worry about those devices being used by another
+        guest, since they remain allocated for the current guest as long as
+        they are present in the XML.
+
+        :param xml: The XML from the guest we want to get a list of mdevs
+                    from.
+
+        :returns: A list containing the objects that represent the mediated
+                  devices attached to the guest's config passed as argument.
+        """
+        config = vconfig.LibvirtConfigGuest()
+        config.parse_str(xml)
+
+        devs = []
+        for dev in config.devices:
+            if isinstance(dev, vconfig.LibvirtConfigGuestHostdevMDEV):
+                devs.append(dev)
+        return devs
+
     def _has_numa_support(self):
         # This means that the host can support LibvirtConfigGuestNUMATune
         # and the nodeset field in LibvirtConfigGuestMemoryBackingPage
releasenotes/notes/… (new file; exact filename not preserved in this capture)

Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    Amended the guest resume operation to support mediated devices, as
+    libvirt's minimum required version (v6.0.0) supports the hot-plug/unplug
+    of mediated devices, which was addressed in v4.3.0.
