Skip to content

Commit a28b907

Browse files
wolsen authored and sbauza committed
Handle mdev devices in libvirt 7.7+
Libvirt 7.7 changed the mdev device naming to include the parent PCI device when listing node devices. The domain, however, will still only see the UUID and not see the parent PCI device. Changing the parsing to simply drop the PCI identifier is not enough, as the device cannot be found when attempting to look up the new ID. Modify the Libvirt Driver's _get_mediated_device_information to tolerate different formats of the mdev name. This first uses the legacy behavior by trying to look up the device name that is passed in (typically mdev_<uuid> format) and, if that is not found, iterates the list of mdev node devices until the right UUID is found and selects that one. Note that the lookup of the mdev device by UUID is needed in order to keep the ability to recreate assigned mediated devices on a reboot of the compute node. Additionally, the libvirt utils parsing method mdev_name2uuid has been updated to tolerate both mdev_<uuid> and mdev_<uuid>_<pciid> formats. Closes-Bug: 1951656 Change-Id: Ifed0fa16053228990a6a8df8d4c666521db7e329
1 parent 1852019 commit a28b907

File tree

6 files changed

+97
-27
lines changed

6 files changed

+97
-27
lines changed

nova/tests/functional/regressions/test_bug_1951656.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -63,21 +63,11 @@ def test_create_servers_with_vgpu(self):
6363
flavor_id=self.flavor, host=self.compute1.host,
6464
networks='auto', expected_state='ACTIVE')
6565

66-
# TODO(sbauza): Modify this once bug #1851656 is fixed.
67-
# mdev_name2uuid() raises a badly formed hexadecimal UUID string error
68-
self.assertRaises(ValueError,
69-
self.assert_mdev_usage,
70-
self.compute1, expected_amount=1)
71-
72-
# Now, the problem is that we can't create new instances with VGPUs
73-
# from this host.
74-
server = self._create_server(
66+
self.assert_mdev_usage(self.compute1, expected_amount=1)
67+
68+
self._create_server(
7569
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
7670
flavor_id=self.flavor, host=self.compute1.host,
77-
networks='auto', expected_state='ERROR')
78-
# The error is due to a bad mdev name parsing
79-
self.assertIn('fault', server)
80-
# since we only have one host, we have a RescheduledException as this
81-
# service was creating an exception and we can't use another one.
82-
self.assertIn('Exceeded maximum number of retries',
83-
server['fault']['message'])
71+
networks='auto', expected_state='ACTIVE')
72+
73+
self.assert_mdev_usage(self.compute1, expected_amount=2)

nova/tests/unit/virt/libvirt/test_config.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3181,6 +3181,32 @@ def test_config_mdev_device(self):
31813181
config.LibvirtConfigNodeDeviceMdevInformation)
31823182
self.assertEqual("nvidia-11", obj.mdev_information.type)
31833183
self.assertEqual(12, obj.mdev_information.iommu_group)
3184+
self.assertIsNone(obj.mdev_information.uuid)
3185+
3186+
def test_config_mdev_device_uuid(self):
3187+
xmlin = """
3188+
<device>
3189+
<name>mdev_b2107403_110c_45b0_af87_32cc91597b8a_0000_41_00_0</name>
3190+
<path>/sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0/b2107403-110c-45b0-af87-32cc91597b8a</path>
3191+
<parent>pci_0000_41_00_0</parent>
3192+
<driver>
3193+
<name>vfio_mdev</name>
3194+
</driver>
3195+
<capability type='mdev'>
3196+
<type id='nvidia-442'/>
3197+
<uuid>b2107403-110c-45b0-af87-32cc91597b8a</uuid>
3198+
<iommuGroup number='57'/>
3199+
</capability>
3200+
</device>"""
3201+
3202+
obj = config.LibvirtConfigNodeDevice()
3203+
obj.parse_str(xmlin)
3204+
self.assertIsInstance(obj.mdev_information,
3205+
config.LibvirtConfigNodeDeviceMdevInformation)
3206+
self.assertEqual("nvidia-442", obj.mdev_information.type)
3207+
self.assertEqual(57, obj.mdev_information.iommu_group)
3208+
self.assertEqual("b2107403-110c-45b0-af87-32cc91597b8a",
3209+
obj.mdev_information.uuid)
31843210

31853211
def test_config_vdpa_device(self):
31863212
xmlin = """

nova/virt/libvirt/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3382,6 +3382,7 @@ def __init__(self, **kwargs):
33823382
root_name="capability", **kwargs)
33833383
self.type = None
33843384
self.iommu_group = None
3385+
self.uuid = None
33853386

33863387
def parse_dom(self, xmldoc):
33873388
super(LibvirtConfigNodeDeviceMdevInformation,
@@ -3391,6 +3392,8 @@ def parse_dom(self, xmldoc):
33913392
self.type = c.get('id')
33923393
if c.tag == "iommuGroup":
33933394
self.iommu_group = int(c.get('number'))
3395+
if c.tag == "uuid":
3396+
self.uuid = c.text
33943397

33953398

33963399
class LibvirtConfigNodeDeviceVpdCap(LibvirtConfigObject):

nova/virt/libvirt/driver.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8227,15 +8227,52 @@ def _get_mdev_capable_devices(self, types=None):
82278227

82288228
def _get_mediated_device_information(self, devname):
82298229
"""Returns a dict of a mediated device."""
8230-
virtdev = self._host.device_lookup_by_name(devname)
8230+
# LP #1951656 - In Libvirt 7.7, the mdev name now includes the PCI
8231+
# address of the parent device (e.g. mdev_<uuid>_<pci_address>) due to
8232+
# the mdevctl allowing for multiple mediated devs having the same UUID
8233+
# defined (only one can be active at a time). Since the guest
8234+
# information doesn't have the parent ID, try to lookup which
8235+
# mediated device is available that matches the UUID. If multiple
8236+
# devices are found that match the UUID, then this is an error
8237+
# condition.
8238+
try:
8239+
virtdev = self._host.device_lookup_by_name(devname)
8240+
except libvirt.libvirtError as ex:
8241+
if ex.get_error_code() != libvirt.VIR_ERR_NO_NODE_DEVICE:
8242+
raise
8243+
mdevs = [dev for dev in self._host.list_mediated_devices()
8244+
if dev.startswith(devname)]
8245+
# If no matching devices are found, simply raise the original
8246+
# exception indicating that no devices are found.
8247+
if not mdevs:
8248+
raise
8249+
elif len(mdevs) > 1:
8250+
msg = ("The mediated device name %(devname)s refers to a UUID "
8251+
"that is present in multiple libvirt mediated devices. "
8252+
"Matching libvirt mediated devices are %(devices)s. "
8253+
"Mediated device UUIDs must be unique for Nova." %
8254+
{'devname': devname,
8255+
'devices': ', '.join(mdevs)})
8256+
raise exception.InvalidLibvirtMdevConfig(reason=msg)
8257+
8258+
LOG.debug('Found requested device %s as %s. Using that.',
8259+
devname, mdevs[0])
8260+
virtdev = self._host.device_lookup_by_name(mdevs[0])
82318261
xmlstr = virtdev.XMLDesc(0)
82328262
cfgdev = vconfig.LibvirtConfigNodeDevice()
82338263
cfgdev.parse_str(xmlstr)
8264+
# Starting with Libvirt 7.3, the uuid information is available in the
8265+
# node device information. If its there, use that. Otherwise,
8266+
# fall back to the previous behavior of parsing the uuid from the
8267+
# devname.
8268+
if cfgdev.mdev_information.uuid:
8269+
mdev_uuid = cfgdev.mdev_information.uuid
8270+
else:
8271+
mdev_uuid = libvirt_utils.mdev_name2uuid(cfgdev.name)
82348272

82358273
device = {
82368274
"dev_id": cfgdev.name,
8237-
# name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4
8238-
"uuid": libvirt_utils.mdev_name2uuid(cfgdev.name),
8275+
"uuid": mdev_uuid,
82398276
# the physical GPU PCI device
82408277
"parent": cfgdev.parent,
82418278
"type": cfgdev.mdev_information.type,

nova/virt/libvirt/host.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1566,7 +1566,7 @@ def list_mdev_capable_devices(self, flags=0):
15661566
def list_mediated_devices(self, flags=0):
15671567
"""Lookup mediated devices.
15681568
1569-
:returns: a list of virNodeDevice instance
1569+
:returns: a list of strings with the names of the mediated devices
15701570
"""
15711571
return self._list_devices("mdev", flags=flags)
15721572

nova/virt/libvirt/utils.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -575,17 +575,31 @@ def get_default_machine_type(arch: str) -> ty.Optional[str]:
575575

576576

577577
def mdev_name2uuid(mdev_name: str) -> str:
578-
"""Convert an mdev name (of the form mdev_<uuid_with_underscores>) to a
579-
uuid (of the form 8-4-4-4-12).
578+
"""Convert an mdev name (of the form mdev_<uuid_with_underscores> or
579+
mdev_<uuid_with_underscores>_<pciaddress>) to a uuid
580+
(of the form 8-4-4-4-12).
581+
582+
:param mdev_name: the name of the mdev to parse the UUID from
583+
:returns: string containing the uuid
580584
"""
581-
return str(uuid.UUID(mdev_name[5:].replace('_', '-')))
585+
mdev_uuid = mdev_name[5:].replace('_', '-')
586+
# Unconditionally remove the PCI address from the name
587+
mdev_uuid = mdev_uuid[:36]
588+
return str(uuid.UUID(mdev_uuid))
589+
582590

591+
def mdev_uuid2name(mdev_uuid: str, parent: str = None) -> str:
592+
"""Convert an mdev uuid (of the form 8-4-4-4-12) and optionally its parent
593+
device to a name (of the form mdev_<uuid_with_underscores>[_<pciid>]).
583594
584-
def mdev_uuid2name(mdev_uuid: str) -> str:
585-
"""Convert an mdev uuid (of the form 8-4-4-4-12) to a name (of the form
586-
mdev_<uuid_with_underscores>).
595+
:param mdev_uuid: the uuid of the mediated device
596+
:param parent: the parent device id for the mediated device
597+
:returns: name of the mdev to reference in libvirt
587598
"""
588-
return "mdev_" + mdev_uuid.replace('-', '_')
599+
name = "mdev_" + mdev_uuid.replace('-', '_')
600+
if parent and parent.startswith('pci_'):
601+
name = name + parent[4:]
602+
return name
589603

590604

591605
def get_flags_by_flavor_specs(flavor: 'objects.Flavor') -> ty.Set[str]:

0 commit comments

Comments
 (0)