Skip to content

Commit 2805391

Browse files
wolsen authored and tobias-urdin committed
Handle mdev devices in libvirt 7.7+
Libvirt 7.7 changed the mdev device naming to include the parent PCI device when listing node devices. The domain, however, will still only see the UUID and not see the parent PCI device. Changing the parsing to simply drop the PCI identifier is not enough, as the device cannot be found when attempting to look up the new ID. Modify the Libvirt Driver's _get_mediated_device_information to tolerate different formats of the mdev name. This first uses the legacy behavior by trying to look up the device name that is passed in (typically mdev_<uuid> format) and, if that is not found, iterates the list of mdev node devices until the right UUID is found and selects that one. Note that the lookup of the mdev device by UUID is needed in order to keep the ability to recreate assigned mediated devices on a reboot of the compute node. Additionally, the libvirt utils parsing method mdev_name2uuid has been updated to tolerate both mdev_<uuid> and mdev_<uuid>_<pciid> formats. Closes-Bug: 1951656 Change-Id: Ifed0fa16053228990a6a8df8d4c666521db7e329 (cherry picked from commit a28b907) (cherry picked from commit 98d8c9e)
1 parent 71aa17a commit 2805391

File tree

6 files changed

+97
-27
lines changed

6 files changed

+97
-27
lines changed

nova/tests/functional/regressions/test_bug_1951656.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -63,21 +63,11 @@ def test_create_servers_with_vgpu(self):
6363
flavor_id=self.flavor, host=self.compute1.host,
6464
networks='auto', expected_state='ACTIVE')
6565

66-
# TODO(sbauza): Modify this once bug #1851656 is fixed.
67-
# mdev_name2uuid() raises a badly formed hexadecimal UUID string error
68-
self.assertRaises(ValueError,
69-
self.assert_mdev_usage,
70-
self.compute1, expected_amount=1)
71-
72-
# Now, the problem is that we can't create new instances with VGPUs
73-
# from this host.
74-
server = self._create_server(
66+
self.assert_mdev_usage(self.compute1, expected_amount=1)
67+
68+
self._create_server(
7569
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
7670
flavor_id=self.flavor, host=self.compute1.host,
77-
networks='auto', expected_state='ERROR')
78-
# The error is due to a bad mdev name parsing
79-
self.assertIn('fault', server)
80-
# since we only have one host, we have a RescheduledException as this
81-
# service was creating an exception and we can't use another one.
82-
self.assertIn('Exceeded maximum number of retries',
83-
server['fault']['message'])
71+
networks='auto', expected_state='ACTIVE')
72+
73+
self.assert_mdev_usage(self.compute1, expected_amount=2)

nova/tests/unit/virt/libvirt/test_config.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3135,6 +3135,32 @@ def test_config_mdev_device(self):
31353135
config.LibvirtConfigNodeDeviceMdevInformation)
31363136
self.assertEqual("nvidia-11", obj.mdev_information.type)
31373137
self.assertEqual(12, obj.mdev_information.iommu_group)
3138+
self.assertIsNone(obj.mdev_information.uuid)
3139+
3140+
def test_config_mdev_device_uuid(self):
3141+
xmlin = """
3142+
<device>
3143+
<name>mdev_b2107403_110c_45b0_af87_32cc91597b8a_0000_41_00_0</name>
3144+
<path>/sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0/b2107403-110c-45b0-af87-32cc91597b8a</path>
3145+
<parent>pci_0000_41_00_0</parent>
3146+
<driver>
3147+
<name>vfio_mdev</name>
3148+
</driver>
3149+
<capability type='mdev'>
3150+
<type id='nvidia-442'/>
3151+
<uuid>b2107403-110c-45b0-af87-32cc91597b8a</uuid>
3152+
<iommuGroup number='57'/>
3153+
</capability>
3154+
</device>"""
3155+
3156+
obj = config.LibvirtConfigNodeDevice()
3157+
obj.parse_str(xmlin)
3158+
self.assertIsInstance(obj.mdev_information,
3159+
config.LibvirtConfigNodeDeviceMdevInformation)
3160+
self.assertEqual("nvidia-442", obj.mdev_information.type)
3161+
self.assertEqual(57, obj.mdev_information.iommu_group)
3162+
self.assertEqual("b2107403-110c-45b0-af87-32cc91597b8a",
3163+
obj.mdev_information.uuid)
31383164

31393165
def test_config_vdpa_device(self):
31403166
xmlin = """

nova/virt/libvirt/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3299,6 +3299,7 @@ def __init__(self, **kwargs):
32993299
root_name="capability", **kwargs)
33003300
self.type = None
33013301
self.iommu_group = None
3302+
self.uuid = None
33023303

33033304
def parse_dom(self, xmldoc):
33043305
super(LibvirtConfigNodeDeviceMdevInformation,
@@ -3308,6 +3309,8 @@ def parse_dom(self, xmldoc):
33083309
self.type = c.get('id')
33093310
if c.tag == "iommuGroup":
33103311
self.iommu_group = int(c.get('number'))
3312+
if c.tag == "uuid":
3313+
self.uuid = c.text
33113314

33123315

33133316
class LibvirtConfigNodeDeviceVpdCap(LibvirtConfigObject):

nova/virt/libvirt/driver.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8019,15 +8019,52 @@ def _get_mdev_capable_devices(self, types=None):
80198019

80208020
def _get_mediated_device_information(self, devname):
80218021
"""Returns a dict of a mediated device."""
8022-
virtdev = self._host.device_lookup_by_name(devname)
8022+
# LP #1951656 - In Libvirt 7.7, the mdev name now includes the PCI
8023+
# address of the parent device (e.g. mdev_<uuid>_<pci_address>) due to
8024+
# the mdevctl allowing for multiple mediated devs having the same UUID
8025+
# defined (only one can be active at a time). Since the guest
8026+
# information doesn't have the parent ID, try to lookup which
8027+
# mediated device is available that matches the UUID. If multiple
8028+
# devices are found that match the UUID, then this is an error
8029+
# condition.
8030+
try:
8031+
virtdev = self._host.device_lookup_by_name(devname)
8032+
except libvirt.libvirtError as ex:
8033+
if ex.get_error_code() != libvirt.VIR_ERR_NO_NODE_DEVICE:
8034+
raise
8035+
mdevs = [dev for dev in self._host.list_mediated_devices()
8036+
if dev.startswith(devname)]
8037+
# If no matching devices are found, simply raise the original
8038+
# exception indicating that no devices are found.
8039+
if not mdevs:
8040+
raise
8041+
elif len(mdevs) > 1:
8042+
msg = ("The mediated device name %(devname)s refers to a UUID "
8043+
"that is present in multiple libvirt mediated devices. "
8044+
"Matching libvirt mediated devices are %(devices)s. "
8045+
"Mediated device UUIDs must be unique for Nova." %
8046+
{'devname': devname,
8047+
'devices': ', '.join(mdevs)})
8048+
raise exception.InvalidLibvirtMdevConfig(reason=msg)
8049+
8050+
LOG.debug('Found requested device %s as %s. Using that.',
8051+
devname, mdevs[0])
8052+
virtdev = self._host.device_lookup_by_name(mdevs[0])
80238053
xmlstr = virtdev.XMLDesc(0)
80248054
cfgdev = vconfig.LibvirtConfigNodeDevice()
80258055
cfgdev.parse_str(xmlstr)
8056+
# Starting with Libvirt 7.3, the uuid information is available in the
8057+
# node device information. If it's there, use that. Otherwise,
8058+
# fall back to the previous behavior of parsing the uuid from the
8059+
# devname.
8060+
if cfgdev.mdev_information.uuid:
8061+
mdev_uuid = cfgdev.mdev_information.uuid
8062+
else:
8063+
mdev_uuid = libvirt_utils.mdev_name2uuid(cfgdev.name)
80268064

80278065
device = {
80288066
"dev_id": cfgdev.name,
8029-
# name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4
8030-
"uuid": libvirt_utils.mdev_name2uuid(cfgdev.name),
8067+
"uuid": mdev_uuid,
80318068
# the physical GPU PCI device
80328069
"parent": cfgdev.parent,
80338070
"type": cfgdev.mdev_information.type,

nova/virt/libvirt/host.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1503,7 +1503,7 @@ def list_mdev_capable_devices(self, flags=0):
15031503
def list_mediated_devices(self, flags=0):
15041504
"""Lookup mediated devices.
15051505
1506-
:returns: a list of virNodeDevice instance
1506+
:returns: a list of strings with the names of the mediated devices
15071507
"""
15081508
return self._list_devices("mdev", flags=flags)
15091509

nova/virt/libvirt/utils.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -581,17 +581,31 @@ def get_default_machine_type(arch: str) -> ty.Optional[str]:
581581

582582

583583
def mdev_name2uuid(mdev_name: str) -> str:
584-
"""Convert an mdev name (of the form mdev_<uuid_with_underscores>) to a
585-
uuid (of the form 8-4-4-4-12).
584+
"""Convert an mdev name (of the form mdev_<uuid_with_underscores> or
585+
mdev_<uuid_with_underscores>_<pciaddress>) to a uuid
586+
(of the form 8-4-4-4-12).
587+
588+
:param mdev_name: the name of the mdev to parse the UUID from
589+
:returns: string containing the uuid
586590
"""
587-
return str(uuid.UUID(mdev_name[5:].replace('_', '-')))
591+
mdev_uuid = mdev_name[5:].replace('_', '-')
592+
# Unconditionally remove the PCI address from the name
593+
mdev_uuid = mdev_uuid[:36]
594+
return str(uuid.UUID(mdev_uuid))
595+
588596

597+
def mdev_uuid2name(mdev_uuid: str, parent: str = None) -> str:
598+
"""Convert an mdev uuid (of the form 8-4-4-4-12) and optionally its parent
599+
device to a name (of the form mdev_<uuid_with_underscores>[_<pciid>]).
589600
590-
def mdev_uuid2name(mdev_uuid: str) -> str:
591-
"""Convert an mdev uuid (of the form 8-4-4-4-12) to a name (of the form
592-
mdev_<uuid_with_underscores>).
601+
:param mdev_uuid: the uuid of the mediated device
602+
:param parent: the parent device id for the mediated device
603+
:returns: name of the mdev to reference in libvirt
593604
"""
594-
return "mdev_" + mdev_uuid.replace('-', '_')
605+
name = "mdev_" + mdev_uuid.replace('-', '_')
606+
if parent and parent.startswith('pci_'):
607+
name = name + parent[4:]
608+
return name
595609

596610

597611
def get_flags_by_flavor_specs(flavor: 'objects.Flavor') -> ty.Set[str]:

0 commit comments

Comments
 (0)