Skip to content

Commit fd8fdc9

Browse files
committed
SR-IOV Live migration indirect port support
This patch builds on previous patches and enables live migration with SR-IOV indirect ports. Prior to this change, migration would have either: - Failed with the instance running on the source node. - Failed with two VMs booted on both source and destination nodes, VM state set to migrating, duplicate MACs on source and destination nodes and improper PCI resource claiming. This scenario is observed in the case of the macvtap port type and when neutron does not support the multiple port binding API extension. In both cases very little, non-user-friendly information was written to the log. Conductor Changes: - Allow live migration only with VIF related PCI devices to allow properly claiming PCI resources on the destination node. With this change live migration with generic flavor based PCI passthrough devices will not be supported due to libvirt and qemu constraints. - Add a check to allow live migration with VIF related PCI allocation only when neutron supports the multiple port binding API extension and compute nodes are up to date. - Update the migrating VIF with the correct profile when binding the ports on the destination host; this will allow proper binding against the destination host and ensure the VIF will be plugged correctly by Nova. Compute Changes: - Create VIFMigrateData for all VIFs in check_can_live_migrate_destination() - For every VIF that contains a PCI device in its profile, claim a PCI device on the destination node using the matching InstancePCIRequest of the instance being migrated. - Update the relevant VIFMigrateData profile with the newly claimed PCI device. - Free PCI devices on source and allocate on destination upon a successful migration or free claimed PCI devices on destination upon failure. NeutronV2 Changes: - Don't update binding profile with PCI devices if migration type is live-migration as the profile was already updated when an inactive port binding was created during bind_ports_to_host() call from conductor. Note: This builds on multiple ports binding API. 
Change-Id: I734cc01dce13f9e75a16639faf890ddb1661b7eb Partial-Implement: blueprint libvirt-neutron-sriov-livemigration
1 parent fc38906 commit fd8fdc9

File tree

12 files changed

+591
-54
lines changed

12 files changed

+591
-54
lines changed

nova/compute/manager.py

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import base64
2929
import binascii
3030
import contextlib
31+
import copy
3132
import functools
3233
import inspect
3334
import sys
@@ -85,6 +86,7 @@
8586
from nova.objects import fields
8687
from nova.objects import instance as obj_instance
8788
from nova.objects import migrate_data as migrate_data_obj
89+
from nova.pci import request as pci_req_module
8890
from nova.pci import whitelist
8991
from nova import rpc
9092
from nova import safe_utils
@@ -6221,6 +6223,15 @@ def check_can_live_migrate_destination(self, ctxt, instance,
62216223
migrate_data = self.compute_rpcapi.\
62226224
check_can_live_migrate_source(ctxt, instance,
62236225
dest_check_data)
6226+
# Create migrate_data vifs
6227+
migrate_data.vifs = \
6228+
migrate_data_obj.LiveMigrateData.create_skeleton_migrate_vifs(
6229+
instance.get_network_info())
6230+
# Claim PCI devices for VIFs on destination (if needed)
6231+
port_id_to_pci = self._claim_pci_for_instance_vifs(ctxt, instance)
6232+
# Update migrate VIFs with the newly claimed PCI devices
6233+
self._update_migrate_vifs_profile_with_pci(migrate_data.vifs,
6234+
port_id_to_pci)
62246235
finally:
62256236
self.driver.cleanup_live_migration_destination_check(ctxt,
62266237
dest_check_data)
@@ -6853,6 +6864,9 @@ def _post_live_migration(self, ctxt, instance, dest,
68536864
# method
68546865
destroy_vifs = True
68556866

6867+
# Free instance allocations on source before claims are allocated on
6868+
# destination node
6869+
self.rt.free_pci_device_allocations_for_instance(ctxt, instance)
68566870
# NOTE(danms): Save source node before calling post method on
68576871
# destination, which will update it
68586872
source_node = instance.node
@@ -6955,7 +6969,8 @@ def post_live_migration_at_destination(self, context, instance,
69556969
self.network_api.setup_networks_on_host(context, instance,
69566970
self.host)
69576971
migration = {'source_compute': instance.host,
6958-
'dest_compute': self.host, }
6972+
'dest_compute': self.host,
6973+
'migration_type': 'live-migration'}
69596974
self.network_api.migrate_instance_finish(context,
69606975
instance,
69616976
migration)
@@ -6970,6 +6985,8 @@ def post_live_migration_at_destination(self, context, instance,
69706985
phase=fields.NotificationPhase.START)
69716986
block_device_info = self._get_instance_block_device_info(context,
69726987
instance)
6988+
# Allocate the claimed PCI resources at destination.
6989+
self.rt.allocate_pci_devices_for_instance(context, instance)
69736990

69746991
try:
69756992
self.driver.post_live_migration_at_destination(
@@ -7187,6 +7204,10 @@ def rollback_live_migration_at_destination(self, context, instance,
71877204
# from remote volumes if necessary
71887205
block_device_info = self._get_instance_block_device_info(context,
71897206
instance)
7207+
# free any instance PCI claims done on destination during
7208+
# check_can_live_migrate_destination()
7209+
self.rt.free_pci_device_claims_for_instance(context, instance)
7210+
71907211
self.driver.rollback_live_migration_at_destination(
71917212
context, instance, network_info, block_device_info,
71927213
destroy_disks=destroy_disks, migrate_data=migrate_data)
@@ -8532,3 +8553,70 @@ def _cleanup_expired_console_auth_tokens(self, context):
85328553
"""
85338554
objects.ConsoleAuthToken.clean_expired_console_auths_for_host(
85348555
context, self.host)
8556+
8557+
def _claim_pci_for_instance_vifs(self, ctxt, instance):
    """Claim PCI devices on this compute node for the instance's VIFs.

    Every VIF of the instance is checked for an associated PCI request;
    the requests that are found are claimed together through the
    resource tracker.

    :param ctxt: Context
    :param instance: Instance object
    :return: <port ID: PciDevice> mapping for the VIFs that yielded a
             PCI claim on the compute node (empty when no VIF has a
             PCI request)
    """
    port_by_request_id = {}
    vif_requests = []

    for vif in instance.get_network_info():
        request = pci_req_module.get_instance_pci_request_from_vif(
            ctxt, instance, vif)
        if not request:
            # This VIF is not backed by a PCI request, nothing to claim.
            continue
        port_by_request_id[request.request_id] = vif['id']
        vif_requests.append(request)

    claimed_by_port = {}
    if not vif_requests:
        return claimed_by_port

    # NOTE(adrianc): We claim against the same requests as on the
    # source node.
    requests_obj = objects.InstancePCIRequests(
        requests=vif_requests,
        instance_uuid=instance.uuid)
    claimed_devices = self.rt.claim_pci_devices(ctxt, requests_obj)

    # Map every claimed device back to the port whose request it
    # satisfies so the caller can update the VIFMigrateData profile.
    for device in claimed_devices:
        LOG.debug("PCI device: %s Claimed on destination node",
                  device.address)
        claimed_by_port[port_by_request_id[device.request_id]] = device

    return claimed_by_port
8599+
8600+
def _update_migrate_vifs_profile_with_pci(self,
                                          migrate_vifs,
                                          port_id_to_pci_dev):
    """Write the claimed PCI devices into the migrate VIF profiles.

    Migrate VIFs whose port has no entry in ``port_id_to_pci_dev`` are
    left untouched.

    :param migrate_vifs: list of VIFMigrateData objects
    :param port_id_to_pci_dev: a <port_id: PciDevice> mapping
    :return: None.
    """
    for mig_vif in migrate_vifs:
        port_id = mig_vif.port_id
        if port_id in port_id_to_pci_dev:
            device = port_id_to_pci_dev[port_id]
            # Work on a deep copy so the source VIF's own profile is
            # not modified.
            new_profile = copy.deepcopy(mig_vif.source_vif['profile'])
            new_profile['pci_slot'] = device.address
            new_profile['pci_vendor_info'] = ':'.join(
                (device.vendor_id, device.product_id))
            mig_vif.profile = new_profile
            LOG.debug("Updating migrate VIF profile for port %(port_id)s:"
                      "%(profile)s", {'port_id': port_id,
                                      'profile': new_profile})

nova/compute/resource_tracker.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1514,3 +1514,46 @@ def build_failed(self, nodename):
15141514
def build_succeeded(self, nodename):
15151515
"""Resets the failed_builds stats for the given node."""
15161516
self.stats[nodename].build_succeeded()
1517+
1518+
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
def claim_pci_devices(self, context, pci_requests):
    """Claim PCI resources for the given requests and persist the claim.

    Delegates to the PCI tracker's ``claim_instance()`` (its third
    argument is passed as None) and then saves the tracker state.

    :param context: security context
    :param pci_requests: the PCI requests to claim; the caller in
        compute manager passes a nova.objects.InstancePCIRequests object
    :returns: the value returned by the PCI tracker's claim_instance();
        the caller iterates it as nova.objects.PciDevice objects
    """
    pci_tracker = self.pci_tracker
    claimed_devices = pci_tracker.claim_instance(
        context, pci_requests, None)
    pci_tracker.save(context)
    return claimed_devices
1530+
1531+
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
def allocate_pci_devices_for_instance(self, context, instance):
    """Allocate the PCI resources claimed for an instance.

    Delegates to the PCI tracker's ``allocate_instance()`` and then
    saves the tracker state.

    :param context: security context
    :param instance: instance object
    """
    pci_tracker = self.pci_tracker
    pci_tracker.allocate_instance(instance)
    pci_tracker.save(context)
1540+
1541+
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
def free_pci_device_allocations_for_instance(self, context, instance):
    """Free the PCI resources allocated to an instance.

    Delegates to the PCI tracker's ``free_instance_allocations()`` and
    then saves the tracker state.

    :param context: security context
    :param instance: instance object
    """
    pci_tracker = self.pci_tracker
    pci_tracker.free_instance_allocations(context, instance)
    pci_tracker.save(context)
1550+
1551+
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
def free_pci_device_claims_for_instance(self, context, instance):
    """Free the PCI resources claimed (but not allocated) for an instance.

    Delegates to the PCI tracker's ``free_instance_claims()`` and then
    saves the tracker state.

    :param context: security context
    :param instance: instance object
    """
    pci_tracker = self.pci_tracker
    pci_tracker.free_instance_claims(context, instance)
    pci_tracker.save(context)

nova/conductor/tasks/live_migrate.py

Lines changed: 88 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from nova import network
2626
from nova import objects
2727
from nova.objects import fields as obj_fields
28+
from nova.objects import migrate_data as migrate_data_obj
2829
from nova.scheduler import utils as scheduler_utils
2930

3031
LOG = logging.getLogger(__name__)
@@ -44,6 +45,19 @@ def supports_extended_port_binding(context, host):
4445
return svc.version >= 35
4546

4647

48+
def supports_vif_related_pci_allocations(context, host):
    """Tell whether a compute host can handle VIF related PCI allocation
    during live migration.

    The nova-compute service record of the host is looked up and its
    version is compared against 36.

    :param context: The user request context.
    :param host: The nova-compute host to check.
    :returns: True if the compute host is new enough to support vif related
              PCI allocations
    """
    compute_service = objects.Service.get_by_host_and_binary(
        context, host, 'nova-compute')
    is_new_enough = compute_service.version >= 36
    return is_new_enough
59+
60+
4761
class LiveMigrationTask(base.TaskBase):
4862
def __init__(self, context, instance, destination,
4963
block_migration, disk_over_commit, migration, compute_rpcapi,
@@ -186,6 +200,47 @@ def _check_instance_has_no_numa(self):
186200
else:
187201
raise exception.MigrationPreCheckError(reason=msg)
188202

203+
def _check_can_migrate_pci(self, src_host, dest_host):
    """Verify that the instance's PCI requests permit live migration.

    Returns without error when the instance has no PCI requests.
    Otherwise live migration is only supported if:

        1. Every PCI request of the instance is VIF related.
        2. Neutron supports the multiple port binding extension.
        3. Src and Dest host support VIF related PCI allocations.

    :param src_host: the source nova-compute host.
    :param dest_host: the destination nova-compute host.
    :raises: MigrationPreCheckError if one of the conditions is not met.
    """
    instance_pci_requests = self.instance.pci_requests
    if (instance_pci_requests is None or
            len(instance_pci_requests.requests) == 0):
        return

    # Allow only VIF related PCI requests in live migration.
    # PCI requests come from two sources: instance flavor and
    # SR-IOV ports.
    # SR-IOV ports pci_request don't have an alias_name.
    # TODO(adrianc): add an is_sriov_port property to PCIRequest
    # to make this cryptic check clearer (also in resource_tracker)
    if any(request.alias_name is not None
           for request in instance_pci_requests.requests):
        raise exception.MigrationPreCheckError(
            reason="non-VIF related PCI requests for instance "
                   "are not allowed for live migration.")

    # All PCI requests are VIF related, now check neutron,
    # source and destination compute nodes.
    neutron_ready = self.network_api.supports_port_binding_extension(
        self.context)
    if not neutron_ready:
        raise exception.MigrationPreCheckError(
            reason="Cannot live migrate VIF with related PCI, Neutron "
                   "does not support required port binding extension.")

    # The destination is only queried when the source passes the check.
    if (not supports_vif_related_pci_allocations(self.context, src_host) or
            not supports_vif_related_pci_allocations(self.context,
                                                     dest_host)):
        raise exception.MigrationPreCheckError(
            reason="Cannot live migrate VIF with related PCI, "
                   "source and destination nodes do not support "
                   "the operation.")
243+
189244
def _check_host_is_up(self, host):
190245
service = objects.Service.get_by_compute_host(self.context, host)
191246

@@ -265,6 +320,7 @@ def _check_compatible_with_source_hypervisor(self, destination):
265320
return source_info, destination_info
266321

267322
def _call_livem_checks_on_host(self, destination):
323+
self._check_can_migrate_pci(self.source, destination)
268324
try:
269325
self.migrate_data = self.compute_rpcapi.\
270326
check_can_live_migrate_destination(self.context, self.instance,
@@ -280,8 +336,17 @@ def _call_livem_checks_on_host(self, destination):
280336
if (self.network_api.supports_port_binding_extension(self.context) and
281337
supports_extended_port_binding(self.context, self.source) and
282338
supports_extended_port_binding(self.context, destination)):
283-
self.migrate_data.vifs = (
284-
self._bind_ports_on_destination(destination))
339+
if 'vifs' not in self.migrate_data:
340+
# migrate data vifs were not constructed in dest compute
341+
# during check_can_live_migrate_destination, construct a
342+
# skeleton to be updated after port binding.
343+
# TODO(adrianc): This can be removed once we move to T release
344+
self.migrate_data.vifs = migrate_data_obj.LiveMigrateData.\
345+
create_skeleton_migrate_vifs(
346+
self.instance.get_network_info())
347+
bindings = self._bind_ports_on_destination(destination)
348+
self._update_migrate_vifs_from_bindings(self.migrate_data.vifs,
349+
bindings)
285350

286351
def _bind_ports_on_destination(self, destination):
287352
LOG.debug('Start binding ports on destination host: %s', destination,
@@ -291,23 +356,33 @@ def _bind_ports_on_destination(self, destination):
291356
# that was bound. This information is then stuffed into the
292357
# migrate_data.
293358
try:
359+
# Note(adrianc): migrate_data.vifs was partially filled
360+
# by destination compute if compute is new enough.
361+
# if that is the case, it may have updated the required port
362+
# profile for the destination node (e.g new PCI address if SR-IOV)
363+
# perform port binding against the requested profile
364+
migrate_vifs_with_profile = [mig_vif for mig_vif in
365+
self.migrate_data.vifs
366+
if 'profile_json' in mig_vif]
367+
368+
ports_profile = None
369+
if migrate_vifs_with_profile:
370+
# Update to the port profile is required
371+
ports_profile = {mig_vif.port_id: mig_vif.profile
372+
for mig_vif in migrate_vifs_with_profile}
373+
294374
bindings = self.network_api.bind_ports_to_host(
295-
self.context, self.instance, destination)
375+
self.context, self.instance, destination, None, ports_profile)
296376
except exception.PortBindingFailed as e:
297377
# Port binding failed for that host, try another one.
298378
raise exception.MigrationPreCheckError(
299379
reason=e.format_message())
380+
return bindings
300381

301-
source_vif_map = {
302-
vif['id']: vif for vif in self.instance.get_network_info()
303-
}
304-
migrate_vifs = []
305-
for port_id, binding in bindings.items():
306-
migrate_vif = objects.VIFMigrateData(
307-
port_id=port_id, **binding)
308-
migrate_vif.source_vif = source_vif_map[port_id]
309-
migrate_vifs.append(migrate_vif)
310-
return migrate_vifs
382+
def _update_migrate_vifs_from_bindings(self, migrate_vifs, bindings):
383+
for migrate_vif in migrate_vifs:
384+
for attr_name, attr_val in bindings[migrate_vif.port_id].items():
385+
setattr(migrate_vif, attr_name, attr_val)
311386

312387
def _get_source_cell_mapping(self):
313388
"""Returns the CellMapping for the cell in which the instance lives

nova/network/neutronv2/api.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3335,7 +3335,11 @@ def _update_port_binding_for_instance(self, context, instance, host,
33353335
# as in an unshelve operation.
33363336
vnic_type = p.get('binding:vnic_type')
33373337
if (vnic_type in network_model.VNIC_TYPES_SRIOV
3338-
and migration is not None):
3338+
and migration is not None
3339+
and migration['migration_type'] !=
3340+
constants.LIVE_MIGRATION):
3341+
# Note(adrianc): for live migration binding profile was already
3342+
# updated in conductor when calling bind_ports_to_host()
33393343
if not pci_mapping:
33403344
pci_mapping = self._get_pci_mapping_for_migration(context,
33413345
instance, migration)

nova/network/neutronv2/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@
2020
MULTI_NET_EXT = 'Multi Provider Network'
2121
SUBSTR_PORT_FILTERING = 'IP address substring filtering'
2222
PORT_BINDING_EXTENDED = 'Port Bindings Extended'
23+
LIVE_MIGRATION = 'live-migration'

nova/objects/migrate_data.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,23 @@ class LiveMigrateData(obj_base.NovaObject):
120120
'vifs': fields.ListOfObjectsField('VIFMigrateData'),
121121
}
122122

123+
@staticmethod
def create_skeleton_migrate_vifs(vifs):
    """Build skeleton migrate VIF objects for live migration.

    Each returned object only carries the port ID (taken from the VIF's
    'id' key) and the source VIF itself.

    :param vifs: a list of VIFs.
    :return: list of VIFMigrateData object corresponding to the provided
             VIFs.
    """
    return [VIFMigrateData(port_id=vif['id'], source_vif=vif)
            for vif in vifs]
139+
123140

124141
@obj_base.NovaObjectRegistry.register
125142
class LibvirtLiveMigrateBDMInfo(obj_base.NovaObject):

0 commit comments

Comments
 (0)