Skip to content

Commit 750aef5

Browse files
committed
Add finish_revert_snapshot_based_resize_at_source compute method
This adds the finish_revert_snapshot_based_resize_at_source() method to the compute service. The method completes the cross-cell resize revert on the source host by updating port bindings, re-connecting volumes, spawning the guest, and waiting for the network-vif-plugged event from neutron. If the resize started with a stopped instance, the revert ensures the guest is powered off when it is spawned. The instance record is updated with the old_flavor information, allocations are reverted in placement, and the migration status in the source cell database is updated to "reverted". Unlike the finish_revert_resize() method, this method does not send notifications because those will be sent from conductor. Note that this implementation tries to be a bit more graceful about error handling than the old same-cell finish_revert_resize method since there are a lot of moving parts and we want to try to clean up as much as possible. Part of blueprint cross-cell-resize Change-Id: I7e7afddbd9e0f57dfb1175a0bb2b54f2ed5500f2
1 parent 26da441 commit 750aef5

File tree

6 files changed

+461
-10
lines changed

6 files changed

+461
-10
lines changed

nova/compute/manager.py

Lines changed: 171 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ def update_compute_provider_status(self, context, rp_uuid, enabled):
560560
class ComputeManager(manager.Manager):
561561
"""Manages the running instances from creation to destruction."""
562562

563-
target = messaging.Target(version='5.9')
563+
target = messaging.Target(version='5.10')
564564

565565
def __init__(self, compute_driver=None, *args, **kwargs):
566566
"""Load configuration options and connect to the hypervisor."""
@@ -4686,6 +4686,153 @@ def _revert_snapshot_based_resize_at_dest(
46864686
self.rt.drop_move_claim(ctxt, instance, instance.node,
46874687
instance_type=instance.new_flavor)
46884688

4689+
@wrap_exception()
4690+
@reverts_task_state
4691+
@wrap_instance_event(prefix='compute')
4692+
@errors_out_migration
4693+
@wrap_instance_fault
4694+
def finish_revert_snapshot_based_resize_at_source(
4695+
self, ctxt, instance, migration):
4696+
"""Reverts a snapshot-based resize at the source host.
4697+
4698+
Spawn the guest and re-connect volumes/VIFs on the source host and
4699+
revert the instance to use the old_flavor for resource usage reporting.
4700+
4701+
Updates allocations in the placement service to move the source node
4702+
allocations, held by the migration record, to the instance and drop
4703+
the allocations held by the instance on the destination node.
4704+
4705+
:param ctxt: nova auth request context targeted at the source cell
4706+
:param instance: Instance object whose vm_state is "resized" and
4707+
task_state is "resize_reverting".
4708+
:param migration: Migration object whose status is "reverting".
4709+
"""
4710+
4711+
@utils.synchronized(instance.uuid)
4712+
def do_revert():
4713+
LOG.info('Reverting resize on source host.', instance=instance)
4714+
with self._error_out_instance_on_exception(ctxt, instance):
4715+
self._finish_revert_snapshot_based_resize_at_source(
4716+
ctxt, instance, migration)
4717+
do_revert()
4718+
4719+
# Broadcast to all schedulers that the instance is on this host.
4720+
# This is best effort so if anything fails just log it.
4721+
try:
4722+
self._update_scheduler_instance_info(ctxt, instance)
4723+
except Exception as e:
4724+
LOG.warning('finish_revert_snapshot_based_resize_at_source failed '
4725+
'during post-processing. Error: %s', e,
4726+
instance=instance)
4727+
4728+
def _finish_revert_snapshot_based_resize_at_source(
4729+
self, ctxt, instance, migration):
4730+
"""Private version of finish_revert_snapshot_based_resize_at_source.
4731+
4732+
This allows the main method to be decorated with error handlers.
4733+
4734+
:param ctxt: nova auth request context targeted at the source cell
4735+
:param instance: Instance object whose vm_state is "resized" and
4736+
task_state is "resize_reverting".
4737+
:param migration: Migration object whose status is "reverting".
4738+
"""
4739+
# Delete stashed old_vm_state information. We will use this to
4740+
# determine if the guest should be powered on when we spawn it.
4741+
old_vm_state = instance.system_metadata.pop(
4742+
'old_vm_state', vm_states.ACTIVE)
4743+
4744+
# Update instance host/node and flavor-related fields. After this
4745+
# if anything fails the instance will get rebuilt/rebooted on this
4746+
# host.
4747+
self._finish_revert_resize_update_instance_flavor_host_node(
4748+
instance, migration)
4749+
4750+
# Move the allocations against the source compute node resource
4751+
# provider, held by the migration, to the instance which will drop
4752+
# the destination compute node resource provider allocations held by
4753+
# the instance. This puts the allocations against the source node
4754+
# back to the old_flavor and owned by the instance.
4755+
try:
4756+
self._revert_allocation(ctxt, instance, migration)
4757+
except exception.AllocationMoveFailed:
4758+
# Log the error but do not re-raise because we want to continue to
4759+
# process ports and volumes below.
4760+
LOG.error('Reverting allocation in placement for migration '
4761+
'%(migration_uuid)s failed. You may need to manually '
4762+
'remove the allocations for the migration consumer '
4763+
'against the source node resource provider '
4764+
'%(source_provider)s and the allocations for the '
4765+
'instance consumer against the destination node '
4766+
'resource provider %(dest_provider)s and then run the '
4767+
'"nova-manage placement heal_allocations" command.',
4768+
{'instance_uuid': instance.uuid,
4769+
'migration_uuid': migration.uuid,
4770+
'source_provider': migration.source_node,
4771+
'dest_provider': migration.dest_node},
4772+
instance=instance)
4773+
4774+
bdms = instance.get_bdms()
4775+
# prep_snapshot_based_resize_at_source created empty volume attachments
4776+
# that we need to update here to get the connection_info before calling
4777+
# driver.finish_revert_migration which will connect the volumes to this
4778+
# host.
4779+
LOG.debug('Updating volume attachments for target host %s.',
4780+
self.host, instance=instance)
4781+
# TODO(mriedem): We should probably make _update_volume_attachments
4782+
# (optionally) graceful to errors so we (1) try to process all
4783+
# attachments and (2) continue to process networking below.
4784+
self._update_volume_attachments(ctxt, instance, bdms)
4785+
4786+
LOG.debug('Updating port bindings for source host %s.',
4787+
self.host, instance=instance)
4788+
# TODO(mriedem): Calculate provider mappings when we support
4789+
# cross-cell resize/migrate with ports having resource requests.
4790+
self._finish_revert_resize_network_migrate_finish(
4791+
ctxt, instance, migration, provider_mappings=None)
4792+
network_info = self.network_api.get_instance_nw_info(ctxt, instance)
4793+
4794+
# Remember that prep_snapshot_based_resize_at_source destroyed the
4795+
# guest but left the disks intact so we cannot call spawn() here but
4796+
# finish_revert_migration should do the job.
4797+
block_device_info = self._get_instance_block_device_info(
4798+
ctxt, instance, bdms=bdms)
4799+
power_on = old_vm_state == vm_states.ACTIVE
4800+
driver_error = None
4801+
try:
4802+
self.driver.finish_revert_migration(
4803+
ctxt, instance, network_info, migration,
4804+
block_device_info=block_device_info, power_on=power_on)
4805+
except Exception as e:
4806+
driver_error = e
4807+
# Leave a hint about hard rebooting the guest and reraise so the
4808+
# instance is put into ERROR state.
4809+
with excutils.save_and_reraise_exception(logger=LOG):
4810+
LOG.error('An error occurred during finish_revert_migration. '
4811+
'The instance may need to be hard rebooted. Error: '
4812+
'%s', driver_error, instance=instance)
4813+
else:
4814+
# Perform final cleanup of the instance in the database.
4815+
instance.drop_migration_context()
4816+
# If the original vm_state was STOPPED, set it back to STOPPED.
4817+
vm_state = vm_states.ACTIVE if power_on else vm_states.STOPPED
4818+
self._update_instance_after_spawn(
4819+
ctxt, instance, vm_state=vm_state)
4820+
instance.save(expected_task_state=[task_states.RESIZE_REVERTING])
4821+
finally:
4822+
# Complete any volume attachments so the volumes are in-use. We
4823+
# do this regardless of finish_revert_migration failing because
4824+
# the instance is back on this host now and we do not want to leave
4825+
# the volumes in a pending state in case the instance is hard
4826+
# rebooted.
4827+
LOG.debug('Completing volume attachments for instance on source '
4828+
'host.', instance=instance)
4829+
with excutils.save_and_reraise_exception(
4830+
reraise=driver_error is not None, logger=LOG):
4831+
self._complete_volume_attachments(ctxt, bdms)
4832+
4833+
migration.status = 'reverted'
4834+
migration.save()
4835+
46894836
@wrap_exception()
46904837
@reverts_task_state
46914838
@wrap_instance_event(prefix='compute')
@@ -4789,6 +4936,27 @@ def _finish_revert_resize_network_migrate_finish(
47894936
LOG.error('Timeout waiting for Neutron events: %s', events,
47904937
instance=instance)
47914938

4939+
def _finish_revert_resize_update_instance_flavor_host_node(self, instance,
4940+
migration):
4941+
"""Updates host/node and flavor-related fields on the instance.
4942+
4943+
This is used when finishing the revert resize operation on the source
4944+
host and updates the instance flavor-related fields back to the old
4945+
flavor and then nulls out the old/new_flavor fields.
4946+
4947+
The instance host/node fields are also set back to the source compute
4948+
host/node.
4949+
4950+
:param instance: Instance object
4951+
:param migration: Migration object
4952+
"""
4953+
self._set_instance_info(instance, instance.old_flavor)
4954+
instance.old_flavor = None
4955+
instance.new_flavor = None
4956+
instance.host = migration.source_compute
4957+
instance.node = migration.source_node
4958+
instance.save(expected_task_state=[task_states.RESIZE_REVERTING])
4959+
47924960
@wrap_exception()
47934961
@reverts_task_state
47944962
@wrap_instance_event(prefix='compute')
@@ -4817,12 +4985,8 @@ def finish_revert_resize(
48174985
old_vm_state = instance.system_metadata.pop('old_vm_state',
48184986
vm_states.ACTIVE)
48194987

4820-
self._set_instance_info(instance, instance.old_flavor)
4821-
instance.old_flavor = None
4822-
instance.new_flavor = None
4823-
instance.host = migration.source_compute
4824-
instance.node = migration.source_node
4825-
instance.save()
4988+
self._finish_revert_resize_update_instance_flavor_host_node(
4989+
instance, migration)
48264990

48274991
try:
48284992
source_allocations = self._revert_allocation(

nova/compute/rpcapi.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ class ComputeAPI(object):
375375
* 5.7 - Add finish_snapshot_based_resize_at_dest()
376376
* 5.8 - Add confirm_snapshot_based_resize_at_source()
377377
* 5.9 - Add revert_snapshot_based_resize_at_dest()
378+
* 5.10 - Add finish_revert_snapshot_based_resize_at_source()
378379
'''
379380

380381
VERSION_ALIASES = {
@@ -732,6 +733,41 @@ def finish_snapshot_based_resize_at_dest(
732733
instance=instance, migration=migration, snapshot_id=snapshot_id,
733734
request_spec=request_spec)
734735

736+
def finish_revert_snapshot_based_resize_at_source(
737+
self, ctxt, instance, migration):
738+
"""Reverts a snapshot-based resize at the source host.
739+
740+
Spawn the guest and re-connect volumes/VIFs on the source host and
741+
revert the instance to use the old_flavor for resource usage reporting.
742+
743+
Updates allocations in the placement service to move the source node
744+
allocations, held by the migration record, to the instance and drop
745+
the allocations held by the instance on the destination node.
746+
747+
This is a synchronous RPC call using the ``long_rpc_timeout``
748+
configuration option.
749+
750+
:param ctxt: nova auth request context targeted at the source cell
751+
:param instance: Instance object whose vm_state is "resized" and
752+
task_state is "resize_reverting".
753+
:param migration: Migration object whose status is "reverting".
754+
:raises: nova.exception.MigrationError if the source compute is too
755+
old to perform the operation
756+
:raises: oslo_messaging.exceptions.MessagingTimeout if the RPC call
757+
times out
758+
"""
759+
version = '5.10'
760+
client = self.router.client(ctxt)
761+
if not client.can_send_version(version):
762+
raise exception.MigrationError(reason=_('Compute too old'))
763+
cctxt = client.prepare(server=migration.source_compute,
764+
version=version,
765+
call_monitor_timeout=CONF.rpc_response_timeout,
766+
timeout=CONF.long_rpc_timeout)
767+
return cctxt.call(
768+
ctxt, 'finish_revert_snapshot_based_resize_at_source',
769+
instance=instance, migration=migration)
770+
735771
def get_console_output(self, ctxt, instance, tail_length):
736772
version = '5.0'
737773
cctxt = self.router.client(ctxt).prepare(

nova/objects/service.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232

3333
# NOTE(danms): This is the global service version counter
34-
SERVICE_VERSION = 46
34+
SERVICE_VERSION = 47
3535

3636

3737
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -173,6 +173,9 @@
173173
{'compute_rpc': '5.8'},
174174
# Version 46: Compute RPC v5.9: revert_snapshot_based_resize_at_dest
175175
{'compute_rpc': '5.9'},
176+
# Version 47: Compute RPC v5.10:
177+
# finish_revert_snapshot_based_resize_at_source
178+
{'compute_rpc': '5.10'},
176179
)
177180

178181

0 commit comments

Comments
 (0)