Skip to content

Commit 3885f98

Browse files
SeanMooneyauniyal61
authored andcommitted
[compute] always set instance.host in post_livemigration
This change add a new _post_live_migration_update_host function that wraps _post_live_migration and just ensures that if we exit due to an exception instance.host is set to the destination host. when we are in _post_live_migration the guest has already started running on the destination host and we cannot revert. Sometimes admins or users will hard reboot the instance expecting that to fix everything when the vm enters the error state after the failed migrations. Previously this would end up recreating the instance on the source node leading to possible data corruption if the instance used shared storage. Change-Id: Ibc4bc7edf1c8d1e841c72c9188a0a62836e9f153 Partial-Bug: #1628606 (cherry picked from commit 8449b7c) (cherry picked from commit 643b0c7) (cherry picked from commit 17ae907) (cherry picked from commit 15502dd) (cherry picked from commit 43c0e40) (cherry picked from commit 0ac64bb)
1 parent 5e955b6 commit 3885f98

File tree

3 files changed

+63
-6
lines changed

3 files changed

+63
-6
lines changed

nova/compute/manager.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8229,8 +8229,9 @@ def _do_live_migration(self, context, dest, instance, block_migration,
82298229
# host attachment. We fetch BDMs before that to retain connection_info
82308230
# and attachment_id relating to the source host for post migration
82318231
# cleanup.
8232-
post_live_migration = functools.partial(self._post_live_migration,
8233-
source_bdms=source_bdms)
8232+
post_live_migration = functools.partial(
8233+
self._post_live_migration_update_host, source_bdms=source_bdms
8234+
)
82348235
rollback_live_migration = functools.partial(
82358236
self._rollback_live_migration, source_bdms=source_bdms)
82368237

@@ -8479,6 +8480,42 @@ def _post_live_migration_remove_source_vol_connections(
84798480
bdm.attachment_id, self.host,
84808481
six.text_type(e), instance=instance)
84818482

8483+
# TODO(sean-k-mooney): add typing
8484+
def _post_live_migration_update_host(
8485+
self, ctxt, instance, dest, block_migration=False,
8486+
migrate_data=None, source_bdms=None
8487+
):
8488+
try:
8489+
self._post_live_migration(
8490+
ctxt, instance, dest, block_migration, migrate_data,
8491+
source_bdms)
8492+
except Exception:
8493+
# Restore the instance object
8494+
node_name = None
8495+
try:
8496+
# get node name of compute, where instance will be
8497+
# running after migration, that is destination host
8498+
compute_node = self._get_compute_info(ctxt, dest)
8499+
node_name = compute_node.hypervisor_hostname
8500+
except exception.ComputeHostNotFound:
8501+
LOG.exception('Failed to get compute_info for %s', dest)
8502+
8503+
# we can never rollback from post live migration and we can only
8504+
# get here if the instance is running on the dest so we ensure
8505+
# the instance.host is set correctly and reraise the original
8506+
# exception unmodified.
8507+
if instance.host != dest:
8508+
# apply saves the new fields while drop actually removes the
8509+
# migration context from the instance, so migration persists.
8510+
instance.apply_migration_context()
8511+
instance.drop_migration_context()
8512+
instance.host = dest
8513+
instance.task_state = None
8514+
instance.node = node_name
8515+
instance.progress = 0
8516+
instance.save()
8517+
raise
8518+
84828519
@wrap_exception()
84838520
@wrap_instance_fault
84848521
def _post_live_migration(self, ctxt, instance, dest,
@@ -8490,7 +8527,7 @@ def _post_live_migration(self, ctxt, instance, dest,
84908527
and mainly updating database record.
84918528

84928529
:param ctxt: security context
8493-
:param instance: instance dict
8530+
:param instance: instance object
84948531
:param dest: destination host
84958532
:param block_migration: if true, prepare for block migration
84968533
:param migrate_data: if not None, it is a dict which has data

nova/tests/functional/regressions/test_bug_1628606.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,5 @@ def test_post_live_migration(self, mock_migration):
5757
server = self._live_migrate(
5858
server, migration_expected_state='error',
5959
server_expected_state='ERROR')
60-
# FIXME(amit): this should point to the dest as after migration
61-
# but does not because of bug 1628606
62-
self.assertEqual(self.src.host, server['OS-EXT-SRV-ATTR:host'])
60+
61+
self.assertEqual(self.dest.host, server['OS-EXT-SRV-ATTR:host'])

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9741,6 +9741,27 @@ def test_post_live_migration_new_allocations(self):
97419741
self.instance,
97429742
migration)
97439743

9744+
def test_post_live_migration_update_host(self):
9745+
@mock.patch.object(self.compute, '_get_compute_info')
9746+
def _test_post_live_migration(_get_compute_info):
9747+
dest_host = 'dest'
9748+
cn = objects.ComputeNode(hypervisor_hostname=dest_host)
9749+
_get_compute_info.return_value = cn
9750+
instance = fake_instance.fake_instance_obj(self.context,
9751+
node='src',
9752+
uuid=uuids.instance)
9753+
with mock.patch.object(self.compute, "_post_live_migration"
9754+
) as plm, mock.patch.object(instance, "save") as save:
9755+
error = ValueError("some failure")
9756+
plm.side_effect = error
9757+
self.assertRaises(
9758+
ValueError, self.compute._post_live_migration_update_host,
9759+
self.context, instance, dest_host)
9760+
save.assert_called_once()
9761+
self.assertEqual(instance.host, dest_host)
9762+
9763+
_test_post_live_migration()
9764+
97449765
def test_post_live_migration_cinder_pre_344_api(self):
97459766
# Because live migration has
97469767
# succeeded,_post_live_migration_remove_source_vol_connections()

0 commit comments

Comments
 (0)