Skip to content

Commit 29b94aa

Browse files
committed
Fix pre_live_migration rollback
During the pre-live-migration process, Nova performs most of the tasks related to the creation and operation of the VM on the destination host. That is done without interrupting any of the hardware on the source host. If pre_live_migration fails, those same operations should be rolled back. Currently Nova shares _rollback_live_migration between live migration and pre_live_migration rollbacks, which causes the source host to try to re-attach network interfaces that were never actually detached from it. This patch fixes that by adding a conditional so that Nova takes different paths for live migration and pre_live_migration rollbacks. Closes-bug: #1944619 Change-Id: I784190ac356695dd508e0ad8ec31d8eaa3ebee56 (cherry picked from commit 63ffba7)
1 parent 3402aa7 commit 29b94aa

File tree

4 files changed

+27
-13
lines changed

4 files changed

+27
-13
lines changed

nova/compute/manager.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8413,7 +8413,8 @@ def _cleanup_pre_live_migration(self, context, dest, instance,
84138413
migrate_data.migration = migration
84148414
self._rollback_live_migration(context, instance, dest,
84158415
migrate_data=migrate_data,
8416-
source_bdms=source_bdms)
8416+
source_bdms=source_bdms,
8417+
pre_live_migration=True)
84178418

84188419
def _do_pre_live_migration_from_source(self, context, dest, instance,
84198420
block_migration, migration,
@@ -9167,7 +9168,8 @@ def _rollback_volume_bdms(self, context, bdms, original_bdms, instance):
91679168
def _rollback_live_migration(self, context, instance,
91689169
dest, migrate_data=None,
91699170
migration_status='failed',
9170-
source_bdms=None):
9171+
source_bdms=None,
9172+
pre_live_migration=False):
91719173
"""Recovers Instance/volume state from migrating -> running.
91729174

91739175
:param context: security context
@@ -9217,8 +9219,14 @@ def _rollback_live_migration(self, context, instance,
92179219
# for nova-network)
92189220
# NOTE(mriedem): This is a no-op for neutron.
92199221
self.network_api.setup_networks_on_host(context, instance, self.host)
9220-
self.driver.rollback_live_migration_at_source(context, instance,
9221-
migrate_data)
9222+
9223+
# NOTE(erlon): We should make sure that rollback_live_migration_at_src
9224+
# is not called in the pre_live_migration rollback as that will trigger
9225+
# the src host to re-attach interfaces which were not detached
9226+
# previously.
9227+
if not pre_live_migration:
9228+
self.driver.rollback_live_migration_at_source(context, instance,
9229+
migrate_data)
92229230

92239231
# NOTE(lyarwood): Fetch the current list of BDMs, disconnect any
92249232
# connected volumes from the dest and delete any volume attachments

nova/tests/functional/regressions/test_bug_1944619.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,5 @@ def test_rollback_pre_live_migration(self):
7272
self._live_migrate(self.server,
7373
migration_expected_state='failed',
7474
server_expected_state='MIGRATING')
75-
# FIXME(erlon): In the current behavior,
76-
# rollback_live_migration_at_source is called if an error happens
77-
# during the pre_live_migration phase on the destination and therefore
78-
# triggers the observed bug. rollback_live_migration_at_source should
79-
# *not* be called for when errors happen during pre_live_migration
80-
# phase.
81-
mlpr.assert_called_once()
75+
mlpr.assert_not_called()
8276
mlpp.assert_called_once()

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9539,7 +9539,8 @@ def test_live_migration_wait_vif_plugged_vif_plug_error(
95399539
self.assertEqual('error', self.migration.status)
95409540
mock_rollback_live_mig.assert_called_once_with(
95419541
self.context, self.instance, 'dest-host',
9542-
migrate_data=migrate_data, source_bdms=source_bdms)
9542+
migrate_data=migrate_data, source_bdms=source_bdms,
9543+
pre_live_migration=True)
95439544

95449545
@mock.patch('nova.compute.rpcapi.ComputeAPI.pre_live_migration')
95459546
@mock.patch('nova.compute.manager.ComputeManager._rollback_live_migration')
@@ -9574,7 +9575,8 @@ def test_live_migration_wait_vif_plugged_timeout_error(
95749575
self.assertEqual('error', self.migration.status)
95759576
mock_rollback_live_mig.assert_called_once_with(
95769577
self.context, self.instance, 'dest-host',
9577-
migrate_data=migrate_data, source_bdms=source_bdms)
9578+
migrate_data=migrate_data, source_bdms=source_bdms,
9579+
pre_live_migration=True)
95789580

95799581
@mock.patch('nova.compute.rpcapi.ComputeAPI.pre_live_migration')
95809582
@mock.patch('nova.compute.manager.ComputeManager._rollback_live_migration')
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
fixes:
3+
- |
4+
Instances with hardware offloaded ovs ports no longer lose connectivity
5+
after failed live migrations. The driver.rollback_live_migration_at_source
6+
function is no longer called during pre_live_migration rollback
7+
which previously resulted in connectivity loss following a failed live
8+
migration. See `Bug 1944619`_ for more details.
9+
10+
.. _Bug 1944619: https://bugs.launchpad.net/nova/+bug/1944619

0 commit comments

Comments
 (0)