|
| 1 | +# Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 2 | +# not use this file except in compliance with the License. You may obtain |
| 3 | +# a copy of the License at |
| 4 | +# |
| 5 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 6 | +# |
| 7 | +# Unless required by applicable law or agreed to in writing, software |
| 8 | +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 9 | +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 10 | +# License for the specific language governing permissions and limitations |
| 11 | +# under the License. |
| 12 | + |
| 13 | +import mock |
| 14 | +import time |
| 15 | + |
| 16 | +from nova import context as nova_context |
| 17 | +from nova import objects |
| 18 | +from nova.tests.functional import integrated_helpers |
| 19 | +from nova.tests.unit.image import fake as fake_image |
| 20 | + |
| 21 | + |
| 22 | +class ComputeManagerInitHostTestCase( |
| 23 | + integrated_helpers.ProviderUsageBaseTestCase): |
| 24 | + """Tests various actions performed when the nova-compute service starts.""" |
| 25 | + |
| 26 | + compute_driver = 'fake.MediumFakeDriver' |
| 27 | + |
| 28 | + def test_migrate_disk_and_power_off_crash_finish_revert_migration(self): |
| 29 | + """Tests the scenario that the compute service crashes while the |
| 30 | + driver's migrate_disk_and_power_off method is running (we could be |
| 31 | + slow transferring disks or something when it crashed) and on restart |
| 32 | + of the compute service the driver's finish_revert_migration method |
| 33 | + is called to cleanup the source host and reset the instnace task_state. |
| 34 | + """ |
| 35 | + # Start two compute service so we migrate across hosts. |
| 36 | + for x in range(2): |
| 37 | + self._start_compute('host%d' % x) |
| 38 | + # Create a server, it does not matter on which host it lands. |
| 39 | + name = 'test_migrate_disk_and_power_off_crash_finish_revert_migration' |
| 40 | + server = self._build_minimal_create_server_request( |
| 41 | + self.api, name, image_uuid=fake_image.get_valid_image_id(), |
| 42 | + networks='auto') |
| 43 | + server = self.api.post_server({'server': server}) |
| 44 | + server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE') |
| 45 | + # Save the source hostname for assertions later. |
| 46 | + source_host = server['OS-EXT-SRV-ATTR:host'] |
| 47 | + |
| 48 | + def fake_migrate_disk_and_power_off(*args, **kwargs): |
| 49 | + # Simulate the source compute service crashing by restarting it. |
| 50 | + self.restart_compute_service(self.computes[source_host]) |
| 51 | + # We have to keep the method from returning before asserting the |
| 52 | + # _init_instance restart behavior otherwise resize_instance will |
| 53 | + # fail and set the instance to ERROR status, revert allocations, |
| 54 | + # etc which is not realistic if the service actually crashed while |
| 55 | + # migrate_disk_and_power_off was running. |
| 56 | + time.sleep(30) |
| 57 | + |
| 58 | + source_driver = self.computes[source_host].manager.driver |
| 59 | + with mock.patch.object(source_driver, 'migrate_disk_and_power_off', |
| 60 | + side_effect=fake_migrate_disk_and_power_off): |
| 61 | + # Initiate a cold migration from the source host. |
| 62 | + self.admin_api.post_server_action(server['id'], {'migrate': None}) |
| 63 | + # Now wait for the task_state to be reset to None during |
| 64 | + # _init_instance. |
| 65 | + server = self._wait_for_server_parameter( |
| 66 | + self.admin_api, server, { |
| 67 | + 'status': 'ACTIVE', |
| 68 | + 'OS-EXT-STS:task_state': None, |
| 69 | + 'OS-EXT-SRV-ATTR:host': source_host |
| 70 | + } |
| 71 | + ) |
| 72 | + |
| 73 | + # Assert we went through the _init_instance processing we expect. |
| 74 | + log_out = self.stdlog.logger.output |
| 75 | + self.assertIn('Instance found in migrating state during startup. ' |
| 76 | + 'Resetting task_state', log_out) |
| 77 | + # Assert that driver.finish_revert_migration did not raise an error. |
| 78 | + self.assertNotIn('Failed to revert crashed migration', log_out) |
| 79 | + |
| 80 | + # The migration status should be "error" rather than stuck as |
| 81 | + # "migrating". |
| 82 | + context = nova_context.get_admin_context() |
| 83 | + # FIXME(mriedem): This is bug 1836369 because we would normally expect |
| 84 | + # Migration.get_by_instance_and_status to raise |
| 85 | + # MigrationNotFoundByStatus since the status should be "error". |
| 86 | + objects.Migration.get_by_instance_and_status( |
| 87 | + context, server['id'], 'migrating') |
| 88 | + |
| 89 | + # Assert things related to the resize get cleaned up: |
| 90 | + # - things set on the instance during prep_resize like: |
| 91 | + # - migration_context |
| 92 | + # - new_flavor |
| 93 | + # - stashed old_vm_state in system_metadata |
| 94 | + # - migration-based allocations from conductor/scheduler, i.e. that the |
| 95 | + # allocations created by the scheduler for the instance and dest host |
| 96 | + # are gone and the source host allocations are back on the instance |
| 97 | + # rather than the migration record |
| 98 | + instance = objects.Instance.get_by_uuid( |
| 99 | + context, server['id'], expected_attrs=[ |
| 100 | + 'migration_context', 'flavor', 'system_metadata' |
| 101 | + ]) |
| 102 | + # FIXME(mriedem): Leaving these fields set on the instance is |
| 103 | + # bug 1836369. |
| 104 | + self.assertIsNotNone(instance.migration_context) |
| 105 | + self.assertIsNotNone(instance.new_flavor) |
| 106 | + self.assertEqual('active', instance.system_metadata['old_vm_state']) |
| 107 | + |
| 108 | + dest_host = 'host0' if source_host == 'host1' else 'host1' |
| 109 | + dest_rp_uuid = self._get_provider_uuid_by_host(dest_host) |
| 110 | + dest_allocations = self._get_allocations_by_provider_uuid(dest_rp_uuid) |
| 111 | + # FIXME(mriedem): This is bug 1836369 because we orphaned the |
| 112 | + # allocations created by the scheduler for the server on the dest host. |
| 113 | + self.assertIn(server['id'], dest_allocations) |
| 114 | + source_rp_uuid = self._get_provider_uuid_by_host(source_host) |
| 115 | + source_allocations = self._get_allocations_by_provider_uuid( |
| 116 | + source_rp_uuid) |
| 117 | + # FIXME(mriedem): This is bug 1836369 because the server is running on |
| 118 | + # the source host but is not tracking allocations against the source |
| 119 | + # host. |
| 120 | + self.assertNotIn(server['id'], source_allocations) |
0 commit comments