Skip to content

Commit 386aa31

Browse files
committed
Confirm cross-cell resize from the API
This adds the logic to the API confirmResize operation such that if the migration is a cross-cell resize the API will RPC cast to conductor to confirm the resize rather than directly to the source compute service like a traditional resize. Conductor will then orchestrate the confirm process between the source and target cell. Now that the API has confirmResize plumbed this change builds on the cross-cell resize functional tests by confirming the resize for the image-backed server test. To make that extra fun, a volume is attached to the server while it is in VERIFY_RESIZE status to assert it remains attached to the instance in the target cell after the resize is confirmed. In addition, the FakeDriver.cleanup() method is updated to guard against calling it before the guest is destroyed from the "hypervisor". This is to make sure the cleanup() method is called properly from confirm_snapshot_based_resize_at_source() in the compute service on the source host. The _confirm_resize_on_deleting scenario will be dealt with in a later change. Part of blueprint cross-cell-resize Change-Id: Ia5892e1d2cb7c7685e104466f83df7bb00b168c0
1 parent 6f74bc1 commit 386aa31

File tree

4 files changed

+229
-11
lines changed

4 files changed

+229
-11
lines changed

nova/compute/api.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3687,10 +3687,21 @@ def confirm_resize(self, context, instance, migration=None):
36873687
self._record_action_start(context, instance,
36883688
instance_actions.CONFIRM_RESIZE)
36893689

3690-
self.compute_rpcapi.confirm_resize(context,
3691-
instance,
3692-
migration,
3693-
migration.source_compute)
3690+
# Check to see if this was a cross-cell resize, in which case the
3691+
# resized instance is in the target cell (the migration and instance
3692+
# came from the target cell DB in this case), and we need to cleanup
3693+
# the source host and source cell database records.
3694+
if migration.cross_cell_move:
3695+
self.compute_task_api.confirm_snapshot_based_resize(
3696+
context, instance, migration)
3697+
else:
3698+
# It's a traditional resize within a single cell, so RPC cast to
3699+
# the source compute host to cleanup the host since the instance
3700+
# is already on the target host.
3701+
self.compute_rpcapi.confirm_resize(context,
3702+
instance,
3703+
migration,
3704+
migration.source_compute)
36943705

36953706
@staticmethod
36963707
def _allow_cross_cell_resize(context, instance):

nova/tests/functional/test_cross_cell_migrate.py

Lines changed: 182 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212

1313
import mock
1414

15+
from oslo_utils.fixture import uuidsentinel as uuids
16+
17+
from nova.compute import instance_actions
1518
from nova import context as nova_context
1619
from nova import exception
1720
from nova import objects
@@ -378,13 +381,172 @@ def _resize_and_validate(self, volume_backed=False, stopped=False,
378381

379382
return server, source_rp_uuid, target_rp_uuid, old_flavor, new_flavor
380383

384+
def _attach_volume_to_server(self, server_id, volume_id):
385+
"""Attaches the volume to the server and waits for the
386+
"instance.volume_attach.end" versioned notification.
387+
"""
388+
body = {'volumeAttachment': {'volumeId': volume_id}}
389+
self.api.api_post(
390+
'/servers/%s/os-volume_attachments' % server_id, body)
391+
fake_notifier.wait_for_versioned_notifications(
392+
'instance.volume_attach.end')
393+
394+
def assert_volume_is_attached(self, server_id, volume_id):
395+
"""Asserts the volume is attached to the server."""
396+
server = self.api.get_server(server_id)
397+
attachments = server['os-extended-volumes:volumes_attached']
398+
attached_vol_ids = [attachment['id'] for attachment in attachments]
399+
self.assertIn(volume_id, attached_vol_ids,
400+
'Attached volumes: %s' % attachments)
401+
402+
def assert_resize_confirm_notifications(self):
403+
# We should have gotten only two notifications:
404+
# 1. instance.resize_confirm.start
405+
# 2. instance.resize_confirm.end
406+
self.assertEqual(2, len(fake_notifier.VERSIONED_NOTIFICATIONS),
407+
'Unexpected number of versioned notifications for '
408+
'cross-cell resize confirm: %s' %
409+
fake_notifier.VERSIONED_NOTIFICATIONS)
410+
start = fake_notifier.VERSIONED_NOTIFICATIONS[0]['event_type']
411+
self.assertEqual('instance.resize_confirm.start', start)
412+
end = fake_notifier.VERSIONED_NOTIFICATIONS[1]['event_type']
413+
self.assertEqual('instance.resize_confirm.end', end)
414+
415+
def delete_server_and_assert_cleanup(self, server):
416+
"""Deletes the server and makes various cleanup checks.
417+
418+
- makes sure allocations from placement are gone
419+
- makes sure the instance record is gone from both cells
420+
- makes sure there are no leaked volume attachments
421+
422+
:param server: dict of the server resource to delete
423+
"""
424+
# Determine which cell the instance was in when the server was deleted
425+
# in the API so we can check hard vs soft delete in the DB.
426+
current_cell = self.host_to_cell_mappings[
427+
server['OS-EXT-SRV-ATTR:host']]
428+
# Delete the server and check that the allocations are gone from
429+
# the placement service.
430+
self._delete_and_check_allocations(server)
431+
# Make sure the instance record is gone from both cell databases.
432+
ctxt = nova_context.get_admin_context()
433+
for cell_name in self.host_to_cell_mappings.values():
434+
cell = self.cell_mappings[cell_name]
435+
with nova_context.target_cell(ctxt, cell) as cctxt:
436+
# If this is the current cell the instance was in when it was
437+
# deleted it should be soft-deleted (instance.deleted!=0),
438+
# otherwise it should be hard-deleted and getting it with a
439+
# read_deleted='yes' context should still raise.
440+
read_deleted = 'no' if current_cell == cell_name else 'yes'
441+
with utils.temporary_mutation(
442+
cctxt, read_deleted=read_deleted):
443+
self.assertRaises(exception.InstanceNotFound,
444+
objects.Instance.get_by_uuid,
445+
cctxt, server['id'])
446+
# Make sure there are no leaked volume attachments.
447+
attachment_count = self._count_volume_attachments(server['id'])
448+
self.assertEqual(0, attachment_count, 'Leaked volume attachments: %s' %
449+
self.cinder.volume_to_attachment)
450+
451+
def assert_resize_confirm_actions(self, server):
452+
actions = self.api.api_get(
453+
'/servers/%s/os-instance-actions' % server['id']
454+
).body['instanceActions']
455+
actions_by_action = {action['action']: action for action in actions}
456+
self.assertIn(instance_actions.CONFIRM_RESIZE, actions_by_action)
457+
confirm_action = actions_by_action[instance_actions.CONFIRM_RESIZE]
458+
detail = self.api.api_get(
459+
'/servers/%s/os-instance-actions/%s' % (
460+
server['id'], confirm_action['request_id'])
461+
).body['instanceAction']
462+
events_by_name = {event['event']: event for event in detail['events']}
463+
self.assertEqual(2, len(detail['events']), detail)
464+
for event_name in ('conductor_confirm_snapshot_based_resize',
465+
'compute_confirm_snapshot_based_resize_at_source'):
466+
self.assertIn(event_name, events_by_name)
467+
self.assertEqual('Success', events_by_name[event_name]['result'])
468+
self.assertEqual('Success', detail['events'][0]['result'])
469+
381470
def test_resize_confirm_image_backed(self):
382471
"""Creates an image-backed server in one cell and resizes it to the
383472
host in the other cell. The resize is confirmed.
384473
"""
385-
self._resize_and_validate()
474+
server, source_rp_uuid, target_rp_uuid, _, new_flavor = (
475+
self._resize_and_validate())
476+
477+
# Attach a fake volume to the server to make sure it survives confirm.
478+
self._attach_volume_to_server(server['id'], uuids.fake_volume_id)
386479

387-
# TODO(mriedem): Confirm the resize and make assertions.
480+
# Reset the fake notifier so we only check confirmation notifications.
481+
fake_notifier.reset()
482+
483+
# Confirm the resize and check all the things. The instance and its
484+
# related records should be gone from the source cell database; the
485+
# migration should be confirmed; the allocations, held by the migration
486+
# record on the source compute node resource provider, should now be
487+
# gone; there should be a confirmResize instance action record with
488+
# a successful event.
489+
target_host = server['OS-EXT-SRV-ATTR:host']
490+
source_host = 'host1' if target_host == 'host2' else 'host2'
491+
self.api.post_server_action(server['id'], {'confirmResize': None})
492+
self._wait_for_state_change(server, 'ACTIVE')
493+
494+
# The migration should be confirmed.
495+
migrations = self.api.api_get(
496+
'/os-migrations?instance_uuid=%s' % server['id']
497+
).body['migrations']
498+
self.assertEqual(1, len(migrations), migrations)
499+
migration = migrations[0]
500+
self.assertEqual('confirmed', migration['status'], migration)
501+
502+
# The resource allocations held against the source node by the
503+
# migration record should be gone and the target node provider should
504+
# have allocations held by the instance.
505+
source_allocations = self._get_allocations_by_provider_uuid(
506+
source_rp_uuid)
507+
self.assertEqual({}, source_allocations)
508+
target_allocations = self._get_allocations_by_provider_uuid(
509+
target_rp_uuid)
510+
self.assertIn(server['id'], target_allocations)
511+
self.assertFlavorMatchesAllocation(
512+
new_flavor, target_allocations[server['id']]['resources'])
513+
514+
self.assert_resize_confirm_actions(server)
515+
516+
# Make sure the guest is on the target node hypervisor and not on the
517+
# source node hypervisor.
518+
source_guest_uuids = (
519+
self.computes[source_host].manager.driver.list_instance_uuids())
520+
self.assertNotIn(server['id'], source_guest_uuids,
521+
'Guest is still running on the source hypervisor.')
522+
target_guest_uuids = (
523+
self.computes[target_host].manager.driver.list_instance_uuids())
524+
self.assertIn(server['id'], target_guest_uuids,
525+
'Guest is not running on the target hypervisor.')
526+
527+
# Assert the source host hypervisor usage is back to 0 and the target
528+
# is using the new flavor.
529+
self.assert_hypervisor_usage(
530+
target_rp_uuid, new_flavor, volume_backed=False)
531+
no_usage = {'vcpus': 0, 'disk': 0, 'ram': 0}
532+
self.assert_hypervisor_usage(
533+
source_rp_uuid, no_usage, volume_backed=False)
534+
535+
# Run periodics and make sure the usage is still as expected.
536+
self._run_periodics()
537+
self.assert_hypervisor_usage(
538+
target_rp_uuid, new_flavor, volume_backed=False)
539+
self.assert_hypervisor_usage(
540+
source_rp_uuid, no_usage, volume_backed=False)
541+
542+
# Make sure the fake volume is still attached.
543+
self.assert_volume_is_attached(server['id'], uuids.fake_volume_id)
544+
545+
# Make sure we got the expected notifications for the confirm action.
546+
self.assert_resize_confirm_notifications()
547+
548+
# Explicitly delete the server and make sure it's gone from all cells.
549+
self.delete_server_and_assert_cleanup(server)
388550

389551
def test_resize_revert_volume_backed(self):
390552
"""Tests a volume-backed resize to another cell where the resize
@@ -457,11 +619,25 @@ def test_cold_migrate_target_host_in_other_cell(self):
457619
# onto the source host in the source cell.
458620

459621
def test_resize_confirm_from_stopped(self):
460-
"""Tests resizing and confirming a server that was initially stopped
461-
so it should remain stopped through the resize.
622+
"""Tests resizing and confirming a volume-backed server that was
623+
initially stopped so it should remain stopped through the resize.
462624
"""
463-
self._resize_and_validate(volume_backed=True, stopped=True)
464-
# TODO(mriedem): Confirm the resize and assert the guest remains off
625+
server = self._resize_and_validate(volume_backed=True, stopped=True)[0]
626+
# Confirm the resize and assert the guest remains off.
627+
self.api.post_server_action(server['id'], {'confirmResize': None})
628+
server = self._wait_for_state_change(server, 'SHUTOFF')
629+
self.assertEqual(4, server['OS-EXT-STS:power_state'],
630+
"Unexpected power state after confirmResize.")
631+
self._wait_for_migration_status(server, ['confirmed'])
632+
633+
# Now try cold-migrating back to cell1 to make sure there is no
634+
# duplicate entry error in the DB.
635+
self.api.post_server_action(server['id'], {'migrate': None})
636+
server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
637+
# Should be back on host1 in cell1.
638+
self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])
639+
640+
# TODO(mriedem): test_resize_revert_from_stopped with image-backed.
465641

466642
def test_finish_snapshot_based_resize_at_dest_spawn_fails(self):
467643
"""Negative test where the driver spawn fails on the dest host during

nova/tests/unit/compute/test_compute_api.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6859,6 +6859,34 @@ def test_get_migrations_sorted_filter_duplicates_using_created_at(self):
68596859
self._test_get_migrations_sorted_filter_duplicates(
68606860
[older, newer], newer)
68616861

6862+
@mock.patch('nova.objects.Migration.get_by_instance_and_status')
6863+
def test_confirm_resize_cross_cell_move_true(self, mock_migration_get):
6864+
"""Tests confirm_resize where Migration.cross_cell_move is True"""
6865+
instance = fake_instance.fake_instance_obj(
6866+
self.context, vm_state=vm_states.RESIZED, task_state=None,
6867+
launched_at=timeutils.utcnow())
6868+
migration = objects.Migration(cross_cell_move=True)
6869+
mock_migration_get.return_value = migration
6870+
with test.nested(
6871+
mock.patch.object(self.context, 'elevated',
6872+
return_value=self.context),
6873+
mock.patch.object(migration, 'save'),
6874+
mock.patch.object(self.compute_api, '_record_action_start'),
6875+
mock.patch.object(self.compute_api.compute_task_api,
6876+
'confirm_snapshot_based_resize'),
6877+
) as (
6878+
mock_elevated, mock_migration_save, mock_record_action,
6879+
mock_conductor_confirm
6880+
):
6881+
self.compute_api.confirm_resize(self.context, instance)
6882+
mock_elevated.assert_called_once_with()
6883+
mock_migration_save.assert_called_once_with()
6884+
self.assertEqual('confirming', migration.status)
6885+
mock_record_action.assert_called_once_with(
6886+
self.context, instance, instance_actions.CONFIRM_RESIZE)
6887+
mock_conductor_confirm.assert_called_once_with(
6888+
self.context, instance, migration)
6889+
68626890

68636891
class DiffDictTestCase(test.NoDBTestCase):
68646892
"""Unit tests for _diff_dict()."""

nova/virt/fake.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,10 @@ def destroy(self, context, instance, network_info, block_device_info=None,
314314

315315
def cleanup(self, context, instance, network_info, block_device_info=None,
316316
destroy_disks=True, migrate_data=None, destroy_vifs=True):
317-
pass
317+
# cleanup() should not be called when the guest has not been destroyed.
318+
if instance.uuid in self.instances:
319+
raise exception.InstanceExists(
320+
"Instance %s has not been destroyed." % instance.uuid)
318321

319322
def attach_volume(self, context, connection_info, instance, mountpoint,
320323
disk_bus=None, device_type=None, encryption=None):

0 commit comments

Comments
 (0)