
Commit e818dae

Zuul authored and openstack-gerrit committed
Merge "cleanup evacuated instances not on hypervisor" into stable/stein
2 parents: a7261b6 + b8f2cd6

File tree

  nova/compute/manager.py
  nova/tests/unit/compute/test_compute_mgr.py

2 files changed: +119 -13 lines

nova/compute/manager.py

Lines changed: 32 additions & 12 deletions
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -635,6 +635,9 @@ def _destroy_evacuated_instances(self, context):
         compared to the instances for the migration records and those local
         guests are destroyed, along with instance allocation records in
         Placement for this node.
+        Then allocations are removed from Placement for every instance that is
+        evacuated from this host regardless if the instance is reported by the
+        hypervisor or not.
         """
         filters = {
             'source_compute': self.host,
@@ -661,18 +664,14 @@ def _destroy_evacuated_instances(self, context):
         # TODO(mriedem): We could optimize by pre-loading the joined fields
         # we know we'll use, like info_cache and flavor.
         local_instances = self._get_instances_on_driver(context)
-        evacuated = [inst for inst in local_instances
-                     if inst.uuid in evacuations]
+        evacuated_local_instances = {inst.uuid: inst
+                                     for inst in local_instances
+                                     if inst.uuid in evacuations}
 
-        # NOTE(gibi): We are called from init_host and at this point the
-        # compute_nodes of the resource tracker has not been populated yet so
-        # we cannot rely on the resource tracker here.
-        compute_nodes = {}
-
-        for instance in evacuated:
-            migration = evacuations[instance.uuid]
-            LOG.info('Deleting instance as it has been evacuated from '
-                     'this host', instance=instance)
+        for instance in evacuated_local_instances.values():
+            LOG.info('Destroying instance as it has been evacuated from '
+                     'this host but still exists in the hypervisor',
+                     instance=instance)
             try:
                 network_info = self.network_api.get_instance_nw_info(
                     context, instance)
@@ -692,7 +691,28 @@ def _destroy_evacuated_instances(self, context):
                                     network_info,
                                     bdi, destroy_disks)
 
-            # delete the allocation of the evacuated instance from this host
+        # NOTE(gibi): We are called from init_host and at this point the
+        # compute_nodes of the resource tracker has not been populated yet so
+        # we cannot rely on the resource tracker here.
+        compute_nodes = {}
+
+        for instance_uuid, migration in evacuations.items():
+            try:
+                if instance_uuid in evacuated_local_instances:
+                    # Avoid the db call if we already have the instance loaded
+                    # above
+                    instance = evacuated_local_instances[instance_uuid]
+                else:
+                    instance = objects.Instance.get_by_uuid(
+                        context, instance_uuid)
+            except exception.InstanceNotFound:
+                # The instance already deleted so we expect that every
+                # allocation of that instance has already been cleaned up
+                continue
+
+            LOG.info('Cleaning up allocations of the instance as it has been '
+                     'evacuated from this host',
+                     instance=instance)
             if migration.source_node not in compute_nodes:
                 try:
                     cn_uuid = objects.ComputeNode.get_by_host_and_nodename(
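
Taken together, these hunks split the old single loop into two phases: guests that the hypervisor still reports on this host are destroyed first, and Placement allocations are then removed for every evacuation record, whether or not the guest was found locally, skipping instances that were already deleted. The following standalone sketch mirrors that control flow only; the dict-based records and the destroy_guest, load_instance and remove_allocation callables are hypothetical stand-ins, not nova's APIs.

# Simplified sketch of the reworked cleanup flow (not nova code): driver,
# DB and Placement interactions are passed in as plain callables, and
# instances/migrations are plain dicts.
def cleanup_evacuations(evacuations, driver_instances,
                        destroy_guest, load_instance, remove_allocation):
    # Phase 1: destroy only the guests the hypervisor still reports locally.
    evacuated_local = {inst['uuid']: inst for inst in driver_instances
                       if inst['uuid'] in evacuations}
    for inst in evacuated_local.values():
        destroy_guest(inst)

    # Phase 2: remove Placement allocations for every evacuation from this
    # host, even when the guest was never reported by the hypervisor.
    for uuid, migration in evacuations.items():
        inst = evacuated_local.get(uuid) or load_instance(uuid)
        if inst is None:
            # Instance already deleted; its allocations are assumed gone too.
            continue
        remove_allocation(uuid, migration['source_node'])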

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 87 additions & 1 deletion
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -3920,7 +3920,8 @@ def fake_get_node(context, host, node):
             # both instance_1 and instance_2 is destroyed in the driver
             destroy.assert_has_calls(
                 [mock.call(self.context, instance_1, None, {}, True),
-                 mock.call(self.context, instance_2, None, {}, True)])
+                 mock.call(self.context, instance_2, None, {}, True)],
+                any_order=True)
 
             # but only instance_2 is deallocated as the compute node for
             # instance_1 is already deleted
@@ -3929,6 +3930,91 @@ def fake_get_node(context, host, node):
 
             self.assertEqual(2, get_node.call_count)
 
+    def test_destroy_evacuated_instances_not_on_hypervisor(self):
+        our_host = self.compute.host
+        flavor = objects.Flavor()
+        instance_1 = objects.Instance(self.context, flavor=flavor)
+        instance_1.uuid = uuids.instance_1
+        instance_1.task_state = None
+        instance_1.vm_state = vm_states.ACTIVE
+        instance_1.host = 'not-' + our_host
+        instance_1.user_id = uuids.user_id
+        instance_1.project_id = uuids.project_id
+        instance_1.deleted = False
+
+        migration_1 = objects.Migration(instance_uuid=instance_1.uuid)
+        migration_1.status = 'done'
+        migration_1.source_node = 'our-node'
+
+        our_node = objects.ComputeNode(
+            host=our_host, uuid=uuids.our_node_uuid)
+
+        with test.nested(
+            mock.patch.object(self.compute, '_get_instances_on_driver',
+                              return_value=[]),
+            mock.patch.object(self.compute.driver, 'destroy'),
+            mock.patch('nova.objects.MigrationList.get_by_filters'),
+            mock.patch('nova.objects.Migration.save'),
+            mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename'),
+            mock.patch('nova.scheduler.utils.resources_from_flavor'),
+            mock.patch.object(self.compute.reportclient,
+                              'remove_provider_tree_from_instance_allocation'),
+            mock.patch('nova.objects.Instance.get_by_uuid')
+        ) as (_get_intances_on_driver, destroy, migration_list, migration_save,
+              get_node, get_resources, remove_allocation,
+              instance_get_by_uuid):
+            migration_list.return_value = [migration_1]
+            instance_get_by_uuid.return_value = instance_1
+            get_node.return_value = our_node
+            get_resources.return_value = mock.sentinel.resources
+
+            self.compute._destroy_evacuated_instances(self.context)
+
+            # nothing to be destroyed as the driver returned no instances on
+            # the hypervisor
+            self.assertFalse(destroy.called)
+
+            # but our only instance still cleaned up in placement
+            remove_allocation.assert_called_once_with(
+                self.context, instance_1.uuid, uuids.our_node_uuid)
+            instance_get_by_uuid.assert_called_once_with(
+                self.context, instance_1.uuid)
+            get_node.assert_called_once_with(
+                self.context, our_host, 'our-node')
+
+    def test_destroy_evacuated_instances_not_on_hyp_and_instance_deleted(self):
+        migration_1 = objects.Migration(instance_uuid=uuids.instance_1)
+        migration_1.status = 'done'
+        migration_1.source_node = 'our-node'
+
+        with test.nested(
+            mock.patch.object(self.compute, '_get_instances_on_driver',
+                              return_value=[]),
+            mock.patch.object(self.compute.driver, 'destroy'),
+            mock.patch('nova.objects.MigrationList.get_by_filters'),
+            mock.patch('nova.objects.Migration.save'),
+            mock.patch('nova.scheduler.utils.resources_from_flavor'),
+            mock.patch.object(self.compute.reportclient,
+                              'remove_provider_tree_from_instance_allocation'),
+            mock.patch('nova.objects.Instance.get_by_uuid')
+        ) as (_get_instances_on_driver,
+              destroy, migration_list, migration_save, get_resources,
+              remove_allocation, instance_get_by_uuid):
+            migration_list.return_value = [migration_1]
+            instance_get_by_uuid.side_effect = exception.InstanceNotFound(
+                instance_id=uuids.instance_1)
+
+            self.compute._destroy_evacuated_instances(self.context)
+
+            # nothing to be destroyed as the driver returned no instances on
+            # the hypervisor
+            self.assertFalse(destroy.called)
+
+            instance_get_by_uuid.assert_called_once_with(
+                self.context, uuids.instance_1)
+            # nothing to be cleaned as the instance was deleted already
+            self.assertFalse(remove_allocation.called)
+
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_destroy_evacuated_instances')
     @mock.patch('nova.compute.manager.LOG')
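
The second new test drives the "instance already deleted" branch entirely through mock's side_effect. Below is a minimal standalone illustration of that pattern; InstanceNotFound and the cleanup_allocation helper here are stand-ins for the nova pieces, not the real test code.

from unittest import mock


class InstanceNotFound(Exception):
    """Stand-in for nova.exception.InstanceNotFound."""


def cleanup_allocation(lookup, remove_allocation, uuid):
    # Mirrors the manager logic the test exercises: a missing instance
    # short-circuits the Placement cleanup.
    try:
        instance = lookup(uuid)
    except InstanceNotFound:
        return
    remove_allocation(instance)


lookup = mock.Mock(side_effect=InstanceNotFound())
remove_allocation = mock.Mock()
cleanup_allocation(lookup, remove_allocation, 'fake-uuid')
assert not remove_allocation.called  # the cleanup was skipped, as expected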
