Commit ad9f373 (parent 0b92c7e)

Update usage in RT.drop_move_claim during confirm resize
The confirm resize flow in the compute manager runs on the source host. It calls RT.drop_move_claim to drop resource usage for the old flavor from the source host.

The problem with drop_move_claim is that it only decrements the old flavor from the reported usage if the instance is in RT.tracked_migrations, which is only the case on the source host if the update_available_resource periodic task runs before the resize is confirmed; otherwise the instance is still tracked only in RT.tracked_instances on the source host. This leaves the source compute incorrectly reporting resource usage for the old flavor until the next periodic runs, which could be a large window if resizes are configured to confirm automatically, e.g. resize_confirm_window=1, and the periodic interval is big, e.g. update_resources_interval=600.

This fixes the issue by also updating usage in drop_move_claim when the instance is not in tracked_migrations but is in tracked_instances.

Because of the tight coupling with instance.migration_context, we need to ensure the migration_context still exists when drop_move_claim is called during confirm_resize, so a test wrinkle is added to enforce that ordering. test_drop_move_claim_on_revert also needed updating to reflect how drop_move_claim is actually called during revert_resize. Finally, the functional recreate test is updated to show the bug is fixed.

Change-Id: Ia6d8a7909081b0b856bd7e290e234af7e42a2b38
Closes-Bug: #1818914
Related-Bug: #1641750
Related-Bug: #1498126
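
A minimal, self-contained sketch of the intended behavior (plain Python, not nova code; the dictionaries, the counter and the toy drop_move_claim function below are illustrative stand-ins for the resource tracker internals): usage for the dropped flavor is decremented whether the instance is found in tracked_migrations or only in tracked_instances.

    # Toy illustration of the intended drop_move_claim behavior; not nova code.
    # A simple counter stands in for the host usage reported by the tracker.
    tracked_migrations = {}     # uuid -> migration record (e.g. dest node on revert)
    tracked_instances = set()   # uuids tracked only as instances (e.g. source node)
    usage = {'vcpus': 0}        # stand-in for reported host usage

    def drop_move_claim(uuid, flavor_vcpus):
        """Drop usage for a move claim regardless of which set tracks the instance."""
        if uuid in tracked_migrations:
            tracked_migrations.pop(uuid)
        elif uuid in tracked_instances:
            # Before this fix, usage was not decremented in this branch, so the
            # source host kept reporting the old flavor until the periodic ran.
            tracked_instances.remove(uuid)
        else:
            return
        usage['vcpus'] -= flavor_vcpus

    # Source host confirming a resize before the periodic has run: the instance
    # is only in tracked_instances, yet its old flavor usage is still dropped.
    tracked_instances.add('uuid-1')
    usage['vcpus'] += 2         # old flavor usage recorded earlier
    drop_move_claim('uuid-1', 2)
    assert usage['vcpus'] == 0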
File tree

5 files changed: +43 −26 lines changed

nova/compute/manager.py

Lines changed: 4 additions & 0 deletions

@@ -4053,6 +4053,10 @@ def _confirm_resize(self, context, instance, migration=None):
             migration.status = 'confirmed'
             migration.save()

+            # NOTE(mriedem): drop_move_claim relies on
+            # instance.migration_context so make sure to not call
+            # instance.drop_migration_context() until after drop_move_claim
+            # is called.
             self.rt.drop_move_claim(context, instance, migration.source_node,
                                     old_instance_type, prefix='old_')
             instance.drop_migration_context()

nova/compute/resource_tracker.py

Lines changed: 10 additions & 14 deletions

@@ -459,31 +459,27 @@ def drop_move_claim(self, context, instance, nodename,
         attributes. 'old_' or 'new_', with 'new_' being the
         default.
         """
+        # Remove usage for an instance that is tracked in migrations, such as
+        # on the dest node during revert resize.
         if instance['uuid'] in self.tracked_migrations:
             migration = self.tracked_migrations.pop(instance['uuid'])
-
             if not instance_type:
                 instance_type = self._get_instance_type(instance, prefix,
                                                         migration)
-
-            if instance_type is not None:
-                numa_topology = self._get_migration_context_resource(
-                    'numa_topology', instance, prefix=prefix)
-                usage = self._get_usage_dict(
-                    instance_type, instance, numa_topology=numa_topology)
-                self._drop_pci_devices(instance, nodename, prefix)
-                self._update_usage(usage, nodename, sign=-1)
-
-            ctxt = context.elevated()
-            self._update(ctxt, self.compute_nodes[nodename])
         # Remove usage for an instance that is not tracked in migrations (such
         # as on the source node after a migration).
         # NOTE(lbeliveau): On resize on the same node, the instance is
         # included in both tracked_migrations and tracked_instances.
-        elif (instance['uuid'] in self.tracked_instances):
+        elif instance['uuid'] in self.tracked_instances:
             self.tracked_instances.remove(instance['uuid'])
+
+        if instance_type is not None:
+            numa_topology = self._get_migration_context_resource(
+                'numa_topology', instance, prefix=prefix)
+            usage = self._get_usage_dict(
+                instance_type, instance, numa_topology=numa_topology)
             self._drop_pci_devices(instance, nodename, prefix)
-            # TODO(lbeliveau): Validate if numa needs the same treatment.
+            self._update_usage(usage, nodename, sign=-1)

             ctxt = context.elevated()
             self._update(ctxt, self.compute_nodes[nodename])

nova/tests/functional/test_servers.py

Lines changed: 1 addition & 6 deletions

@@ -2122,13 +2122,8 @@ def test_resize_confirm_assert_hypervisor_usage_no_periodics(self):
         self._wait_for_state_change(self.api, server, 'ACTIVE')

         # There should no resource usage for flavor1 on the source host.
-        # FIXME(mriedem): This is bug 1818914 where the source host continues
-        # to report old_flavor usage until the update_available_resource
-        # periodic task runs. Uncomment this once fixed.
-        # self.assert_hypervisor_usage(
-        #     source_rp_uuid, no_usage, volume_backed=False)
         self.assert_hypervisor_usage(
-            source_rp_uuid, self.flavor1, volume_backed=False)
+            source_rp_uuid, no_usage, volume_backed=False)
         # And resource usage for flavor2 should still be on the target host.
         self.assert_hypervisor_usage(
             dest_rp_uuid, self.flavor2, volume_backed=False)

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 9 additions & 1 deletion

@@ -7175,7 +7175,15 @@ def test_confirm_resize_deletes_allocations(self):
         def do_confirm_resize(mock_save, mock_drop, mock_delete,
                               mock_confirm, mock_nwapi, mock_notify,
                               mock_mig_save, mock_mig_get, mock_inst_get):
-            self._mock_rt()
+
+            def fake_drop_move_claim(*args, **kwargs):
+                # RT.drop_move_claim must be called before
+                # instance.drop_migration_context.
+                mock_drop.assert_not_called()
+
+            mock_rt = self._mock_rt()
+            # Enforce order of drop_move_claim/drop_migration_context calls.
+            mock_rt.drop_move_claim.side_effect = fake_drop_move_claim
             self.instance.migration_context = objects.MigrationContext(
                 new_pci_devices=None,
                 old_pci_devices=None)
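
The side_effect trick in the hunk above is a general unittest.mock pattern for enforcing call order. A small self-contained sketch of the same idea (the names do_work, first and second are illustrative only, not from nova):

    # Enforcing call order with unittest.mock: a side_effect on the first mock
    # asserts that the second mock has not been called yet.
    from unittest import mock

    def do_work(first, second):
        # Code under test is expected to call first() before second().
        first()
        second()

    first = mock.Mock()
    second = mock.Mock()

    def assert_second_not_called_yet(*args, **kwargs):
        # Runs whenever first() is called; fails if second() already happened.
        second.assert_not_called()

    first.side_effect = assert_second_not_called_yet

    do_work(first, second)
    first.assert_called_once()
    second.assert_called_once()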

nova/tests/unit/compute/test_resource_tracker.py

Lines changed: 19 additions & 5 deletions

@@ -2490,23 +2490,37 @@ def test_drop_move_claim_on_revert(self):

         instance = _INSTANCE_FIXTURES[0].obj_clone()
         instance.task_state = task_states.RESIZE_MIGRATING
-        instance.flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2]
+        instance.new_flavor = _INSTANCE_TYPE_OBJ_FIXTURES[2]
         instance.migration_context = objects.MigrationContext()
         instance.migration_context.new_pci_devices = objects.PciDeviceList(
             objects=pci_devs)

-        self.rt.tracked_instances = set([instance.uuid])
+        # When reverting a resize and dropping the move claim, the
+        # destination compute calls drop_move_claim to drop the new_flavor
+        # usage and the instance should be in tracked_migrations from when
+        # the resize_claim was made on the dest during prep_resize.
+        self.rt.tracked_migrations = {
+            instance.uuid: objects.Migration(migration_type='resize')}

         # not using mock.sentinel.ctx because drop_move_claim calls elevated
         ctx = mock.MagicMock()

         with test.nested(
-                mock.patch.object(self.rt, '_update'),
-                mock.patch.object(self.rt.pci_tracker, 'free_device')
-        ) as (update_mock, mock_pci_free_device):
+            mock.patch.object(self.rt, '_update'),
+            mock.patch.object(self.rt.pci_tracker, 'free_device'),
+            mock.patch.object(self.rt, '_get_usage_dict'),
+            mock.patch.object(self.rt, '_update_usage')
+        ) as (
+            update_mock, mock_pci_free_device, mock_get_usage,
+            mock_update_usage,
+        ):
             self.rt.drop_move_claim(ctx, instance, _NODENAME)
             mock_pci_free_device.assert_called_once_with(
                 pci_dev, mock.ANY)
+            mock_get_usage.assert_called_once_with(
+                instance.new_flavor, instance, numa_topology=None)
+            mock_update_usage.assert_called_once_with(
+                mock_get_usage.return_value, _NODENAME, sign=-1)

     @mock.patch('nova.compute.utils.is_volume_backed_instance',
                 return_value=False)
