Skip to content

Commit 9a977cb

Browse files
committed
Add functional recreate test for regression bug 1825537
Change I2d9ab06b485f76550dbbff46f79f40ff4c97d12f in Rocky (and backported through to Pike) added error handling to the resize_instance and finish_resize methods to revert allocations in placement when a failure occurs. This is OK for resize_instance, which runs on the source compute, as long as the instance.host/node values have not yet been changed to the dest host/node before RPC casting to the finish_resize method on the dest compute. It's OK because the instance is still on the source compute and the DB says so, so any attempt to recover the instance via hard reboot or rebuild will be on the source host. This is not OK for finish_resize because if we fail there and revert the allocations, the instance host/node values are already pointing at the dest compute and by reverting the allocations in placement, placement will be incorrectly tracking the instance usage with the old flavor against the source node resource provider rather than the new flavor against the dest node resource provider - where the instance is actually running and the nova DB says the instance lives. This change adds a simple functional regression test to recreate the bug with a multi-host resize. There is already a same-host resize functional test marked here which will need to be fixed as well. Conflicts: nova/tests/functional/test_servers.py nova/virt/fake.py NOTE(mriedem): The test_servers conflict is due to not having change If6aa37d9b6b48791e070799ab026c816fda4441c in Rocky. As a result, the new regression test also had to be modified for the call to assertFlavorMatchesAllocation. The fake module conflict is due to not having change Iefff121640e04abdbb6a4ae546c447f168dc8af9 in Rocky. Change-Id: Ie9e294db7e24d0e3cbe83eee847f0fbfb7478900 Related-Bug: #1825537 (cherry picked from commit f4bb672) (cherry picked from commit eaa1fc6)
1 parent 7404689 commit 9a977cb

File tree

3 files changed

+90
-0
lines changed

3 files changed

+90
-0
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
2+
# not use this file except in compliance with the License. You may obtain
3+
# a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10+
# License for the specific language governing permissions and limitations
11+
# under the License.
12+
13+
from nova.tests.functional import integrated_helpers
14+
15+
16+
class FinishResizeErrorAllocationCleanupTestCase(
17+
integrated_helpers.ProviderUsageBaseTestCase):
18+
"""Test for bug 1825537 introduced in Rocky and backported down to Pike.
19+
20+
Tests a scenario where finish_resize fails on the dest compute during a
21+
resize and ensures resource provider allocations are properly cleaned up
22+
in placement.
23+
"""
24+
25+
compute_driver = 'fake.FakeFinishMigrationFailDriver'
26+
27+
def setUp(self):
28+
super(FinishResizeErrorAllocationCleanupTestCase, self).setUp()
29+
# Get the flavors we're going to use.
30+
flavors = self.api.get_flavors()
31+
self.flavor1 = flavors[0]
32+
self.flavor2 = flavors[1]
33+
34+
def _resize_and_assert_error(self, server, dest_host):
35+
# Now resize the server and wait for it to go to ERROR status because
36+
# the finish_migration virt driver method in host2 should fail.
37+
req = {'resize': {'flavorRef': self.flavor2['id']}}
38+
self.api.post_server_action(server['id'], req)
39+
# The instance is set to ERROR status before the fault is recorded so
40+
# to avoid a race we need to wait for the task_state to change
41+
# to None which happens after the fault is recorded.
42+
server = self._wait_for_server_parameter(
43+
self.admin_api, server,
44+
{'status': 'ERROR', 'OS-EXT-STS:task_state': None})
45+
# The server should be pointing at $dest_host because resize_instance
46+
# will have updated the host/node value on the instance before casting
47+
# to the finish_resize method on the dest compute.
48+
self.assertEqual(dest_host, server['OS-EXT-SRV-ATTR:host'])
49+
# In this case the FakeFinishMigrationFailDriver.finish_migration
50+
# method raises VirtualInterfaceCreateException.
51+
self.assertIn('Virtual Interface creation failed',
52+
server['fault']['message'])
53+
54+
def test_finish_resize_fails_allocation_cleanup(self):
55+
# Start two computes so we can resize across hosts.
56+
self._start_compute('host1')
57+
self._start_compute('host2')
58+
59+
# Create a server on host1.
60+
server = self._boot_and_check_allocations(self.flavor1, 'host1')
61+
62+
# Resize to host2 which should fail.
63+
self._resize_and_assert_error(server, 'host2')
64+
65+
# Check the resource provider allocations. Since the server is pointed
66+
# at the dest host in the DB now, the dest node resource provider
67+
# allocations should still exist with the new flavor.
68+
source_rp_uuid = self._get_provider_uuid_by_host('host1')
69+
dest_rp_uuid = self._get_provider_uuid_by_host('host2')
70+
# FIXME(mriedem): This is bug 1825537 where the allocations are
71+
# reverted when finish_resize fails so the dest node resource provider
72+
# does not have any allocations and the instance allocations are for
73+
# the old flavor on the source node resource provider even though the
74+
# instance is not running on the source host nor pointed at the source
75+
# host in the DB.
76+
# self.assertFlavorMatchesAllocation(
77+
# self.flavor2, server['id'], dest_rp_uuid)
78+
dest_rp_usages = self._get_provider_usages(dest_rp_uuid)
79+
no_usage = {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}
80+
self.assertEqual(no_usage, dest_rp_usages)
81+
source_usages = self._get_provider_usages(source_rp_uuid)
82+
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)

nova/tests/functional/test_servers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3088,6 +3088,7 @@ def fake_resize_method(*args, **kwargs):
30883088
# Ensure the allocation records still exist on the host.
30893089
source_rp_uuid = self._get_provider_uuid_by_host(hostname)
30903090
source_usages = self._get_provider_usages(source_rp_uuid)
3091+
# FIXME(mriedem): This is wrong for the _finish_resize case.
30913092
# The new_flavor should have been subtracted from the doubled
30923093
# allocation which just leaves us with the original flavor.
30933094
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)

nova/virt/fake.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,13 @@ class MediumFakeDriver(FakeDriver):
674674
local_gb = 1028
675675

676676

677+
class FakeFinishMigrationFailDriver(FakeDriver):
678+
"""FakeDriver variant that will raise an exception from finish_migration"""
679+
680+
def finish_migration(self, *args, **kwargs):
681+
raise exception.VirtualInterfaceCreateException()
682+
683+
677684
class FakeRescheduleDriver(FakeDriver):
678685
"""FakeDriver derivative that triggers a reschedule on the first spawn
679686
attempt. This is expected to only be used in tests that have more than

0 commit comments

Comments
 (0)