Commit 9cd0fcd

Zuul authored and openstack-gerrit committed

Merge "Add functional regression test for bug 1837955"

2 parents eba3750 + 5cc39fc

1 file changed: 114 additions and 0 deletions
@@ -0,0 +1,114 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import time

from nova import exception
from nova.tests.functional import integrated_helpers
from nova.tests.unit import fake_notifier
from nova.tests.unit.image import fake as fake_image


class BuildRescheduleClaimFailsTestCase(
        integrated_helpers.ProviderUsageBaseTestCase):
    """Regression test case for bug 1837955 where a server build fails on the
    primary host and then attempting to allocate resources on the alternate
    host, the alternate host is full and the allocations claim in placement
    fails, resulting in the build failing due to MaxRetriesExceeded and the
    server going to ERROR status.
    """
    compute_driver = 'fake.SmallFakeDriver'

    def _wait_for_unversioned_notification(self, event_type):
        for x in range(20):  # wait up to 10 seconds
            for notification in fake_notifier.NOTIFICATIONS:
                if notification.event_type == event_type:
                    return notification
            time.sleep(.5)
        self.fail('Timed out waiting for unversioned notification %s. Got: %s'
                  % (event_type, fake_notifier.NOTIFICATIONS))

    def test_build_reschedule_alt_host_alloc_fails(self):
        # Start two compute services so we have one alternate host.
        # Set cpu_allocation_ratio=1.0 to make placement inventory
        # and allocations for VCPU easier to manage.
        self.flags(cpu_allocation_ratio=1.0)
        for x in range(2):
            self._start_compute('host%i' % x)

        def fake_instance_claim(_self, _context, _inst, nodename, *a, **kw):
            # Before triggering the reschedule to the other host, max out the
            # capacity on the alternate host.
            alt_nodename = 'host0' if nodename == 'host1' else 'host1'
            rp_uuid = self._get_provider_uuid_by_host(alt_nodename)
            inventories = self._get_provider_inventory(rp_uuid)
            # Fake some other consumer taking all of the VCPU on the alt host.
            # Since we set cpu_allocation_ratio=1.0 the total is the total
            # capacity for VCPU on the host.
            total_vcpu = inventories['VCPU']['total']
            alt_consumer = '7d32d0bc-af16-44b2-8019-a24925d76152'
            allocs = {
                'allocations': {
                    rp_uuid: {
                        'resources': {
                            'VCPU': total_vcpu
                        }
                    }
                },
                'project_id': self.api.project_id,
                'user_id': self.api.project_id
            }
            resp = self.placement_api.put(
                '/allocations/%s' % alt_consumer, allocs, version='1.12')
            self.assertEqual(204, resp.status, resp.content)
            raise exception.ComputeResourcesUnavailable(reason='overhead!')

        # Stub out the instance claim (regardless of which host the scheduler
        # picks as the primary) to trigger a reschedule.
        self.stub_out('nova.compute.manager.resource_tracker.ResourceTracker.'
                      'instance_claim', fake_instance_claim)

        # Now that our stub is in place, try to create a server and wait for it
        # to go to ERROR status.
        server = self._build_minimal_create_server_request(
            self.api, 'test_build_reschedule_alt_host_alloc_fails',
            image_uuid=fake_image.get_valid_image_id(),
            networks=[{'port': self.neutron.port_1['id']}])
        server = self.api.post_server({'server': server})
        # FIXME(mriedem): This is bug 1837955 where the status is stuck in
        # BUILD rather than the vm_state being set to error and the task_state
        # being set to None. Uncomment this when the bug is fixed.
        # server = self._wait_for_state_change(self.api, server, 'ERROR')

        # Wait for the MaxRetriesExceeded fault to be recorded.
        # set_vm_state_and_notify sets the vm_state to ERROR before the fault
        # is recorded but after the notification is sent. So wait for the
        # unversioned notification to show up and then get the fault.
        # FIXME(mriedem): Uncomment this when bug 1837955 is fixed.
        # self._wait_for_unversioned_notification(
        #     'compute_task.build_instances')
        # server = self.api.get_server(server['id'])
        # self.assertIn('fault', server)
        # self.assertIn('Exceeded maximum number of retries',
        #               server['fault']['message'])

        # TODO(mriedem): Remove this when the bug is fixed. We need to assert
        # something before the bug is fixed to show the failure so check the
        # logs.
        for x in range(20):
            logs = self.stdlog.logger.output
            if 'MaxRetriesExceeded' in logs:
                break
            time.sleep(.5)
        else:
            self.fail('Timed out waiting for MaxRetriesExceeded to show up '
                      'in the logs.')
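
Both retry loops in the test above (the unversioned-notification wait and the final log scan) follow the same poll-until-timeout shape, and the log scan relies on Python's for/else construct so the failure branch only runs when the loop never hits break. Below is a minimal, self-contained sketch of that pattern; the wait_for_condition helper and its parameters are illustrative only and are not part of Nova's test framework.

    import time


    def wait_for_condition(predicate, timeout=10.0, interval=0.5):
        """Poll predicate() until it returns a truthy value or timeout expires."""
        for _ in range(int(timeout / interval)):
            result = predicate()
            if result:
                # Leaving the loop via break skips the else clause below.
                break
            time.sleep(interval)
        else:
            # Only reached if the loop ran all iterations without break,
            # i.e. the condition never became true within the timeout.
            raise AssertionError('Timed out waiting for condition')
        return result


    # Usage: a stand-in for polling fake_notifier.NOTIFICATIONS for an event type.
    notifications = [{'event_type': 'compute_task.build_instances'}]
    match = wait_for_condition(
        lambda: [n for n in notifications
                 if n['event_type'] == 'compute_task.build_instances'])
    print(match)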
