Skip to content

Commit 44709cf

Browse files
committed
Add functional recreate test for bug 1764556
This attempts to recreate the following scenario: 1) boot instance on ocata host where the compute service does not have a uuid 2) migrate instance 3) delete the ocata service (thus deleting the compute node) 4) start compute service with the same name 5) migrate instance to newly-created compute node 6) upgrade to pike where services.uuid data migration happens 7) list instances as admin to join on the services table The failure occurs when listing instances because the deleted service with the same name as the compute host that the instance is running on gets pulled from the DB and the Service object attempts to set a uuid on it, which fails since it's not using a read_deleted="yes" context. While working on this, the service_get_all_by_binary DB API method had to be fixed to not hard-code read_deleted="no" since the test needs to be able to read deleted services, which it can control via its own context object (note that RequestContext.read_deleted already defaults to "no" so the hard-coding in the DB API is unnecessarily restrictive). NOTE(mriedem): This backport needed to use the set_nodes/restore_nodes methods on the fake virt module since change I2cf2fcbaebc706f897ce5dfbff47d32117064f9c is not in Stein. Change-Id: I4d60da26fcf0a77628d1fdf4e818884614fa4f02 Related-Bug: #1764556 (cherry picked from commit 81f05f5)
1 parent 25e222c commit 44709cf

File tree

2 files changed

+164
-2
lines changed

2 files changed

+164
-2
lines changed

nova/db/sqlalchemy/api.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,7 @@ def service_get_by_host_and_topic(context, host, topic):
480480

481481
@pick_context_manager_reader
482482
def service_get_all_by_binary(context, binary, include_disabled=False):
483-
query = model_query(context, models.Service, read_deleted="no").\
484-
filter_by(binary=binary)
483+
query = model_query(context, models.Service).filter_by(binary=binary)
485484
if not include_disabled:
486485
query = query.filter_by(disabled=False)
487486
return query.all()
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
2+
# not use this file except in compliance with the License. You may obtain
3+
# a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10+
# License for the specific language governing permissions and limitations
11+
# under the License.
12+
13+
import six
14+
15+
from nova import context as nova_context
16+
from nova.db import api as db
17+
from nova import test
18+
from nova.tests import fixtures as nova_fixtures
19+
from nova.tests.functional.api import client as api_client
20+
from nova.tests.functional import fixtures as func_fixtures
21+
from nova.tests.functional import integrated_helpers
22+
from nova.tests.unit.image import fake as fake_image
23+
from nova.tests.unit import policy_fixture
24+
from nova import utils
25+
from nova.virt import fake as fake_virt
26+
27+
28+
class InstanceListWithDeletedServicesTestCase(
29+
test.TestCase, integrated_helpers.InstanceHelperMixin):
30+
"""Contains regression testing for bug 1764556 which is similar to bug
31+
1746509 but different in that it deals with a deleted service and compute
32+
node which was not upgraded to the point of having a UUID and that causes
33+
problems later when an instance which was running on that node is migrated
34+
back to an upgraded service with the same hostname. This is because the
35+
service uuid migration routine gets a ServiceNotFound error when loading
36+
up a deleted service by hostname.
37+
"""
38+
def setUp(self):
39+
super(InstanceListWithDeletedServicesTestCase, self).setUp()
40+
self.useFixture(policy_fixture.RealPolicyFixture())
41+
42+
# The NeutronFixture is needed to stub out validate_networks in API.
43+
self.useFixture(nova_fixtures.NeutronFixture(self))
44+
45+
# We need the computes reporting into placement for the filter
46+
# scheduler to pick a host.
47+
self.useFixture(func_fixtures.PlacementFixture())
48+
49+
api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
50+
api_version='v2.1'))
51+
self.api = api_fixture.api
52+
self.admin_api = api_fixture.admin_api
53+
self.admin_api.microversion = 'latest'
54+
55+
# the image fake backend needed for image discovery
56+
fake_image.stub_out_image_service(self)
57+
self.addCleanup(fake_image.FakeImageService_reset)
58+
# Get the image before we set the microversion to latest to avoid
59+
# the proxy issues with GET /images in 2.36.
60+
self.image_id = self.api.get_images()[0]['id']
61+
self.api.microversion = 'latest'
62+
63+
self.start_service('conductor')
64+
self.start_service('scheduler')
65+
66+
def _migrate_server(self, server, target_host):
67+
self.admin_api.api_post('/servers/%s/action' % server['id'],
68+
{'migrate': None})
69+
server = self._wait_for_state_change(
70+
self.admin_api, server, 'VERIFY_RESIZE')
71+
self.assertEqual(target_host, server['OS-EXT-SRV-ATTR:host'])
72+
self.admin_api.api_post('/servers/%s/action' % server['id'],
73+
{'confirmResize': None},
74+
check_response_status=[204])
75+
server = self._wait_for_state_change(self.api, server, 'ACTIVE')
76+
return server
77+
78+
def test_instance_list_deleted_service_with_no_uuid(self):
79+
"""This test covers the following scenario:
80+
81+
1. create an instance on a host which we'll simulate to be old
82+
by not having a uuid set
83+
2. migrate the instance to the "newer" host that has a service uuid
84+
3. delete the old service/compute node
85+
4. start a new service with the old hostname (still host1); this will
86+
also create a new compute_nodes table record for that host/node
87+
5. migrate the instance back to the host1 service
88+
6. list instances which will try to online migrate the old service uuid
89+
"""
90+
fake_virt.set_nodes(['host1'])
91+
self.addCleanup(fake_virt.restore_nodes)
92+
host1 = self.start_service('compute', host='host1')
93+
94+
# Create an instance which will be on host1 since it's the only host.
95+
server_req = self._build_minimal_create_server_request(
96+
self.api, 'test_instance_list_deleted_service_with_no_uuid',
97+
image_uuid=self.image_id, networks='none')
98+
server = self.api.post_server({'server': server_req})
99+
self._wait_for_state_change(self.api, server, 'ACTIVE')
100+
101+
# Now we start a 2nd compute which is "upgraded" (has a uuid) and
102+
# we'll migrate the instance to that host.
103+
fake_virt.set_nodes(['host2'])
104+
self.addCleanup(fake_virt.restore_nodes)
105+
host2 = self.start_service('compute', host='host2')
106+
self.assertIsNotNone(host2.service_ref.uuid)
107+
108+
server = self._migrate_server(server, 'host2')
109+
110+
# Delete the host1 service (which implicitly deletes the host1 compute
111+
# node record).
112+
host1.stop()
113+
self.admin_api.api_delete(
114+
'/os-services/%s' % host1.service_ref.uuid)
115+
# We should now only have 1 compute service (host2).
116+
compute_services = self.admin_api.api_get(
117+
'/os-services?binary=nova-compute').body['services']
118+
self.assertEqual(1, len(compute_services))
119+
# Make sure the compute node is also gone.
120+
self.admin_api.api_get(
121+
'/os-hypervisors?hypervisor_hostname_pattern=host1',
122+
check_response_status=[404])
123+
124+
# Now recreate the host1 service and compute node by restarting the
125+
# service.
126+
self.restart_compute_service(host1)
127+
# At this point, host1's service should have a uuid.
128+
self.assertIsNotNone(host1.service_ref.uuid)
129+
130+
# Sanity check that there are 3 services in the database, but only 1
131+
# is deleted.
132+
ctxt = nova_context.get_admin_context()
133+
with utils.temporary_mutation(ctxt, read_deleted='yes'):
134+
services = db.service_get_all_by_binary(ctxt, 'nova-compute')
135+
self.assertEqual(3, len(services))
136+
deleted_services = [svc for svc in services if svc['deleted']]
137+
self.assertEqual(1, len(deleted_services))
138+
deleted_service = deleted_services[0]
139+
self.assertEqual('host1', deleted_service['host'])
140+
141+
# Now migrate the instance back to host1.
142+
self._migrate_server(server, 'host1')
143+
144+
# Now null out the service uuid to simulate that the deleted host1
145+
# is old. We have to do this through the DB API directly since the
146+
# Service object won't allow a null uuid field. We also have to do
147+
# this *after* deleting the service via the REST API and migrating the
148+
# server because otherwise that will set a uuid when looking up the
149+
# service.
150+
with utils.temporary_mutation(ctxt, read_deleted='yes'):
151+
service_ref = db.service_update(
152+
ctxt, deleted_service['id'], {'uuid': None})
153+
self.assertIsNone(service_ref['uuid'])
154+
155+
# Finally, list servers as an admin so it joins on services to get host
156+
# information.
157+
# FIXME(mriedem): This is bug 1764556 where the join on the services
158+
# table also pulls the deleted service that doesn't have a uuid and
159+
# attempts to migrate that service to have a uuid, which fails because
160+
# it's not using a read_deleted='yes' context.
161+
ex = self.assertRaises(api_client.OpenStackApiException,
162+
self.admin_api.get_servers, detail=True)
163+
self.assertIn('ServiceNotFound', six.text_type(ex))

0 commit comments

Comments
 (0)