Skip to content

Commit eadd78e

Browse files
committed
Add functional recreate test for bug 1764556
This attempts to recreate the following scenario: 1) boot instance on ocata host where the compute service does not have a uuid 2) migrate instance 3) delete the ocata service (thus deleting the compute node) 4) start compute service with the same name 5) migrate instance to newly-created compute node 6) upgrade to pike where services.uuid data migration happens 7) list instances as admin to join on the services table The failure occurs when listing instances because the deleted service with the same name as the compute host that the instance is running on gets pulled from the DB and the Service object attempts to set a uuid on it, which fails since it's not using a read_deleted="yes" context. While working on this, the service_get_all_by_binary DB API method had to be fixed to not hard-code read_deleted="no" since the test needs to be able to read deleted services, which it can control via its own context object (note that RequestContext.read_deleted already defaults to "no" so the hard-coding in the DB API is unnecessarily restrictive). NOTE(mriedem): This backport needed to account for not having change Idaed39629095f86d24a54334c699a26c218c6593 in Rocky so the PlacementFixture comes from the same module as the other nova fixtures. Change-Id: I4d60da26fcf0a77628d1fdf4e818884614fa4f02 Related-Bug: #1764556 (cherry picked from commit 81f05f5) (cherry picked from commit 44709cf)
1 parent 9c6d900 commit eadd78e

File tree

2 files changed

+163
-2
lines changed

2 files changed

+163
-2
lines changed

nova/db/sqlalchemy/api.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,7 @@ def service_get_by_host_and_topic(context, host, topic):
472472

473473
@pick_context_manager_reader
474474
def service_get_all_by_binary(context, binary, include_disabled=False):
475-
query = model_query(context, models.Service, read_deleted="no").\
476-
filter_by(binary=binary)
475+
query = model_query(context, models.Service).filter_by(binary=binary)
477476
if not include_disabled:
478477
query = query.filter_by(disabled=False)
479478
return query.all()
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
2+
# not use this file except in compliance with the License. You may obtain
3+
# a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10+
# License for the specific language governing permissions and limitations
11+
# under the License.
12+
13+
import six
14+
15+
from nova import context as nova_context
16+
from nova.db import api as db
17+
from nova import test
18+
from nova.tests import fixtures as nova_fixtures
19+
from nova.tests.functional.api import client as api_client
20+
from nova.tests.functional import integrated_helpers
21+
from nova.tests.unit.image import fake as fake_image
22+
from nova.tests.unit import policy_fixture
23+
from nova import utils
24+
from nova.virt import fake as fake_virt
25+
26+
27+
class InstanceListWithDeletedServicesTestCase(
28+
test.TestCase, integrated_helpers.InstanceHelperMixin):
29+
"""Contains regression testing for bug 1764556 which is similar to bug
30+
1746509 but different in that it deals with a deleted service and compute
31+
node which was not upgraded to the point of having a UUID and that causes
32+
problems later when an instance which was running on that node is migrated
33+
back to an upgraded service with the same hostname. This is because the
34+
service uuid migration routine gets a ServiceNotFound error when loading
35+
up a deleted service by hostname.
36+
"""
37+
def setUp(self):
38+
super(InstanceListWithDeletedServicesTestCase, self).setUp()
39+
self.useFixture(policy_fixture.RealPolicyFixture())
40+
41+
# The NeutronFixture is needed to stub out validate_networks in API.
42+
self.useFixture(nova_fixtures.NeutronFixture(self))
43+
44+
# We need the computes reporting into placement for the filter
45+
# scheduler to pick a host.
46+
self.useFixture(nova_fixtures.PlacementFixture())
47+
48+
api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
49+
api_version='v2.1'))
50+
self.api = api_fixture.api
51+
self.admin_api = api_fixture.admin_api
52+
self.admin_api.microversion = 'latest'
53+
54+
# the image fake backend needed for image discovery
55+
fake_image.stub_out_image_service(self)
56+
self.addCleanup(fake_image.FakeImageService_reset)
57+
# Get the image before we set the microversion to latest to avoid
58+
# the proxy issues with GET /images in 2.36.
59+
self.image_id = self.api.get_images()[0]['id']
60+
self.api.microversion = 'latest'
61+
62+
self.start_service('conductor')
63+
self.start_service('scheduler')
64+
65+
def _migrate_server(self, server, target_host):
66+
self.admin_api.api_post('/servers/%s/action' % server['id'],
67+
{'migrate': None})
68+
server = self._wait_for_state_change(
69+
self.admin_api, server, 'VERIFY_RESIZE')
70+
self.assertEqual(target_host, server['OS-EXT-SRV-ATTR:host'])
71+
self.admin_api.api_post('/servers/%s/action' % server['id'],
72+
{'confirmResize': None},
73+
check_response_status=[204])
74+
server = self._wait_for_state_change(self.api, server, 'ACTIVE')
75+
return server
76+
77+
def test_instance_list_deleted_service_with_no_uuid(self):
78+
"""This test covers the following scenario:
79+
80+
1. create an instance on a host which we'll simulate to be old
81+
by not having a uuid set
82+
2. migrate the instance to the "newer" host that has a service uuid
83+
3. delete the old service/compute node
84+
4. start a new service with the old hostname (still host1); this will
85+
also create a new compute_nodes table record for that host/node
86+
5. migrate the instance back to the host1 service
87+
6. list instances which will try to online migrate the old service uuid
88+
"""
89+
fake_virt.set_nodes(['host1'])
90+
self.addCleanup(fake_virt.restore_nodes)
91+
host1 = self.start_service('compute', host='host1')
92+
93+
# Create an instance which will be on host1 since it's the only host.
94+
server_req = self._build_minimal_create_server_request(
95+
self.api, 'test_instance_list_deleted_service_with_no_uuid',
96+
image_uuid=self.image_id, networks='none')
97+
server = self.api.post_server({'server': server_req})
98+
self._wait_for_state_change(self.api, server, 'ACTIVE')
99+
100+
# Now we start a 2nd compute which is "upgraded" (has a uuid) and
101+
# we'll migrate the instance to that host.
102+
fake_virt.set_nodes(['host2'])
103+
self.addCleanup(fake_virt.restore_nodes)
104+
host2 = self.start_service('compute', host='host2')
105+
self.assertIsNotNone(host2.service_ref.uuid)
106+
107+
server = self._migrate_server(server, 'host2')
108+
109+
# Delete the host1 service (which implicitly deletes the host1 compute
110+
# node record).
111+
host1.stop()
112+
self.admin_api.api_delete(
113+
'/os-services/%s' % host1.service_ref.uuid)
114+
# We should now only have 1 compute service (host2).
115+
compute_services = self.admin_api.api_get(
116+
'/os-services?binary=nova-compute').body['services']
117+
self.assertEqual(1, len(compute_services))
118+
# Make sure the compute node is also gone.
119+
self.admin_api.api_get(
120+
'/os-hypervisors?hypervisor_hostname_pattern=host1',
121+
check_response_status=[404])
122+
123+
# Now recreate the host1 service and compute node by restarting the
124+
# service.
125+
self.restart_compute_service(host1)
126+
# At this point, host1's service should have a uuid.
127+
self.assertIsNotNone(host1.service_ref.uuid)
128+
129+
# Sanity check that there are 3 services in the database, but only 1
130+
# is deleted.
131+
ctxt = nova_context.get_admin_context()
132+
with utils.temporary_mutation(ctxt, read_deleted='yes'):
133+
services = db.service_get_all_by_binary(ctxt, 'nova-compute')
134+
self.assertEqual(3, len(services))
135+
deleted_services = [svc for svc in services if svc['deleted']]
136+
self.assertEqual(1, len(deleted_services))
137+
deleted_service = deleted_services[0]
138+
self.assertEqual('host1', deleted_service['host'])
139+
140+
# Now migrate the instance back to host1.
141+
self._migrate_server(server, 'host1')
142+
143+
# Now null out the service uuid to simulate that the deleted host1
144+
# is old. We have to do this through the DB API directly since the
145+
# Service object won't allow a null uuid field. We also have to do
146+
# this *after* deleting the service via the REST API and migrating the
147+
# server because otherwise that will set a uuid when looking up the
148+
# service.
149+
with utils.temporary_mutation(ctxt, read_deleted='yes'):
150+
service_ref = db.service_update(
151+
ctxt, deleted_service['id'], {'uuid': None})
152+
self.assertIsNone(service_ref['uuid'])
153+
154+
# Finally, list servers as an admin so it joins on services to get host
155+
# information.
156+
# FIXME(mriedem): This is bug 1764556 where the join on the services
157+
# table also pulls the deleted service that doesn't have a uuid and
158+
# attempts to migrate that service to have a uuid, which fails because
159+
# it's not using a read_deleted='yes' context.
160+
ex = self.assertRaises(api_client.OpenStackApiException,
161+
self.admin_api.get_servers, detail=True)
162+
self.assertIn('ServiceNotFound', six.text_type(ex))

0 commit comments

Comments
 (0)