Skip to content

Commit 499e753

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "Sync COMPUTE_STATUS_DISABLED from API"
2 parents 9c2cbf3 + 3f0605c commit 499e753

File tree

7 files changed

+258
-6
lines changed

7 files changed

+258
-6
lines changed

nova/compute/api.py

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
MIN_COMPUTE_TRUSTED_CERTS = 31
107107
MIN_COMPUTE_ABORT_QUEUED_LIVE_MIGRATION = 34
108108
MIN_COMPUTE_VOLUME_TYPE = 36
109+
MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
109110

110111
# FIXME(danms): Keep a global cache of the cells we find the
111112
# first time we look. This needs to be refreshed on a timer or
@@ -5047,17 +5048,69 @@ def service_get_by_compute_host(self, context, host_name):
50475048
"""Get service entry for the given compute hostname."""
50485049
return objects.Service.get_by_compute_host(context, host_name)
50495050

5051+
def _update_compute_provider_status(self, context, service):
5052+
"""Calls the compute service to sync the COMPUTE_STATUS_DISABLED trait.
5053+
5054+
There are two cases where the API will not call the compute service:
5055+
5056+
* The compute service is down. In this case the trait is synchronized
5057+
when the compute service is restarted.
5058+
* The compute service is old. In this case the trait is synchronized
5059+
when the compute service is upgraded and restarted.
5060+
5061+
:param context: nova auth RequestContext
5062+
:param service: nova.objects.Service object which has been enabled
5063+
or disabled (see ``service_update``).
5064+
"""
5065+
# Make sure the service is up so we can make the RPC call.
5066+
if not self.servicegroup_api.service_is_up(service):
5067+
LOG.info('Compute service on host %s is down. The '
5068+
'COMPUTE_STATUS_DISABLED trait will be synchronized '
5069+
'when the service is restarted.', service.host)
5070+
return
5071+
5072+
# Make sure the compute service is new enough for the trait sync
5073+
# behavior.
5074+
# TODO(mriedem): Remove this compat check in the U release.
5075+
if service.version < MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED:
5076+
LOG.info('Compute service on host %s is too old to sync the '
5077+
'COMPUTE_STATUS_DISABLED trait in Placement. The '
5078+
'trait will be synchronized when the service is '
5079+
'upgraded and restarted.', service.host)
5080+
return
5081+
5082+
enabled = not service.disabled
5083+
# Avoid leaking errors out of the API.
5084+
try:
5085+
LOG.debug('Calling the compute service on host %s to sync the '
5086+
'COMPUTE_STATUS_DISABLED trait.', service.host)
5087+
self.rpcapi.set_host_enabled(context, service.host, enabled)
5088+
except Exception:
5089+
LOG.exception('An error occurred while updating host enabled '
5090+
'status to "%s" for compute host: %s',
5091+
'enabled' if enabled else 'disabled',
5092+
service.host)
5093+
50505094
def service_update(self, context, service):
50515095
"""Performs the actual service update operation.
50525096
5097+
If the "disabled" field is changed, potentially calls the compute
5098+
service to sync the COMPUTE_STATUS_DISABLED trait on the compute node
5099+
resource providers managed by this compute service.
5100+
50535101
:param context: nova auth RequestContext
50545102
:param service: nova.objects.Service object with changes already
50555103
set on the object
50565104
"""
5105+
# Before persisting changes and resetting the changed fields on the
5106+
# Service object, determine if the disabled field changed.
5107+
update_placement = 'disabled' in service.obj_what_changed()
5108+
# Persist the Service object changes to the database.
50575109
service.save()
5058-
# TODO(mriedem): Reflect COMPUTE_STATUS_DISABLED trait changes to the
5059-
# associated compute node resource providers if the service's disabled
5060-
# status changed.
5110+
# If the disabled field changed, potentially call the compute service
5111+
# to sync the COMPUTE_STATUS_DISABLED trait.
5112+
if update_placement:
5113+
self._update_compute_provider_status(context, service)
50615114
return service
50625115

50635116
@target_host_cell

nova/compute/rpcapi.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -913,7 +913,9 @@ def set_admin_password(self, ctxt, instance, new_pass):
913913
def set_host_enabled(self, ctxt, host, enabled):
914914
version = '5.0'
915915
cctxt = self.router.client(ctxt).prepare(
916-
server=host, version=version)
916+
server=host, version=version,
917+
call_monitor_timeout=CONF.rpc_response_timeout,
918+
timeout=CONF.long_rpc_timeout)
917919
return cctxt.call(ctxt, 'set_host_enabled', enabled=enabled)
918920

919921
def swap_volume(self, ctxt, instance, old_volume_id, new_volume_id,

nova/conf/rpc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
2929
* live migration
3030
* scheduling
31+
* enabling/disabling a compute service
3132
3233
Related options:
3334

nova/tests/functional/api_sample_tests/test_services.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ def setUp(self):
3737
test_services.fake_service_get_by_host_binary)
3838
self.stub_out("nova.db.api.service_update",
3939
test_services.fake_service_update)
40+
# If we are not using real services, we need to stub out
41+
# HostAPI._update_compute_provider_status so we don't actually
42+
# try to call a fake service over RPC.
43+
self.stub_out('nova.compute.api.HostAPI.'
44+
'_update_compute_provider_status',
45+
lambda *args, **kwargs: None)
4046
self.useFixture(utils_fixture.TimeFixture(test_services.fake_utcnow()))
4147

4248
def test_services_list(self):

nova/tests/functional/wsgi/test_services.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@
1111
# under the License.
1212

1313
import os_resource_classes as orc
14+
import os_traits
1415
import six
1516

1617
from nova import context as nova_context
1718
from nova import exception
1819
from nova import objects
1920
from nova.tests.functional.api import client as api_client
2021
from nova.tests.functional import integrated_helpers
22+
from nova.tests.unit.image import fake as fake_image
23+
from nova import utils
2124

2225

2326
class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
@@ -171,3 +174,132 @@ def test_evacuate_then_delete_compute_service(self):
171174
log_output = self.stdlog.logger.output
172175
self.assertIn('Error updating resources for node host1.', log_output)
173176
self.assertIn('Failed to create resource provider host1', log_output)
177+
178+
179+
class ComputeStatusFilterTest(integrated_helpers.ProviderUsageBaseTestCase):
180+
"""Tests the API, compute service and Placement interaction with the
181+
COMPUTE_STATUS_DISABLED trait when a compute service is enable/disabled.
182+
183+
This version of the test uses the 2.latest microversion for testing the
184+
2.53+ behavior of the PUT /os-services/{service_id} API.
185+
"""
186+
compute_driver = 'fake.SmallFakeDriver'
187+
188+
def _update_service(self, service, disabled, forced_down=None):
189+
"""Update the service using the 2.53 request schema.
190+
191+
:param service: dict representing the service resource in the API
192+
:param disabled: True if the service should be disabled, False if the
193+
service should be enabled
194+
:param forced_down: Optionally change the forced_down value.
195+
"""
196+
status = 'disabled' if disabled else 'enabled'
197+
req = {'status': status}
198+
if forced_down is not None:
199+
req['forced_down'] = forced_down
200+
self.admin_api.put_service(service['id'], req)
201+
202+
def test_compute_status_filter(self):
203+
"""Tests the compute_status_filter placement request filter"""
204+
# Start a compute service so a compute node and resource provider is
205+
# created.
206+
compute = self._start_compute('host1')
207+
# Get the UUID of the resource provider that was created.
208+
rp_uuid = self._get_provider_uuid_by_host('host1')
209+
# Get the service from the compute API.
210+
services = self.admin_api.get_services(binary='nova-compute',
211+
host='host1')
212+
self.assertEqual(1, len(services))
213+
service = services[0]
214+
215+
# At this point, the service should be enabled and the
216+
# COMPUTE_STATUS_DISABLED trait should not be set on the
217+
# resource provider in placement.
218+
self.assertEqual('enabled', service['status'])
219+
rp_traits = self._get_provider_traits(rp_uuid)
220+
trait = os_traits.COMPUTE_STATUS_DISABLED
221+
self.assertNotIn(trait, rp_traits)
222+
223+
# Now disable the compute service via the API.
224+
self._update_service(service, disabled=True)
225+
226+
# The update to placement should be synchronous so check the provider
227+
# traits and COMPUTE_STATUS_DISABLED should be set.
228+
rp_traits = self._get_provider_traits(rp_uuid)
229+
self.assertIn(trait, rp_traits)
230+
231+
# Try creating a server which should fail because nothing is available.
232+
networks = [{'port': self.neutron.port_1['id']}]
233+
server_req = self._build_minimal_create_server_request(
234+
self.api, 'test_compute_status_filter',
235+
image_uuid=fake_image.get_valid_image_id(), networks=networks)
236+
server = self.api.post_server({'server': server_req})
237+
server = self._wait_for_state_change(self.api, server, 'ERROR')
238+
# There should be a NoValidHost fault recorded.
239+
self.assertIn('fault', server)
240+
self.assertIn('No valid host', server['fault']['message'])
241+
242+
# Now enable the service and the trait should be gone.
243+
self._update_service(service, disabled=False)
244+
rp_traits = self._get_provider_traits(rp_uuid)
245+
self.assertNotIn(trait, rp_traits)
246+
247+
# Try creating another server and it should be OK.
248+
server = self.api.post_server({'server': server_req})
249+
self._wait_for_state_change(self.api, server, 'ACTIVE')
250+
251+
# Stop, force-down and disable the service so the API cannot call
252+
# the compute service to sync the trait.
253+
compute.stop()
254+
self._update_service(service, disabled=True, forced_down=True)
255+
# The API should have logged a message about the service being down.
256+
self.assertIn('Compute service on host host1 is down. The '
257+
'COMPUTE_STATUS_DISABLED trait will be synchronized '
258+
'when the service is restarted.',
259+
self.stdlog.logger.output)
260+
# The trait should not be on the provider even though the node is
261+
# disabled.
262+
rp_traits = self._get_provider_traits(rp_uuid)
263+
self.assertNotIn(trait, rp_traits)
264+
# Restart the compute service which should sync and set the trait on
265+
# the provider in placement.
266+
self.restart_compute_service(compute)
267+
rp_traits = self._get_provider_traits(rp_uuid)
268+
self.assertIn(trait, rp_traits)
269+
270+
271+
class ComputeStatusFilterTest211(ComputeStatusFilterTest):
272+
"""Extends ComputeStatusFilterTest and uses the 2.11 API for the
273+
legacy os-services disable/enable/force-down API behavior
274+
"""
275+
microversion = '2.11'
276+
277+
def _update_service(self, service, disabled, forced_down=None):
278+
"""Update the service using the 2.11 request schema.
279+
280+
:param service: dict representing the service resource in the API
281+
:param disabled: True if the service should be disabled, False if the
282+
service should be enabled
283+
:param forced_down: Optionally change the forced_down value.
284+
"""
285+
# Before 2.53 the service is uniquely identified by host and binary.
286+
body = {
287+
'host': service['host'],
288+
'binary': service['binary']
289+
}
290+
# Handle forced_down first if provided since the enable/disable
291+
# behavior in the API depends on it.
292+
if forced_down is not None:
293+
body['forced_down'] = forced_down
294+
self.admin_api.api_put('/os-services/force-down', body)
295+
296+
if disabled:
297+
self.admin_api.api_put('/os-services/disable', body)
298+
else:
299+
self.admin_api.api_put('/os-services/enable', body)
300+
301+
def _get_provider_uuid_by_host(self, host):
302+
# We have to temporarily mutate to 2.53 to get the hypervisor UUID.
303+
with utils.temporary_mutation(self.admin_api, microversion='2.53'):
304+
return super(ComputeStatusFilterTest211,
305+
self)._get_provider_uuid_by_host(host)

nova/tests/unit/compute/test_host_api.py

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import fixtures
1818
import mock
19+
import oslo_messaging as messaging
1920
from oslo_utils.fixture import uuidsentinel as uuids
2021

2122
from nova.api.openstack.compute import services
@@ -327,18 +328,73 @@ def test_service_update_by_host_and_binary(self):
327328
service_id = 42
328329
expected_result = dict(test_service.fake_service, id=service_id)
329330

331+
@mock.patch.object(self.host_api, '_update_compute_provider_status')
330332
@mock.patch.object(self.host_api.db, 'service_get_by_host_and_binary')
331333
@mock.patch.object(self.host_api.db, 'service_update')
332-
def _do_test(mock_service_update, mock_service_get_by_host_and_binary):
334+
def _do_test(mock_service_update, mock_service_get_by_host_and_binary,
335+
mock_update_compute_provider_status):
333336
mock_service_get_by_host_and_binary.return_value = expected_result
334337
mock_service_update.return_value = expected_result
335338

336339
result = self.host_api.service_update_by_host_and_binary(
337340
self.ctxt, host_name, binary, params_to_update)
338341
self._compare_obj(result, expected_result)
342+
mock_update_compute_provider_status.assert_called_once_with(
343+
self.ctxt, test.MatchType(objects.Service))
339344

340345
_do_test()
341346

347+
@mock.patch('nova.compute.api.HostAPI._update_compute_provider_status',
348+
new_callable=mock.NonCallableMock)
349+
def test_service_update_no_update_provider_status(self, mock_ucps):
350+
"""Tests the scenario that the service is updated but the disabled
351+
field is not changed, for example the forced_down field is only
352+
updated. In this case _update_compute_provider_status should not be
353+
called.
354+
"""
355+
service = objects.Service(forced_down=True)
356+
self.assertIn('forced_down', service.obj_what_changed())
357+
with mock.patch.object(service, 'save') as mock_save:
358+
retval = self.host_api.service_update(self.ctxt, service)
359+
self.assertIs(retval, service)
360+
mock_save.assert_called_once_with()
361+
362+
@mock.patch('nova.compute.rpcapi.ComputeAPI.set_host_enabled',
363+
new_callable=mock.NonCallableMock)
364+
def test_update_compute_provider_status_service_too_old(self, mock_she):
365+
"""Tests the scenario that the service is up but is too old to sync the
366+
COMPUTE_STATUS_DISABLED trait.
367+
"""
368+
service = objects.Service(host='fake-host')
369+
service.version = compute.MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED - 1
370+
with mock.patch.object(
371+
self.host_api.servicegroup_api, 'service_is_up',
372+
return_value=True) as service_is_up:
373+
self.host_api._update_compute_provider_status(self.ctxt, service)
374+
service_is_up.assert_called_once_with(service)
375+
self.assertIn('Compute service on host fake-host is too old to sync '
376+
'the COMPUTE_STATUS_DISABLED trait in Placement.',
377+
self.stdlog.logger.output)
378+
379+
@mock.patch('nova.compute.rpcapi.ComputeAPI.set_host_enabled',
380+
side_effect=messaging.MessagingTimeout)
381+
def test_update_compute_provider_status_service_rpc_error(self, mock_she):
382+
"""Tests the scenario that the RPC call to the compute service raised
383+
some exception.
384+
"""
385+
service = objects.Service(host='fake-host', disabled=True)
386+
with mock.patch.object(
387+
self.host_api.servicegroup_api, 'service_is_up',
388+
return_value=True) as service_is_up:
389+
self.host_api._update_compute_provider_status(self.ctxt, service)
390+
service_is_up.assert_called_once_with(service)
391+
mock_she.assert_called_once_with(self.ctxt, 'fake-host', False)
392+
log_output = self.stdlog.logger.output
393+
self.assertIn('An error occurred while updating host enabled '
394+
'status to "disabled" for compute host: fake-host',
395+
log_output)
396+
self.assertIn('MessagingTimeout', log_output)
397+
342398
@mock.patch.object(objects.InstanceList, 'get_by_host',
343399
return_value = ['fake-responses'])
344400
def test_instance_get_all_by_host(self, mock_get):

nova/tests/unit/compute/test_rpcapi.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,8 +468,10 @@ def test_set_admin_password(self):
468468
version='5.0')
469469

470470
def test_set_host_enabled(self):
471+
self.flags(long_rpc_timeout=600, rpc_response_timeout=120)
471472
self._test_compute_api('set_host_enabled', 'call',
472-
enabled='enabled', host='host')
473+
enabled='enabled', host='host',
474+
call_monitor_timeout=120, timeout=600)
473475

474476
def test_get_host_uptime(self):
475477
self._test_compute_api('get_host_uptime', 'call', host='host')

0 commit comments

Comments
 (0)