Skip to content

Commit 099b490

Browse files
committed
Update COMPUTE_STATUS_DISABLED from set_host_enabled compute call
This adds code to the ComputeManager.set_host_enabled method, which will be called by the API in a subsequent change when a compute service is enabled or disabled, which will add or remove the COMPUTE_STATUS_DISABLED trait to/from the related compute node resource providers managed by the compute service. The set_host_enabled compute RPC API is a synchronous RPC call and the only (non-fake) in-tree compute driver that implements the set_host_enabled driver interface is XenAPIDriver. As such, the method is changed to handle NotImplementedError so an error is not returned to the API if a driver does not implement the interface. Before this series, only the PUT /os-hosts/{host_name} API would call this compute service method and that API was deprecated in 2.43. In other words, most users (admins) are likely not using that API and the only ones that could before were running with XenAPIDriver. The change to update the COMPUTE_STATUS_DISABLED trait usage is best effort so errors, expected or unexpected, from the ComputeVirtAPI are logged but not raised back to the caller. With change I3005b46221ac3c0e559e1072131a7e4846c9867c the ResourceTracker update_available_resource flow will sync the trait based on the current compute service disabled status. The compute service RPC API version is incremented so that the API will be able to determine if the compute service is new enough for this new behavior when disabling/enabling a compute service in the os-services API. Part of blueprint pre-filter-disabled-computes Change-Id: Ia95de2f23f12b002b2189c9294ec190569a628ab
1 parent 9b99be4 commit 099b490

File tree

3 files changed

+235
-3
lines changed

3 files changed

+235
-3
lines changed

nova/compute/manager.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4982,10 +4982,69 @@ def host_maintenance_mode(self, context, host, mode):
49824982
"""
49834983
return self.driver.host_maintenance_mode(host, mode)
49844984

4985+
def _update_compute_provider_status(self, context, enabled):
4986+
"""Adds or removes the COMPUTE_STATUS_DISABLED trait for this host.
4987+
4988+
For each ComputeNode managed by this service, adds or removes the
4989+
COMPUTE_STATUS_DISABLED traits to/from the associated resource provider
4990+
in Placement.
4991+
4992+
:param context: nova auth RequestContext
4993+
:param enabled: True if the node is enabled in which case the trait
4994+
would be removed, False if the node is disabled in which case
4995+
the trait would be added.
4996+
:raises: ComputeHostNotFound if there are no compute nodes found in
4997+
the ResourceTracker for this service.
4998+
"""
4999+
# Get the compute node(s) on this host. Remember that ironic can be
5000+
# managing more than one compute node.
5001+
nodes = self.rt.compute_nodes.values()
5002+
if not nodes:
5003+
raise exception.ComputeHostNotFound(host=self.host)
5004+
# For each node, we want to add (or remove) the COMPUTE_STATUS_DISABLED
5005+
# trait on the related resource provider in placement so the scheduler
5006+
# (pre-)filters the provider based on its status.
5007+
for node in nodes:
5008+
try:
5009+
self.virtapi.update_compute_provider_status(
5010+
context, node.uuid, enabled)
5011+
except (exception.ResourceProviderTraitRetrievalFailed,
5012+
exception.ResourceProviderUpdateConflict,
5013+
exception.ResourceProviderUpdateFailed,
5014+
exception.TraitRetrievalFailed) as e:
5015+
# This is best effort so just log a warning and continue. The
5016+
# update_available_resource periodic task will sync the trait.
5017+
LOG.warning('An error occurred while updating '
5018+
'COMPUTE_STATUS_DISABLED trait on compute node '
5019+
'resource provider %s. Error: %s',
5020+
node.uuid, e.format_message())
5021+
except Exception:
5022+
LOG.exception('An error occurred while updating '
5023+
'COMPUTE_STATUS_DISABLED trait on compute node '
5024+
'resource provider %s.', node.uuid)
5025+
49855026
@wrap_exception()
49865027
def set_host_enabled(self, context, enabled):
4987-
"""Sets the specified host's ability to accept new instances."""
4988-
return self.driver.set_host_enabled(enabled)
5028+
"""Sets the specified host's ability to accept new instances.
5029+
5030+
This method will add or remove the COMPUTE_STATUS_DISABLED trait
5031+
to/from the associated compute node resource provider(s) for this
5032+
compute service.
5033+
"""
5034+
try:
5035+
self._update_compute_provider_status(context, enabled)
5036+
except exception.ComputeHostNotFound:
5037+
LOG.warning('Unable to add/remove trait COMPUTE_STATUS_DISABLED. '
5038+
'No ComputeNode(s) found for host: %s', self.host)
5039+
5040+
try:
5041+
return self.driver.set_host_enabled(enabled)
5042+
except NotImplementedError:
5043+
# Only the xenapi driver implements set_host_enabled but we don't
5044+
# want NotImplementedError to get raised back to the API. We still
5045+
# need to honor the compute RPC API contract and return 'enabled'
5046+
# or 'disabled' though.
5047+
return 'enabled' if enabled else 'disabled'
49895048

49905049
@wrap_exception()
49915050
def get_host_uptime(self, context):

nova/objects/service.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232

3333
# NOTE(danms): This is the global service version counter
34-
SERVICE_VERSION = 37
34+
SERVICE_VERSION = 38
3535

3636

3737
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -151,6 +151,8 @@
151151
{'compute_rpc': '5.0'},
152152
# Version 37: prep_resize takes a RequestSpec object
153153
{'compute_rpc': '5.1'},
154+
# Version 38: set_host_enabled reflects COMPUTE_STATUS_DISABLED trait
155+
{'compute_rpc': '5.1'},
154156
)
155157

156158

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9082,3 +9082,174 @@ def fake_last_completed_audit_period():
90829082
self.assertEqual(0, mock_task_log().errors,
90839083
'an error was encountered processing the deleted test'
90849084
' instance')
9085+
9086+
9087+
@ddt.ddt
9088+
class ComputeManagerSetHostEnabledTestCase(test.NoDBTestCase):
9089+
9090+
def setUp(self):
9091+
super(ComputeManagerSetHostEnabledTestCase, self).setUp()
9092+
self.compute = manager.ComputeManager()
9093+
self.context = context.RequestContext(user_id=fakes.FAKE_USER_ID,
9094+
project_id=fakes.FAKE_PROJECT_ID)
9095+
9096+
@ddt.data(True, False)
9097+
def test_set_host_enabled(self, enabled):
9098+
"""Happy path test for set_host_enabled"""
9099+
with mock.patch.object(self.compute,
9100+
'_update_compute_provider_status') as ucpt:
9101+
retval = self.compute.set_host_enabled(self.context, enabled)
9102+
expected_retval = 'enabled' if enabled else 'disabled'
9103+
self.assertEqual(expected_retval, retval)
9104+
ucpt.assert_called_once_with(self.context, enabled)
9105+
9106+
@mock.patch('nova.compute.manager.LOG.warning')
9107+
def test_set_host_enabled_compute_host_not_found(self, mock_warning):
9108+
"""Tests _update_compute_provider_status raising ComputeHostNotFound"""
9109+
error = exception.ComputeHostNotFound(host=self.compute.host)
9110+
with mock.patch.object(self.compute,
9111+
'_update_compute_provider_status',
9112+
side_effect=error) as ucps:
9113+
retval = self.compute.set_host_enabled(self.context, False)
9114+
self.assertEqual('disabled', retval)
9115+
ucps.assert_called_once_with(self.context, False)
9116+
# A warning should have been logged for the ComputeHostNotFound error.
9117+
mock_warning.assert_called_once()
9118+
self.assertIn('Unable to add/remove trait COMPUTE_STATUS_DISABLED. '
9119+
'No ComputeNode(s) found for host',
9120+
mock_warning.call_args[0][0])
9121+
9122+
def test_set_host_enabled_update_provider_status_error(self):
9123+
"""Tests _update_compute_provider_status raising some unexpected error
9124+
"""
9125+
error = messaging.MessagingTimeout
9126+
with test.nested(
9127+
mock.patch.object(self.compute,
9128+
'_update_compute_provider_status',
9129+
side_effect=error),
9130+
mock.patch.object(self.compute.driver, 'set_host_enabled',
9131+
# The driver is not called in this case.
9132+
new_callable=mock.NonCallableMock),
9133+
) as (
9134+
ucps, driver_set_host_enabled,
9135+
):
9136+
self.assertRaises(error,
9137+
self.compute.set_host_enabled,
9138+
self.context, False)
9139+
ucps.assert_called_once_with(self.context, False)
9140+
9141+
@ddt.data(True, False)
9142+
def test_set_host_enabled_not_implemented_error(self, enabled):
9143+
"""Tests the driver raising NotImplementedError"""
9144+
with test.nested(
9145+
mock.patch.object(self.compute, '_update_compute_provider_status'),
9146+
mock.patch.object(self.compute.driver, 'set_host_enabled',
9147+
side_effect=NotImplementedError),
9148+
) as (
9149+
ucps, driver_set_host_enabled,
9150+
):
9151+
retval = self.compute.set_host_enabled(self.context, enabled)
9152+
expected_retval = 'enabled' if enabled else 'disabled'
9153+
self.assertEqual(expected_retval, retval)
9154+
ucps.assert_called_once_with(self.context, enabled)
9155+
driver_set_host_enabled.assert_called_once_with(enabled)
9156+
9157+
def test_set_host_enabled_driver_error(self):
9158+
"""Tests the driver raising some unexpected error"""
9159+
error = exception.HypervisorUnavailable(host=self.compute.host)
9160+
with test.nested(
9161+
mock.patch.object(self.compute, '_update_compute_provider_status'),
9162+
mock.patch.object(self.compute.driver, 'set_host_enabled',
9163+
side_effect=error),
9164+
) as (
9165+
ucps, driver_set_host_enabled,
9166+
):
9167+
self.assertRaises(exception.HypervisorUnavailable,
9168+
self.compute.set_host_enabled,
9169+
self.context, False)
9170+
ucps.assert_called_once_with(self.context, False)
9171+
driver_set_host_enabled.assert_called_once_with(False)
9172+
9173+
@ddt.data(True, False)
9174+
def test_update_compute_provider_status(self, enabled):
9175+
"""Happy path test for _update_compute_provider_status"""
9176+
# Fake out some fake compute nodes (ironic driver case).
9177+
self.compute.rt.compute_nodes = {
9178+
uuids.node1: objects.ComputeNode(uuid=uuids.node1),
9179+
uuids.node2: objects.ComputeNode(uuid=uuids.node2),
9180+
}
9181+
with mock.patch.object(self.compute.virtapi,
9182+
'update_compute_provider_status') as ucps:
9183+
self.compute._update_compute_provider_status(
9184+
self.context, enabled=enabled)
9185+
self.assertEqual(2, ucps.call_count)
9186+
ucps.assert_has_calls([
9187+
mock.call(self.context, uuids.node1, enabled),
9188+
mock.call(self.context, uuids.node2, enabled),
9189+
], any_order=True)
9190+
9191+
def test_update_compute_provider_status_no_nodes(self):
9192+
"""Tests the case that _update_compute_provider_status will raise
9193+
ComputeHostNotFound if there are no nodes in the resource tracker.
9194+
"""
9195+
self.assertRaises(exception.ComputeHostNotFound,
9196+
self.compute._update_compute_provider_status,
9197+
self.context, enabled=True)
9198+
9199+
@mock.patch('nova.compute.manager.LOG.warning')
9200+
def test_update_compute_provider_status_expected_errors(self, m_warn):
9201+
"""Tests _update_compute_provider_status handling a set of expected
9202+
errors from the ComputeVirtAPI and logging a warning.
9203+
"""
9204+
# Setup a fake compute in the resource tracker.
9205+
self.compute.rt.compute_nodes = {
9206+
uuids.node: objects.ComputeNode(uuid=uuids.node)
9207+
}
9208+
errors = (
9209+
exception.ResourceProviderTraitRetrievalFailed(uuid=uuids.node),
9210+
exception.ResourceProviderUpdateConflict(
9211+
uuid=uuids.node, generation=1, error='conflict'),
9212+
exception.ResourceProviderUpdateFailed(
9213+
url='https://placement', error='dogs'),
9214+
exception.TraitRetrievalFailed(error='cats'),
9215+
)
9216+
for error in errors:
9217+
with mock.patch.object(
9218+
self.compute.virtapi, 'update_compute_provider_status',
9219+
side_effect=error) as ucps:
9220+
self.compute._update_compute_provider_status(
9221+
self.context, enabled=False)
9222+
ucps.assert_called_once_with(self.context, uuids.node, False)
9223+
# The expected errors are logged as a warning.
9224+
m_warn.assert_called_once()
9225+
self.assertIn('An error occurred while updating '
9226+
'COMPUTE_STATUS_DISABLED trait on compute node',
9227+
m_warn.call_args[0][0])
9228+
m_warn.reset_mock()
9229+
9230+
@mock.patch('nova.compute.manager.LOG.exception')
9231+
def test_update_compute_provider_status_unexpected_error(self, m_exc):
9232+
"""Tests _update_compute_provider_status handling an unexpected
9233+
exception from the ComputeVirtAPI and logging it.
9234+
"""
9235+
# Use two fake nodes here to make sure we try updating each even when
9236+
# an error occurs.
9237+
self.compute.rt.compute_nodes = {
9238+
uuids.node1: objects.ComputeNode(uuid=uuids.node1),
9239+
uuids.node2: objects.ComputeNode(uuid=uuids.node2),
9240+
}
9241+
with mock.patch.object(
9242+
self.compute.virtapi, 'update_compute_provider_status',
9243+
side_effect=(TypeError, AttributeError)) as ucps:
9244+
self.compute._update_compute_provider_status(
9245+
self.context, enabled=False)
9246+
self.assertEqual(2, ucps.call_count)
9247+
ucps.assert_has_calls([
9248+
mock.call(self.context, uuids.node1, False),
9249+
mock.call(self.context, uuids.node2, False),
9250+
], any_order=True)
9251+
# Each exception should have been logged.
9252+
self.assertEqual(2, m_exc.call_count)
9253+
self.assertIn('An error occurred while updating '
9254+
'COMPUTE_STATUS_DISABLED trait',
9255+
m_exc.call_args_list[0][0][0])

0 commit comments

Comments
 (0)