Skip to content

Commit e9889be

Browse files
committed
Delete resource providers for all nodes when deleting compute service
Change I7b8622b178d5043ed1556d7bdceaf60f47e5ac80 started deleting the compute node resource provider associated with a compute node when deleting a nova-compute service. However, it would only delete the first compute node associated with the service which means for an ironic compute service that is managing multiple nodes, the resource providers were not cleaned up in placement. This fixes the issue by iterating all the compute nodes and cleaning up their providers. Note this could be potentially a lot of nodes, but we don't really have many good options here but to iterate them and clean them up one at a time. Note that this is best-effort but because of how the SchedulerReportClient.delete_resource_provider method ignores ResourceProviderInUse errors, and we could have stale allocations on the host for which delete_resource_provider is not accounting, namely allocations from evacuated instances (or incomplete migrations though you can't migrate baremetal instances today), we could still delete the compute service and orphan those in-use providers. That, however, is no worse than before this change where we did not try to cleanup all providers. The issue described above is being tracked with bug 1829479 and will be dealt with separately. Change-Id: I9e852e25ea89f32bf19cdaeb1f5dac8f749f5dbc Closes-Bug: #1811726 (cherry picked from commit 650fe11)
1 parent acd2daa commit e9889be

File tree

3 files changed

+32
-9
lines changed

3 files changed

+32
-9
lines changed

nova/api/openstack/compute/services.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,14 @@ def delete(self, req, id):
266266
ag.id,
267267
service.host)
268268
# remove the corresponding resource provider record from
269-
# placement for this compute node
270-
self.placementclient.delete_resource_provider(
271-
context, service.compute_node, cascade=True)
269+
# placement for the compute nodes managed by this service;
270+
# remember that an ironic compute service can manage multiple
271+
# nodes
272+
compute_nodes = objects.ComputeNodeList.get_all_by_host(
273+
context, service.host)
274+
for compute_node in compute_nodes:
275+
self.placementclient.delete_resource_provider(
276+
context, compute_node, cascade=True)
272277
# remove the host_mapping of this host.
273278
try:
274279
hm = objects.HostMapping.get_by_host(context, service.host)

nova/tests/unit/api/openstack/compute/test_services.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -713,25 +713,33 @@ def test_compute_service_delete_host_mapping_not_found(
713713
"""Tests that we are still able to successfully delete a nova-compute
714714
service even if the HostMapping is not found.
715715
"""
716+
@mock.patch('nova.objects.ComputeNodeList.get_all_by_host',
717+
return_value=objects.ComputeNodeList(objects=[
718+
objects.ComputeNode(host='host1',
719+
hypervisor_hostname='node1'),
720+
objects.ComputeNode(host='host1',
721+
hypervisor_hostname='node2')]))
716722
@mock.patch.object(self.controller.host_api, 'service_get_by_id',
717723
return_value=objects.Service(
718-
host='host1', binary='nova-compute',
719-
compute_node=objects.ComputeNode()))
724+
host='host1', binary='nova-compute'))
720725
@mock.patch.object(self.controller.aggregate_api,
721726
'get_aggregates_by_host',
722727
return_value=objects.AggregateList())
723728
@mock.patch.object(self.controller.placementclient,
724729
'delete_resource_provider')
725730
def _test(delete_resource_provider,
726-
get_aggregates_by_host, service_get_by_id):
731+
get_aggregates_by_host, service_get_by_id,
732+
cn_get_all_by_host):
727733
self.controller.delete(self.req, 2)
728734
ctxt = self.req.environ['nova.context']
729735
service_get_by_id.assert_called_once_with(ctxt, 2)
730736
get_instances.assert_called_once_with(ctxt, 'host1')
731737
get_aggregates_by_host.assert_called_once_with(ctxt, 'host1')
732-
delete_resource_provider.assert_called_once_with(
733-
ctxt, service_get_by_id.return_value.compute_node,
734-
cascade=True)
738+
self.assertEqual(2, delete_resource_provider.call_count)
739+
nodes = cn_get_all_by_host.return_value
740+
delete_resource_provider.assert_has_calls([
741+
mock.call(ctxt, node, cascade=True) for node in nodes
742+
], any_order=True)
735743
get_hm.assert_called_once_with(ctxt, 'host1')
736744
service_delete.assert_called_once_with()
737745
_test()
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
fixes:
3+
- |
4+
`Bug 1811726`_ is fixed by deleting the resource provider (in placement)
5+
associated with each compute node record managed by a ``nova-compute``
6+
service when that service is deleted via the
7+
``DELETE /os-services/{service_id}`` API. This is particularly important
8+
for compute services managing ironic baremetal nodes.
9+
10+
.. _Bug 1811726: https://bugs.launchpad.net/nova/+bug/1811726

0 commit comments

Comments
 (0)