Skip to content

Commit 35273a8

Browse files
committed
rt: only map compute node if we created it
If ComputeNode.create() fails, the update_available_resource periodic will not try to create it again because it will be mapped in the compute_nodes dict and _init_compute_node will return early but trying to save changes to that ComputeNode object later will fail because there is no id on the object, since we failed to create it in the DB. This simply reverses the logic such that we only map the compute node if we successfully created it. Some existing _init_compute_node testing had to be changed since it relied on the order of when the ComputeNode object is created and put into the compute_nodes dict in order to pass the object along to some much lower-level PCI tracker code, which was arguably mocking too deep for a unit test. That is changed to avoid the low-level mocking and assertions and just assert that _setup_pci_tracker is called as expected. Conflicts: nova/compute/resource_tracker.py nova/tests/unit/compute/test_resource_tracker.py NOTE(mriedem): The resource_tracker.py conflict is due to not having I14a310b20bd9892e7b34464e6baad49bf5928ece in Rocky. The test conflicts are due to not having change I37e8ad5b14262d801702411c2c87e73550adda70 in Rocky. Change-Id: I9fa1d509a3de405d6246fb8670612c65c10cc93b Closes-Bug: #1839674 (cherry picked from commit f578146) (cherry picked from commit 648770b)
1 parent 72f9aa7 commit 35273a8

File tree

2 files changed

+44
-35
lines changed

2 files changed

+44
-35
lines changed

nova/compute/resource_tracker.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -580,8 +580,12 @@ def _init_compute_node(self, context, resources):
580580
cn = objects.ComputeNode(context)
581581
cn.host = self.host
582582
self._copy_resources(cn, resources)
583-
self.compute_nodes[nodename] = cn
584583
cn.create()
584+
# Only map the ComputeNode into compute_nodes if create() was OK
585+
# because if create() fails, on the next run through here nodename
586+
# would be in compute_nodes and we won't try to create again (because
587+
# of the logic above).
588+
self.compute_nodes[nodename] = cn
585589
LOG.info('Compute node record created for '
586590
'%(host)s:%(node)s with uuid: %(uuid)s',
587591
{'host': self.host, 'node': nodename, 'uuid': cn.uuid})

nova/tests/unit/compute/test_resource_tracker.py

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,56 +1074,45 @@ def fake_get_all(_ctx, nodename):
10741074
self.assertEqual(_HOSTNAME, self.rt.compute_nodes[_NODENAME].host)
10751075

10761076
@mock.patch('nova.objects.ComputeNodeList.get_by_hypervisor')
1077-
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
1078-
return_value=objects.PciDeviceList(objects=[]))
10791077
@mock.patch('nova.objects.ComputeNode.create')
10801078
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
10811079
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
10821080
'_update')
10831081
def test_compute_node_created_on_empty(self, update_mock, get_mock,
1084-
create_mock, pci_tracker_mock,
1082+
create_mock,
10851083
get_by_hypervisor_mock):
10861084
get_by_hypervisor_mock.return_value = []
10871085
self._test_compute_node_created(update_mock, get_mock, create_mock,
1088-
pci_tracker_mock,
10891086
get_by_hypervisor_mock)
10901087

10911088
@mock.patch('nova.objects.ComputeNodeList.get_by_hypervisor')
1092-
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
1093-
return_value=objects.PciDeviceList(objects=[]))
10941089
@mock.patch('nova.objects.ComputeNode.create')
10951090
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
10961091
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
10971092
'_update')
10981093
def test_compute_node_created_on_empty_rebalance(self, update_mock,
10991094
get_mock,
11001095
create_mock,
1101-
pci_tracker_mock,
11021096
get_by_hypervisor_mock):
11031097
get_by_hypervisor_mock.return_value = []
11041098
self._test_compute_node_created(update_mock, get_mock, create_mock,
1105-
pci_tracker_mock,
11061099
get_by_hypervisor_mock,
11071100
rebalances_nodes=True)
11081101

11091102
@mock.patch('nova.objects.ComputeNodeList.get_by_hypervisor')
1110-
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
1111-
return_value=objects.PciDeviceList(objects=[]))
11121103
@mock.patch('nova.objects.ComputeNode.create')
11131104
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
11141105
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
11151106
'_update')
11161107
def test_compute_node_created_too_many(self, update_mock, get_mock,
1117-
create_mock, pci_tracker_mock,
1108+
create_mock,
11181109
get_by_hypervisor_mock):
11191110
get_by_hypervisor_mock.return_value = ["fake_node_1", "fake_node_2"]
11201111
self._test_compute_node_created(update_mock, get_mock, create_mock,
1121-
pci_tracker_mock,
11221112
get_by_hypervisor_mock,
11231113
rebalances_nodes=True)
11241114

1125-
def _test_compute_node_created(self, update_mock, get_mock,
1126-
create_mock, pci_tracker_mock,
1115+
def _test_compute_node_created(self, update_mock, get_mock, create_mock,
11271116
get_by_hypervisor_mock,
11281117
rebalances_nodes=False):
11291118
self.flags(cpu_allocation_ratio=1.0, ram_allocation_ratio=1.0,
@@ -1152,12 +1141,12 @@ def _test_compute_node_created(self, update_mock, get_mock,
11521141
'current_workload': 0,
11531142
'vcpus': 4,
11541143
'running_vms': 0,
1155-
'pci_passthrough_devices': '[]'
1144+
'pci_passthrough_devices': '[]',
1145+
'uuid': uuids.compute_node_uuid
11561146
}
11571147
# The expected compute represents the initial values used
11581148
# when creating a compute node.
11591149
expected_compute = objects.ComputeNode(
1160-
id=42,
11611150
host_ip=resources['host_ip'],
11621151
vcpus=resources['vcpus'],
11631152
memory_mb=resources['memory_mb'],
@@ -1177,20 +1166,11 @@ def _test_compute_node_created(self, update_mock, get_mock,
11771166
cpu_allocation_ratio=1.0,
11781167
disk_allocation_ratio=1.0,
11791168
stats={'failed_builds': 0},
1180-
pci_device_pools=objects.PciDevicePoolList(objects=[]),
11811169
uuid=uuids.compute_node_uuid
11821170
)
11831171

1184-
def set_cn_id():
1185-
# The PCI tracker needs the compute node's ID when starting up, so
1186-
# make sure that we set the ID value so we don't get a Cannot load
1187-
# 'id' in base class error
1188-
cn = self.rt.compute_nodes[_NODENAME]
1189-
cn.id = 42 # Has to be a number, not a mock
1190-
cn.uuid = uuids.compute_node_uuid
1191-
1192-
create_mock.side_effect = set_cn_id
1193-
self.rt._init_compute_node(mock.sentinel.ctx, resources)
1172+
with mock.patch.object(self.rt, '_setup_pci_tracker') as setup_pci:
1173+
self.rt._init_compute_node(mock.sentinel.ctx, resources)
11941174

11951175
cn = self.rt.compute_nodes[_NODENAME]
11961176
get_mock.assert_called_once_with(mock.sentinel.ctx, _HOSTNAME,
@@ -1202,22 +1182,47 @@ def set_cn_id():
12021182
get_by_hypervisor_mock.assert_not_called()
12031183
create_mock.assert_called_once_with()
12041184
self.assertTrue(obj_base.obj_equal_prims(expected_compute, cn))
1205-
pci_tracker_mock.assert_called_once_with(mock.sentinel.ctx,
1206-
42)
1185+
setup_pci.assert_called_once_with(mock.sentinel.ctx, cn, resources)
12071186
self.assertFalse(update_mock.called)
12081187

1188+
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
1189+
'_setup_pci_tracker')
1190+
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename',
1191+
side_effect=exc.ComputeHostNotFound(host=_HOSTNAME))
1192+
@mock.patch('nova.objects.ComputeNode.create',
1193+
side_effect=(test.TestingException, None))
1194+
def test_compute_node_create_fail_retry_works(self, mock_create, mock_get,
1195+
mock_setup_pci):
1196+
"""Tests that _init_compute_node will not save the ComputeNode object
1197+
in the compute_nodes dict if create() fails.
1198+
"""
1199+
self._setup_rt()
1200+
self.assertEqual({}, self.rt.compute_nodes)
1201+
ctxt = context.get_context()
1202+
# The first ComputeNode.create fails so rt.compute_nodes should
1203+
# remain empty.
1204+
resources = copy.deepcopy(_VIRT_DRIVER_AVAIL_RESOURCES)
1205+
resources['uuid'] = uuids.cn_uuid # for the LOG.info message
1206+
self.assertRaises(test.TestingException,
1207+
self.rt._init_compute_node, ctxt, resources)
1208+
self.assertEqual({}, self.rt.compute_nodes)
1209+
# Second create works so compute_nodes should have a mapping.
1210+
self.rt._init_compute_node(ctxt, resources)
1211+
self.assertIn(_NODENAME, self.rt.compute_nodes)
1212+
mock_get.assert_has_calls([mock.call(
1213+
ctxt, _HOSTNAME, _NODENAME)] * 2)
1214+
self.assertEqual(2, mock_create.call_count)
1215+
mock_setup_pci.assert_called_once_with(
1216+
ctxt, test.MatchType(objects.ComputeNode), resources)
1217+
12091218
@mock.patch('nova.objects.ComputeNodeList.get_by_hypervisor')
1210-
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
1211-
return_value=objects.PciDeviceList(objects=[]))
12121219
@mock.patch('nova.objects.ComputeNode.create')
12131220
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
12141221
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
12151222
'_update')
12161223
def test_node_removed(self, update_mock, get_mock,
1217-
create_mock, pci_tracker_mock,
1218-
get_by_hypervisor_mock):
1224+
create_mock, get_by_hypervisor_mock):
12191225
self._test_compute_node_created(update_mock, get_mock, create_mock,
1220-
pci_tracker_mock,
12211226
get_by_hypervisor_mock)
12221227
self.rt.old_resources[_NODENAME] = mock.sentinel.foo
12231228
self.assertIn(_NODENAME, self.rt.compute_nodes)

0 commit comments

Comments
 (0)