# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from oslo_log import log as logging

from nova import context
from nova.db import api as db_api
from nova import exception
from nova import objects
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional import integrated_helpers
from nova import utils
from nova.virt import fake as fake_virt

LOG = logging.getLogger(__name__)


class PeriodicNodeRecreateTestCase(test.TestCase,
                                   integrated_helpers.InstanceHelperMixin):
    """Regression test for bug 1839560 introduced in Rocky.

    When an ironic node is undergoing maintenance the driver will not report
    it as an available node to the ComputeManager.update_available_resource
    periodic task. The ComputeManager will then (soft) delete the ComputeNode
    record for that no-longer-available node. If/when the ironic node is
    available again and the driver reports it, the ResourceTracker will
    attempt to create a ComputeNode record for the ironic node.

    The regression, introduced with change
    Ia69fabce8e7fd7de101e291fe133c6f5f5f7056a, is that the ironic node uuid
    is used as the ComputeNode.uuid and there is a unique constraint on the
    ComputeNode.uuid value in the database. Trying to create a ComputeNode
    with the same uuid (after the ironic node comes back from being
    unavailable) therefore fails with a DBDuplicateEntry error, since a
    (soft) deleted version of the ComputeNode with the same uuid is still in
    the database.
    """
    def setUp(self):
        super(PeriodicNodeRecreateTestCase, self).setUp()
        # We need the PlacementFixture for the compute nodes to report in,
        # but otherwise we don't care about placement for this test.
        self.useFixture(nova_fixtures.PlacementFixture())
        # Start up the API so we can query the os-hypervisors API.
        self.api = self.useFixture(nova_fixtures.OSAPIFixture(
            api_version='v2.1')).admin_api
        # Make sure we're using the fake driver that has predictable uuids
        # for each node.
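        # (PredictableNodeUUIDDriver reports a stable, name-derived uuid for
        # each node, similar to how the ironic driver reports the same node
        # uuid every time a node is enumerated.)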
        self.flags(compute_driver='fake.PredictableNodeUUIDDriver')

    def test_update_available_resource_node_recreate(self):
        # First we create a compute service to manage a couple of fake
        # nodes. When start_service runs, it will create the node1 and node2
        # ComputeNodes.
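        # set_nodes() mutates module-level state in the fake driver, so it
        # has to be restored on cleanup.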
        fake_virt.set_nodes(['node1', 'node2'])
        self.addCleanup(fake_virt.restore_nodes)
        compute = self.start_service('compute', 'node1')
        # Now we should have two compute nodes; make sure the hypervisors
        # API shows them.
        hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
        self.assertEqual(2, len(hypervisors), hypervisors)
        self.assertEqual({'node1', 'node2'},
                         set([hyp['hypervisor_hostname']
                              for hyp in hypervisors]))
        # Now stub the driver to only report node1. This makes it look like
        # node2 is no longer available when update_available_resource runs.
        compute.manager.driver._nodes = ['node1']
        ctxt = context.get_admin_context()
        compute.manager.update_available_resource(ctxt)
        # node2 should have been deleted; check the logs and API.
        log = self.stdlog.logger.output
        self.assertIn('Deleting orphan compute node', log)
        self.assertIn('hypervisor host is node2', log)
        hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
        self.assertEqual(1, len(hypervisors), hypervisors)
        self.assertEqual('node1', hypervisors[0]['hypervisor_hostname'])
        # But the node2 ComputeNode is still in the database with deleted!=0.
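        # temporary_mutation temporarily sets read_deleted='yes' on the
        # context so the query below can see the soft-deleted record.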
        with utils.temporary_mutation(ctxt, read_deleted='yes'):
            cn = objects.ComputeNode.get_by_host_and_nodename(
                ctxt, 'node1', 'node2')
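            # Soft delete sets the deleted column to the row's id, so the
            # value is non-zero rather than a literal True.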
            self.assertTrue(cn.deleted)
        # Now stub the driver again to report node2 as being back and run
        # the periodic task.
        compute.manager.driver._nodes = ['node1', 'node2']
        compute.manager.update_available_resource(ctxt)
        # FIXME(mriedem): This is bug 1839560 where the ResourceTracker fails
        # to create a ComputeNode for node2 because of conflicting UUIDs.
        log = self.stdlog.logger.output
        self.assertIn('Error updating resources for node node2', log)
        self.assertIn('DBDuplicateEntry', log)
        # Should still only have one reported hypervisor (node1).
        hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
        self.assertEqual(1, len(hypervisors), hypervisors)
        # Test the workaround for bug 1839560 by archiving the deleted node2
        # compute_nodes table record, which will allow the periodic task to
        # create a new entry for node2. We can remove this once the bug is
        # fixed.
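        # (Operators can do the same out-of-band with the
        # "nova-manage db archive_deleted_rows" command.)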
        LOG.info('Archiving the database.')
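        # archive_deleted_rows returns a tuple whose first element maps each
        # table name to the number of rows archived from it.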
        archived = db_api.archive_deleted_rows(1000)[0]
        self.assertIn('compute_nodes', archived)
        self.assertEqual(1, archived['compute_nodes'])
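        # Once archived, the record is gone even when reading deleted rows
        # because it has been moved to the shadow_compute_nodes table.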
        with utils.temporary_mutation(ctxt, read_deleted='yes'):
            self.assertRaises(exception.ComputeHostNotFound,
                              objects.ComputeNode.get_by_host_and_nodename,
                              ctxt, 'node1', 'node2')
        # Now run the periodic again and we should have a new ComputeNode
        # for node2.
        LOG.info('Running update_available_resource which should create a '
                 'new ComputeNode record for node2.')
        compute.manager.update_available_resource(ctxt)
        hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
        self.assertEqual(2, len(hypervisors), hypervisors)