
[stable-only][cve] Check VMDK create-type against an allowed list #21


Open: wants to merge 9 commits into stackhpc/ussuri
36 changes: 26 additions & 10 deletions nova/compute/manager.py
@@ -9896,6 +9896,8 @@ def update_available_resource(self, context, startup=False):
use_slave=True,
startup=startup)

self.rt.clean_compute_node_cache(compute_nodes_in_db)

# Delete orphan compute node not reported by driver but still in db
for cn in compute_nodes_in_db:
if cn.hypervisor_hostname not in nodenames:
@@ -9904,17 +9906,31 @@ def update_available_resource(self, context, startup=False):
"nodes are %(nodes)s",
{'id': cn.id, 'hh': cn.hypervisor_hostname,
'nodes': nodenames})
cn.destroy()
self.rt.remove_node(cn.hypervisor_hostname)
# Delete the corresponding resource provider in placement,
# along with any associated allocations.
try:
self.reportclient.delete_resource_provider(context, cn,
cascade=True)
except keystone_exception.ClientException as e:
LOG.error(
"Failed to delete compute node resource provider "
"for compute node %s: %s", cn.uuid, six.text_type(e))
try:
    cn.destroy()
except exception.ObjectActionError:
# NOTE(mgoddard): it's possible that another compute
# service took ownership of this compute node since we
# queried it due to a rebalance, and this will cause the
# deletion to fail. Ignore the error in that case.
LOG.info("Ignoring failure to delete orphan compute node "
"%(id)s on hypervisor host %(hh)s due to "
"possible node rebalance",
{'id': cn.id, 'hh': cn.hypervisor_hostname})
self.rt.remove_node(cn.hypervisor_hostname)
self.reportclient.invalidate_resource_provider(cn.uuid)
else:
self.rt.remove_node(cn.hypervisor_hostname)
# Delete the corresponding resource provider in placement,
# along with any associated allocations.
try:
self.reportclient.delete_resource_provider(
context, cn, cascade=True)
except keystone_exception.ClientException as e:
LOG.error(
"Failed to delete compute node resource provider "
"for compute node %s: %s", cn.uuid,
six.text_type(e))

for nodename in nodenames:
self._update_available_resource_for_node(context, nodename,
18 changes: 18 additions & 0 deletions nova/compute/resource_tracker.py
@@ -1831,3 +1831,21 @@ def finish_evacuation(self, instance, node, migration):
if migration:
migration.status = 'done'
migration.save()

@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def clean_compute_node_cache(self, compute_nodes_in_db):
"""Clean the compute node cache of any nodes that no longer exist.

:param compute_nodes_in_db: list of ComputeNode objects from the DB.
"""
compute_nodes_in_db_nodenames = {cn.hypervisor_hostname
for cn in compute_nodes_in_db}
stale_cns = set(self.compute_nodes) - compute_nodes_in_db_nodenames

for stale_cn in stale_cns:
# NOTE(mgoddard): we have found a node in the cache that has no
# compute node in the DB. This could be due to a node rebalance
# where another compute service took ownership of the node. Clean
# up the cache.
self.remove_node(stale_cn)
self.reportclient.invalidate_resource_provider(stale_cn)
9 changes: 9 additions & 0 deletions nova/conf/compute.py
@@ -959,6 +959,15 @@
* Any integer >= 1 represents the maximum allowed. A value of 0 will cause the
``nova-compute`` service to fail to start, as 0 disk devices is an invalid
configuration that would prevent instances from being able to boot.
"""),
cfg.ListOpt('vmdk_allowed_types',
default=['streamOptimized', 'monolithicSparse'],
help="""
A list of VMDK "create-type" subformats that will be allowed. It is
recommended to only include single-file-with-sparse-header variants to avoid
potential host file exposure due to processing named extents. If this list
is empty, then no form of VMDK image will be allowed.
"""),
]
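
The hunk above adds only the configuration option; the code that actually
enforces it against an image is not part of the diff shown here. As a rough,
hypothetical sketch of the kind of check such an option enables (the helper
names and the ValueError are placeholders, not Nova's code), the VMDK
"create-type" can be read from qemu-img's JSON output and compared against
the allowed list:

    import json
    import subprocess


    def vmdk_create_type(path):
        """Return the VMDK "create-type" subformat reported by qemu-img."""
        out = subprocess.check_output(
            ['qemu-img', 'info', '--output=json', path])
        info = json.loads(out)
        # qemu-img reports the subformat at format-specific/data/create-type.
        return (info.get('format-specific', {})
                    .get('data', {})
                    .get('create-type'))


    def check_vmdk_allowed(path, allowed_types):
        """Reject a VMDK whose create-type is not explicitly allowed.

        An empty allowed_types list rejects every VMDK image.
        """
        create_type = vmdk_create_type(path)
        if not allowed_types or create_type not in allowed_types:
            raise ValueError('VMDK create-type %r is not in the allowed '
                             'list %s' % (create_type, allowed_types))

    # Mirroring the option's default of sparse, single-file subformats:
    # check_vmdk_allowed('disk.vmdk', ['streamOptimized', 'monolithicSparse'])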

8 changes: 8 additions & 0 deletions nova/conf/workarounds.py
@@ -345,6 +345,14 @@
* :oslo.config:option:`DEFAULT.instances_path`
* :oslo.config:option:`image_cache.subdirectory_name`
* :oslo.config:option:`update_resources_interval`
"""),
cfg.BoolOpt('skip_cpu_compare_on_dest',
default=False,
help="""
With the libvirt driver, during live migration, skip comparing guest CPU
with the destination host. When using QEMU >= 2.9 and libvirt >=
4.4.0, libvirt will do the correct thing with respect to checking CPU
compatibility on the destination host during live migration.
"""),
]
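
Like the option above, this hunk adds only the flag; the libvirt driver
change that consults it is not shown here. The sketch below is a hedged
illustration of how such a workaround is typically applied in a
destination-side live migration pre-check; the function, the exception class
and the callable are stand-ins rather than the driver's real helpers:

    class MigrationPreCheckError(Exception):
        """Stand-in for Nova's migration pre-check failure exception."""


    def precheck_guest_cpu(compare_cpu, skip_cpu_compare_on_dest,
                           source_cpu_info):
        """Destination-side guest CPU pre-check, gated by the workaround.

        compare_cpu is a callable standing in for the driver's own CPU
        comparison helper; it should raise if the guest CPU cannot run on
        this host.
        """
        if skip_cpu_compare_on_dest:
            # Workaround enabled: rely on libvirt >= 4.4.0 / QEMU >= 2.9 to
            # reject an incompatible guest CPU when the migration starts.
            return
        try:
            compare_cpu(source_cpu_info)
        except Exception as exc:
            raise MigrationPreCheckError(str(exc))

    # Example: the comparison is skipped entirely when the flag is True.
    # precheck_guest_cpu(lambda info: None, True, '{"model": "Haswell"}')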

37 changes: 30 additions & 7 deletions nova/db/sqlalchemy/api.py
@@ -690,16 +690,29 @@ def compute_node_search_by_hypervisor(context, hypervisor_match):


@pick_context_manager_writer
def compute_node_create(context, values):
def _compute_node_create(context, values):
"""Creates a new ComputeNode and populates the capacity fields
with the most recent data.
"""
convert_objects_related_datetimes(values)

compute_node_ref = models.ComputeNode()
compute_node_ref.update(values)
compute_node_ref.save(context.session)
return compute_node_ref


# NOTE(mgoddard): We avoid decorating this with @pick_context_manager_writer,
# so that we get a separate transaction in the exception handler. This avoids
# an error message about inactive DB sessions during a transaction rollback.
# See https://bugs.launchpad.net/nova/+bug/1853159.
def compute_node_create(context, values):
"""Creates a new ComputeNode and populates the capacity fields
with the most recent data. Will restore a soft deleted compute node if a
UUID has been explicitly requested.
"""
try:
compute_node_ref.save(context.session)
compute_node_ref = _compute_node_create(context, values)
except db_exc.DBDuplicateEntry:
with excutils.save_and_reraise_exception(logger=LOG) as err_ctx:
# Check to see if we have a (soft) deleted ComputeNode with the
@@ -763,14 +776,24 @@ def compute_node_update(context, compute_id, values):


@pick_context_manager_writer
def compute_node_delete(context, compute_id):
def compute_node_delete(context, compute_id, constraint=None):
"""Delete a ComputeNode record."""
result = model_query(context, models.ComputeNode).\
filter_by(id=compute_id).\
soft_delete(synchronize_session=False)
query = model_query(context, models.ComputeNode).filter_by(id=compute_id)

if constraint is not None:
query = constraint.apply(models.ComputeNode, query)

result = query.soft_delete(synchronize_session=False)

if not result:
raise exception.ComputeHostNotFound(host=compute_id)
# The soft_delete could fail for one of two reasons:
# 1) The compute node no longer exists
# 2) The constraint, if specified, was not met
# Try to read the compute node and let it raise ComputeHostNotFound if
# 1) happened.
compute_node_get(context, compute_id)
# Else, raise ConstraintNotMet if 2) happened.
raise exception.ConstraintNotMet()


@pick_context_manager_reader
15 changes: 14 additions & 1 deletion nova/objects/compute_node.py
@@ -354,7 +354,20 @@ def save(self, prune_stats=False):

@base.remotable
def destroy(self):
db.compute_node_delete(self._context, self.id)
if self.obj_attr_is_set('host') and self.host:
# NOTE(melwitt): If our host is set, avoid a race between
# nova-computes during ironic driver node rebalances which can
# change node ownership.
constraint = db.constraint(host=db.equal_any(self.host))
else:
constraint = None

try:
sa_api.compute_node_delete(
self._context, self.id, constraint=constraint)
except exception.ConstraintNotMet:
raise exception.ObjectActionError(action='destroy',
reason='host changed')

def update_from_virt_driver(self, resources):
# NOTE(pmurray): the virt driver provides a dict of values that
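
For context, the host constraint added to destroy() above makes the delete
conditional on the compute node still belonging to this host. Nova's real
implementation goes through db.constraint()/equal_any() and the soft_delete
path shown in the sqlalchemy hunk, but conceptually it is a
compare-and-soft-delete; the sketch below (table and column names assumed)
shows the idea in plain SQLAlchemy:

    import datetime

    from sqlalchemy import update


    def soft_delete_if_host_unchanged(session, compute_nodes, node_id,
                                      expected_host):
        """Soft-delete a compute node row only if its host has not changed.

        compute_nodes is assumed to be a SQLAlchemy Table with id, host,
        deleted and deleted_at columns. A rowcount of 0 means the row is
        gone or its host changed, which the new code distinguishes by
        re-reading the record and raising ComputeHostNotFound or
        ConstraintNotMet accordingly.
        """
        stmt = (
            update(compute_nodes)
            .where(compute_nodes.c.id == node_id)
            .where(compute_nodes.c.host == expected_host)
            .values(deleted=node_id, deleted_at=datetime.datetime.utcnow())
        )
        return session.execute(stmt).rowcount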
17 changes: 12 additions & 5 deletions nova/scheduler/client/report.py
@@ -676,11 +676,7 @@ def _delete_provider(self, rp_uuid, global_request_id=None):
if resp:
LOG.info("Deleted resource provider %s", rp_uuid)
# clean the caches
try:
self._provider_tree.remove(rp_uuid)
except ValueError:
pass
self._association_refresh_time.pop(rp_uuid, None)
self.invalidate_resource_provider(rp_uuid)
return

msg = ("[%(placement_req_id)s] Failed to delete resource provider "
@@ -2161,6 +2157,17 @@ def delete_resource_provider(self, context, compute_node, cascade=False):
# for backward compatibility.
pass

def invalidate_resource_provider(self, name_or_uuid):
"""Invalidate the cache for a resource provider.

:param name_or_uuid: Name or UUID of the resource provider to look up.
"""
try:
self._provider_tree.remove(name_or_uuid)
except ValueError:
pass
self._association_refresh_time.pop(name_or_uuid, None)

def get_provider_by_name(self, context, name):
"""Queries the placement API for resource provider information matching
a supplied name.
171 changes: 171 additions & 0 deletions nova/tests/functional/regressions/test_bug_1853009.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import mock

from nova import context
from nova import objects
from nova.tests.functional import integrated_helpers


class NodeRebalanceDeletedComputeNodeRaceTestCase(
integrated_helpers.ProviderUsageBaseTestCase,
):
"""Regression test for bug 1853009 observed in Rocky & later.

When an ironic node re-balances from one host to another, there can be a
race where the old host deletes the orphan compute node after the new host
has taken ownership of it which results in the new host failing to create
the compute node and resource provider because the ResourceTracker does not
detect a change.
"""
# Make sure we're using the fake driver that has predictable uuids
# for each node.
compute_driver = 'fake.PredictableNodeUUIDDriver'

def _assert_hypervisor_api(self, nodename, expected_host):
# We should have one compute node shown by the API.
hypervisors = self.api.api_get(
'/os-hypervisors/detail'
).body['hypervisors']
self.assertEqual(1, len(hypervisors), hypervisors)
hypervisor = hypervisors[0]
self.assertEqual(nodename, hypervisor['hypervisor_hostname'])
self.assertEqual(expected_host, hypervisor['service']['host'])

def _start_compute(self, host):
host = self.start_service('compute', host)
# Ironic compute driver has rebalances_nodes = True.
host.manager.driver.rebalances_nodes = True
return host

def setUp(self):
super(NodeRebalanceDeletedComputeNodeRaceTestCase, self).setUp()

self.nodename = 'fake-node'
self.ctxt = context.get_admin_context()

def test_node_rebalance_deleted_compute_node_race(self):
# Simulate a service running and then stopping. host_a runs, creates
# fake-node, then is stopped. The fake-node compute node is destroyed.
# This leaves a soft-deleted node in the DB.
host_a = self._start_compute('host_a')
host_a.manager.driver._set_nodes([self.nodename])
host_a.manager.update_available_resource(self.ctxt)
host_a.stop()
cn = objects.ComputeNode.get_by_host_and_nodename(
self.ctxt, 'host_a', self.nodename,
)
cn.destroy()

self.assertEqual(0, len(objects.ComputeNodeList.get_all(self.ctxt)))

# Now we create a new compute service to manage our node.
host_b = self._start_compute('host_b')
host_b.manager.driver._set_nodes([self.nodename])

# When start_service runs, it will create a host_b ComputeNode. We want
# to delete that and inject our fake node into the driver, which will
# be re-balanced to another host later. First assert this actually
# exists.
self._assert_hypervisor_api('host_b', expected_host='host_b')

# Now run the update_available_resource periodic to register fake-node
# and have it managed by host_b. This will also detect the "host_b"
# node as orphaned and delete it along with its resource provider.

# host_b[1]: Finds no compute record in RT. Tries to create one
# (_init_compute_node).
host_b.manager.update_available_resource(self.ctxt)
self._assert_hypervisor_api(self.nodename, expected_host='host_b')
# There should only be one resource provider (fake-node).
original_rps = self._get_all_providers()
self.assertEqual(1, len(original_rps), original_rps)
self.assertEqual(self.nodename, original_rps[0]['name'])

# Simulate a re-balance by restarting host_a and make it manage
# fake-node. At this point both host_b and host_a think they own
# fake-node.
host_a = self._start_compute('host_a')
host_a.manager.driver._set_nodes([self.nodename])

# host_a[1]: Finds no compute record in RT, 'moves' existing node from
# host_b
host_a.manager.update_available_resource(self.ctxt)
# Assert that fake-node was re-balanced from host_b to host_a.
self.assertIn(
'ComputeNode fake-node moving from host_b to host_a',
self.stdlog.logger.output)
self._assert_hypervisor_api(self.nodename, expected_host='host_a')

# host_a[2]: Begins periodic update, queries compute nodes for this
# host, finds the fake-node.
cn = objects.ComputeNode.get_by_host_and_nodename(
self.ctxt, 'host_a', self.nodename,
)

# host_b[2]: Finds no compute record in RT, 'moves' existing node from
# host_a
host_b.manager.update_available_resource(self.ctxt)
# Assert that fake-node was re-balanced from host_a to host_b.
self.assertIn(
'ComputeNode fake-node moving from host_a to host_b',
self.stdlog.logger.output)
self._assert_hypervisor_api(self.nodename, expected_host='host_b')

# Complete rebalance, as host_a realises it does not own fake-node.
host_a.manager.driver._set_nodes([])

# host_a[2]: Deletes orphan compute node.
# Mock out the compute node query to simulate a race condition where
# the list includes an orphan compute node that is newly owned by
# host_b by the time host_a attempts to delete it.
with mock.patch(
'nova.compute.manager.ComputeManager._get_compute_nodes_in_db'
) as mock_get:
mock_get.return_value = [cn]
host_a.manager.update_available_resource(self.ctxt)

# Verify that the node was almost deleted, but was saved by the host
# check
self.assertIn(
'Deleting orphan compute node %s hypervisor host '
'is fake-node, nodes are' % cn.id,
self.stdlog.logger.output)
self.assertIn(
'Ignoring failure to delete orphan compute node %s on '
'hypervisor host fake-node' % cn.id,
self.stdlog.logger.output)
self._assert_hypervisor_api(self.nodename, 'host_b')
rps = self._get_all_providers()
self.assertEqual(1, len(rps), rps)
self.assertEqual(self.nodename, rps[0]['name'])

# Simulate deletion of an orphan by host_a. It shouldn't happen
# anymore, but let's assume it already did.
cn = objects.ComputeNode.get_by_host_and_nodename(
self.ctxt, 'host_b', self.nodename)
cn.destroy()
host_a.manager.rt.remove_node(cn.hypervisor_hostname)
host_a.manager.reportclient.delete_resource_provider(
self.ctxt, cn, cascade=True)

# host_b[3]: Should recreate compute node and resource provider.
host_b.manager.update_available_resource(self.ctxt)

# Verify that the node was recreated.
self._assert_hypervisor_api(self.nodename, 'host_b')

# The resource provider has now been created.
rps = self._get_all_providers()
self.assertEqual(1, len(rps), rps)
self.assertEqual(self.nodename, rps[0]['name'])