Skip to content

Commit 0fbfc26

Browse files
mriedem authored and priteau committed
Restore soft-deleted compute node with same uuid
There is a unique index on the compute_nodes.uuid column, which means we can't have more than one compute_nodes record in the same DB with the same UUID, even if one is soft deleted, because the deleted column is not part of that unique index constraint.

This is a problem with ironic nodes, where the node is 1:1 with the compute node record. When a node is undergoing maintenance, the driver doesn't return it from get_available_nodes(), so the ComputeManager.update_available_resource periodic task (soft) deletes the compute node record. When the node is no longer under maintenance in ironic and the driver reports it again, the ResourceTracker._init_compute_node code will fail to create the ComputeNode record again because of the duplicate uuid.

This change handles the DBDuplicateEntry error in compute_node_create by finding the soft-deleted compute node with the same uuid and simply updating it to no longer be (soft) deleted.

Closes-Bug: #1839560
Change-Id: Iafba419fe86446ffe636721f523fb619f8f787b3
(cherry picked from commit 8b00726)
(cherry picked from commit 1b02166)
(cherry picked from commit 9ce9484)
1 parent 62aac69 commit 0fbfc26

File tree

3 files changed

+74
-24
lines changed

3 files changed

+74
-24
lines changed

nova/db/sqlalchemy/api.py

Lines changed: 45 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
from oslo_db.sqlalchemy import update_match
3131
from oslo_db.sqlalchemy import utils as sqlalchemyutils
3232
from oslo_log import log as logging
33+
from oslo_utils import excutils
3334
from oslo_utils import importutils
3435
from oslo_utils import timeutils
3536
from oslo_utils import uuidutils
@@ -693,11 +694,54 @@ def compute_node_create(context, values):
693694

694695
compute_node_ref = models.ComputeNode()
695696
compute_node_ref.update(values)
696-
compute_node_ref.save(context.session)
697+
try:
698+
compute_node_ref.save(context.session)
699+
except db_exc.DBDuplicateEntry:
700+
with excutils.save_and_reraise_exception(logger=LOG) as err_ctx:
701+
# Check to see if we have a (soft) deleted ComputeNode with the
702+
# same UUID and if so just update it and mark as no longer (soft)
703+
# deleted. See bug 1839560 for details.
704+
if 'uuid' in values:
705+
# Get a fresh context for a new DB session and allow it to
706+
# get a deleted record.
707+
ctxt = nova.context.get_admin_context(read_deleted='yes')
708+
compute_node_ref = _compute_node_get_and_update_deleted(
709+
ctxt, values)
710+
# If we didn't get anything back we failed to find the node
711+
# by uuid and update it so re-raise the DBDuplicateEntry.
712+
if compute_node_ref:
713+
err_ctx.reraise = False
697714

698715
return compute_node_ref
699716

700717

718+
@pick_context_manager_writer
719+
def _compute_node_get_and_update_deleted(context, values):
720+
"""Find a ComputeNode by uuid, update and un-delete it.
721+
722+
This is a special case from the ``compute_node_create`` method which
723+
needs to be separate to get a new Session.
724+
725+
This method will update the ComputeNode, if found, to have deleted=0 and
726+
deleted_at=None values.
727+
728+
:param context: request auth context which should be able to read deleted
729+
records
730+
:param values: values used to update the ComputeNode record - must include
731+
uuid
732+
:return: updated ComputeNode sqlalchemy model object if successfully found
733+
and updated, None otherwise
734+
"""
735+
cn = model_query(
736+
context, models.ComputeNode).filter_by(uuid=values['uuid']).first()
737+
if cn:
738+
# Update with the provided values but un-soft-delete.
739+
update_values = copy.deepcopy(values)
740+
update_values['deleted'] = 0
741+
update_values['deleted_at'] = None
742+
return compute_node_update(context, cn.id, update_values)
743+
744+
701745
@oslo_db_api.wrap_db_retry(max_retries=5, retry_on_deadlock=True)
702746
@pick_context_manager_writer
703747
def compute_node_update(context, compute_id, values):

nova/tests/functional/regressions/test_bug_1839560.py

Lines changed: 17 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414

1515
from nova import context
1616
from nova.db import api as db_api
17-
from nova import exception
1817
from nova import objects
1918
from nova import test
2019
from nova.tests import fixtures as nova_fixtures
@@ -90,30 +89,25 @@ def test_update_available_resource_node_recreate(self):
9089
# Now stub the driver again to report node2 as being back and run
9190
# the periodic task.
9291
compute.manager.driver._nodes = ['node1', 'node2']
92+
LOG.info('Running update_available_resource which should bring back '
93+
'node2.')
9394
compute.manager.update_available_resource(ctxt)
94-
# FIXME(mriedem): This is bug 1839560 where the ResourceTracker fails
95-
# to create a ComputeNode for node2 because of conflicting UUIDs.
95+
# The DBDuplicateEntry error should have been handled and resulted in
96+
# updating the (soft) deleted record to no longer be deleted.
9697
log = self.stdlog.logger.output
97-
self.assertIn('Error updating resources for node node2', log)
98-
self.assertIn('DBDuplicateEntry', log)
99-
# Should still only have one reported hypervisor (node1).
98+
self.assertNotIn('DBDuplicateEntry', log)
99+
# Should have two reported hypervisors again.
100100
hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
101-
self.assertEqual(1, len(hypervisors), hypervisors)
102-
# Test the workaround for bug 1839560 by archiving the deleted node2
103-
# compute_nodes table record which will allow the periodic to create a
104-
# new entry for node2. We can remove this when the bug is fixed.
101+
self.assertEqual(2, len(hypervisors), hypervisors)
102+
# Now that the node2 record was un-soft-deleted, archiving should not
103+
# remove any compute_nodes.
105104
LOG.info('Archiving the database.')
106105
archived = db_api.archive_deleted_rows(1000)[0]
107-
self.assertIn('compute_nodes', archived)
108-
self.assertEqual(1, archived['compute_nodes'])
109-
with utils.temporary_mutation(ctxt, read_deleted='yes'):
110-
self.assertRaises(exception.ComputeHostNotFound,
111-
objects.ComputeNode.get_by_host_and_nodename,
112-
ctxt, 'node1', 'node2')
113-
# Now run the periodic again and we should have a new ComputeNode for
114-
# node2.
115-
LOG.info('Running update_available_resource which should create a new '
116-
'ComputeNode record for node2.')
117-
compute.manager.update_available_resource(ctxt)
118-
hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
119-
self.assertEqual(2, len(hypervisors), hypervisors)
106+
self.assertNotIn('compute_nodes', archived)
107+
cn2 = objects.ComputeNode.get_by_host_and_nodename(
108+
ctxt, 'node1', 'node2')
109+
self.assertFalse(cn2.deleted)
110+
self.assertIsNone(cn2.deleted_at)
111+
# The node2 id and uuid should not have changed in the DB.
112+
self.assertEqual(cn.id, cn2.id)
113+
self.assertEqual(cn.uuid, cn2.uuid)

nova/tests/unit/db/test_db_api.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6882,6 +6882,18 @@ def test_compute_node_create(self):
68826882
new_stats = jsonutils.loads(self.item['stats'])
68836883
self.assertEqual(self.stats, new_stats)
68846884

6885+
def test_compute_node_create_duplicate_host_hypervisor_hostname(self):
6886+
"""Tests to make sure that DBDuplicateEntry is raised when trying to
6887+
create a duplicate ComputeNode with the same host and
6888+
hypervisor_hostname values but different uuid values. This makes
6889+
sure that when _compute_node_get_and_update_deleted returns None
6890+
the DBDuplicateEntry is re-raised.
6891+
"""
6892+
other_node = dict(self.compute_node_dict)
6893+
other_node['uuid'] = uuidutils.generate_uuid()
6894+
self.assertRaises(db_exc.DBDuplicateEntry,
6895+
db.compute_node_create, self.ctxt, other_node)
6896+
68856897
def test_compute_node_get_all(self):
68866898
nodes = db.compute_node_get_all(self.ctxt)
68876899
self.assertEqual(1, len(nodes))

0 commit comments

Comments
 (0)