Skip to content

Commit 2383cbb

Browse files
markgoddard authored and lyarwood committed
Fix inactive session error in compute node creation
In the fix for bug 1839560 [1][2], soft-deleted compute nodes may be restored, to ensure we can reuse ironic node UUIDs as compute node UUIDs. While this seems to largely work, it results in some nasty errors being generated [3]:

    InvalidRequestError: This session is in 'inactive' state, due to the SQL transaction being rolled back; no further SQL can be emitted within this transaction.

This happens because compute_node_create is decorated with pick_context_manager_writer, which begins a transaction. While _compute_node_get_and_update_deleted claims that calling a second pick_context_manager_writer decorated function will begin a new subtransaction, this does not appear to be the case.

This change removes pick_context_manager_writer from the compute_node_create function, and adds a new _compute_node_create function which ensures the transaction is finished if _compute_node_get_and_update_deleted is called.

The new unit test added here fails without this change.

This change marks the removal of the final FIXME from the functional test added in [4].

[1] https://bugs.launchpad.net/nova/+bug/1839560
[2] https://git.openstack.org/cgit/openstack/nova/commit/?id=89dd74ac7f1028daadf86cb18948e27fe9d1d411
[3] http://paste.openstack.org/show/786350/
[4] https://review.opendev.org/#/c/695012/

Change-Id: Iae119ea8776bc7f2e5dbe2e502a743217beded73
Closes-Bug: #1853159
Related-Bug: #1853009
1 parent a8492e8 commit 2383cbb

File tree

3 files changed

+31
-35
lines changed

3 files changed

+31
-35
lines changed

nova/db/main/api.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -768,7 +768,7 @@ def compute_node_search_by_hypervisor(context, hypervisor_match):
768768

769769

770770
@pick_context_manager_writer
771-
def compute_node_create(context, values):
771+
def _compute_node_create(context, values):
772772
"""Create a compute node from the values dictionary.
773773
774774
:param context: The security context
@@ -781,8 +781,21 @@ def compute_node_create(context, values):
781781

782782
compute_node_ref = models.ComputeNode()
783783
compute_node_ref.update(values)
784+
compute_node_ref.save(context.session)
785+
return compute_node_ref
786+
787+
788+
# NOTE(mgoddard): We avoid decorating this with @pick_context_manager_writer,
789+
# so that we get a separate transaction in the exception handler. This avoids
790+
# an error message about inactive DB sessions during a transaction rollback.
791+
# See https://bugs.launchpad.net/nova/+bug/1853159.
792+
def compute_node_create(context, values):
793+
"""Creates a new ComputeNode and populates the capacity fields
794+
with the most recent data. Will restore a soft deleted compute node if a
795+
UUID has been explicitly requested.
796+
"""
784797
try:
785-
compute_node_ref.save(context.session)
798+
compute_node_ref = _compute_node_create(context, values)
786799
except db_exc.DBDuplicateEntry:
787800
with excutils.save_and_reraise_exception(logger=LOG) as err_ctx:
788801
# Check to see if we have a (soft) deleted ComputeNode with the

nova/tests/functional/regressions/test_bug_1853009.py

Lines changed: 0 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -85,10 +85,6 @@ def test_node_rebalance_deleted_compute_node_race(self):
8585

8686
# host_b[1]: Finds no compute record in RT. Tries to create one
8787
# (_init_compute_node).
88-
# FIXME(mgoddard): This shows a traceback with SQL rollback due to
89-
# soft-deleted node. The create seems to succeed but breaks the RT
90-
# update for this node. See
91-
# https://bugs.launchpad.net/nova/+bug/1853159.
9288
host_b.manager.update_available_resource(self.ctxt)
9389
self._assert_hypervisor_api(self.nodename, expected_host='host_b')
9490
# There should only be one resource provider (fake-node).
@@ -164,41 +160,12 @@ def test_node_rebalance_deleted_compute_node_race(self):
164160
self.ctxt, cn, cascade=True)
165161

166162
# host_b[3]: Should recreate compute node and resource provider.
167-
# FIXME(mgoddard): Resource provider not recreated here, due to
168-
# https://bugs.launchpad.net/nova/+bug/1853159.
169163
host_b.manager.update_available_resource(self.ctxt)
170164

171165
# Verify that the node was recreated.
172166
self._assert_hypervisor_api(self.nodename, 'host_b')
173167

174-
# But due to https://bugs.launchpad.net/nova/+bug/1853159 the compute
175-
# node is not cached in the RT.
176-
self.assertNotIn(self.nodename, host_b.manager.rt.compute_nodes)
177-
178-
# There is no RP.
179-
rps = self._get_all_providers()
180-
self.assertEqual(0, len(rps), rps)
181-
182-
# But the RP exists in the provider tree.
183-
self.assertFalse(host_b.manager.rt.reportclient._provider_tree.exists(
184-
self.nodename))
185-
186-
# host_b[1]: Should add compute node to RT cache and recreate resource
187-
# provider.
188-
host_b.manager.update_available_resource(self.ctxt)
189-
190-
# Verify that the node still exists.
191-
self._assert_hypervisor_api(self.nodename, 'host_b')
192-
193-
# And it is now in the RT cache.
194-
self.assertIn(self.nodename, host_b.manager.rt.compute_nodes)
195-
196168
# The resource provider has now been created.
197169
rps = self._get_all_providers()
198170
self.assertEqual(1, len(rps), rps)
199171
self.assertEqual(self.nodename, rps[0]['name'])
200-
201-
# This fails due to the lack of a resource provider.
202-
self.assertIn(
203-
'Skipping removal of allocations for deleted instances',
204-
self.stdlog.logger.output)

nova/tests/unit/db/main/test_api.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5183,6 +5183,22 @@ def test_compute_node_create_duplicate_host_hypervisor_hostname(self):
51835183
self.assertRaises(db_exc.DBDuplicateEntry,
51845184
db.compute_node_create, self.ctxt, other_node)
51855185

5186+
def test_compute_node_create_duplicate_uuid(self):
5187+
"""Tests to make sure that no exception is raised when trying to create
5188+
a compute node with the same host, hypervisor_hostname and uuid values
5189+
as another compute node that was previously soft-deleted.
5190+
"""
5191+
# Prior to fixing https://bugs.launchpad.net/nova/+bug/1853159, this
5192+
# raised the following error:
5193+
# sqlalchemy.exc.InvalidRequestError: This session is in 'inactive'
5194+
# state, due to the SQL transaction being rolled back; no further SQL
5195+
# can be emitted within this transaction.
5196+
constraint = db.constraint(host=db.equal_any(self.item['host']))
5197+
db.compute_node_delete(
5198+
self.ctxt, self.item['id'], constraint=constraint)
5199+
new_node = db.compute_node_create(self.ctxt, self.compute_node_dict)
5200+
self.assertEqual(self.item['uuid'], new_node['uuid'])
5201+
51865202
def test_compute_node_get_all(self):
51875203
nodes = db.compute_node_get_all(self.ctxt)
51885204
self.assertEqual(1, len(nodes))

0 commit comments

Comments
 (0)