Skip to content

Commit 665c053

Browse files
markgoddard authored and melwitt committed
Fix inactive session error in compute node creation
In the fix for bug 1839560 [1][2], soft-deleted compute nodes may be restored, to ensure we can reuse ironic node UUIDs as compute node UUIDs. While this seems to largely work, it results in some nasty errors being generated [3]:

    InvalidRequestError This session is in 'inactive' state, due to the SQL transaction being rolled back; no further SQL can be emitted within this transaction.

This happens because compute_node_create is decorated with pick_context_manager_writer, which begins a transaction. While _compute_node_get_and_update_deleted claims that calling a second pick_context_manager_writer decorated function will begin a new subtransaction, this does not appear to be the case.

This change removes pick_context_manager_writer from the compute_node_create function, and adds a new _compute_node_create function which ensures the transaction is finished if _compute_node_get_and_update_deleted is called.

The new unit test added here fails without this change.

This change marks the removal of the final FIXME from the functional test added in [4].

[1] https://bugs.launchpad.net/nova/+bug/1839560
[2] https://git.openstack.org/cgit/openstack/nova/commit/?id=89dd74ac7f1028daadf86cb18948e27fe9d1d411
[3] http://paste.openstack.org/show/786350/
[4] https://review.opendev.org/#/c/695012/

Conflicts:
    nova/db/sqlalchemy/api.py

NOTE(melwitt): The conflict is because change I9f414cf831316b624132d9e06192f1ecbbd3dd78 (db: Copy docs from 'nova.db.*' to 'nova.db.sqlalchemy.*') is not in Wallaby.

NOTE(melwitt): Differences from the cherry picked change in calling nova.db.api => nova.db.sqlalchemy.api directly are due to the alembic migration in Xena, which looks to have made the nova.db.api interface obsolete.

Change-Id: Iae119ea8776bc7f2e5dbe2e502a743217beded73
Closes-Bug: #1853159
Related-Bug: #1853009
(cherry picked from commit 2383cbb)
1 parent cbbca58 commit 665c053

File tree

3 files changed

+31
-35
lines changed

3 files changed

+31
-35
lines changed

nova/db/sqlalchemy/api.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -688,16 +688,29 @@ def compute_node_search_by_hypervisor(context, hypervisor_match):
688688

689689

690690
@pick_context_manager_writer
691-
def compute_node_create(context, values):
691+
def _compute_node_create(context, values):
692692
"""Creates a new ComputeNode and populates the capacity fields
693693
with the most recent data.
694694
"""
695695
convert_objects_related_datetimes(values)
696696

697697
compute_node_ref = models.ComputeNode()
698698
compute_node_ref.update(values)
699+
compute_node_ref.save(context.session)
700+
return compute_node_ref
701+
702+
703+
# NOTE(mgoddard): We avoid decorating this with @pick_context_manager_writer,
704+
# so that we get a separate transaction in the exception handler. This avoids
705+
# an error message about inactive DB sessions during a transaction rollback.
706+
# See https://bugs.launchpad.net/nova/+bug/1853159.
707+
def compute_node_create(context, values):
708+
"""Creates a new ComputeNode and populates the capacity fields
709+
with the most recent data. Will restore a soft deleted compute node if a
710+
UUID has been explicitly requested.
711+
"""
699712
try:
700-
compute_node_ref.save(context.session)
713+
compute_node_ref = _compute_node_create(context, values)
701714
except db_exc.DBDuplicateEntry:
702715
with excutils.save_and_reraise_exception(logger=LOG) as err_ctx:
703716
# Check to see if we have a (soft) deleted ComputeNode with the

nova/tests/functional/regressions/test_bug_1853009.py

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,6 @@ def test_node_rebalance_deleted_compute_node_race(self):
8585

8686
# host_b[1]: Finds no compute record in RT. Tries to create one
8787
# (_init_compute_node).
88-
# FIXME(mgoddard): This shows a traceback with SQL rollback due to
89-
# soft-deleted node. The create seems to succeed but breaks the RT
90-
# update for this node. See
91-
# https://bugs.launchpad.net/nova/+bug/1853159.
9288
host_b.manager.update_available_resource(self.ctxt)
9389
self._assert_hypervisor_api(self.nodename, expected_host='host_b')
9490
# There should only be one resource provider (fake-node).
@@ -164,41 +160,12 @@ def test_node_rebalance_deleted_compute_node_race(self):
164160
self.ctxt, cn, cascade=True)
165161

166162
# host_b[3]: Should recreate compute node and resource provider.
167-
# FIXME(mgoddard): Resource provider not recreated here, due to
168-
# https://bugs.launchpad.net/nova/+bug/1853159.
169163
host_b.manager.update_available_resource(self.ctxt)
170164

171165
# Verify that the node was recreated.
172166
self._assert_hypervisor_api(self.nodename, 'host_b')
173167

174-
# But due to https://bugs.launchpad.net/nova/+bug/1853159 the compute
175-
# node is not cached in the RT.
176-
self.assertNotIn(self.nodename, host_b.manager.rt.compute_nodes)
177-
178-
# There is no RP.
179-
rps = self._get_all_providers()
180-
self.assertEqual(0, len(rps), rps)
181-
182-
# But the RP exists in the provider tree.
183-
self.assertFalse(host_b.manager.rt.reportclient._provider_tree.exists(
184-
self.nodename))
185-
186-
# host_b[1]: Should add compute node to RT cache and recreate resource
187-
# provider.
188-
host_b.manager.update_available_resource(self.ctxt)
189-
190-
# Verify that the node still exists.
191-
self._assert_hypervisor_api(self.nodename, 'host_b')
192-
193-
# And it is now in the RT cache.
194-
self.assertIn(self.nodename, host_b.manager.rt.compute_nodes)
195-
196168
# The resource provider has now been created.
197169
rps = self._get_all_providers()
198170
self.assertEqual(1, len(rps), rps)
199171
self.assertEqual(self.nodename, rps[0]['name'])
200-
201-
# This fails due to the lack of a resource provider.
202-
self.assertIn(
203-
'Skipping removal of allocations for deleted instances',
204-
self.stdlog.logger.output)

nova/tests/unit/db/test_db_api.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5257,6 +5257,22 @@ def test_compute_node_create_duplicate_host_hypervisor_hostname(self):
52575257
self.assertRaises(db_exc.DBDuplicateEntry,
52585258
db.compute_node_create, self.ctxt, other_node)
52595259

5260+
def test_compute_node_create_duplicate_uuid(self):
5261+
"""Tests to make sure that no exception is raised when trying to create
5262+
a compute node with the same host, hypervisor_hostname and uuid values
5263+
as another compute node that was previously soft-deleted.
5264+
"""
5265+
# Prior to fixing https://bugs.launchpad.net/nova/+bug/1853159, this
5266+
# raised the following error:
5267+
# sqlalchemy.exc.InvalidRequestError: This session is in 'inactive'
5268+
# state, due to the SQL transaction being rolled back; no further SQL
5269+
# can be emitted within this transaction.
5270+
constraint = db.constraint(host=db.equal_any(self.item['host']))
5271+
sqlalchemy_api.compute_node_delete(
5272+
self.ctxt, self.item['id'], constraint=constraint)
5273+
new_node = db.compute_node_create(self.ctxt, self.compute_node_dict)
5274+
self.assertEqual(self.item['uuid'], new_node['uuid'])
5275+
52605276
def test_compute_node_get_all(self):
52615277
nodes = db.compute_node_get_all(self.ctxt)
52625278
self.assertEqual(1, len(nodes))

0 commit comments

Comments
 (0)