Skip to content

Commit f548fa5

Browse files
committed
Fix DB deadlock in quotas
There's a potential deadlock when creating the first LB in a new project. The query that creates an empty quota object for the project may have triggered a deadlock in the DB, this deadlock should have been handlded by an oslo_db.api.wrab_db_retry decorator but the decorated function was incorrect called: the DB session must begin in the decorated function and not in the caller. If begin is called by the caller, in case of retry sqlalchemy will complain that the session was not correctly restarted after a rollback. Closes-Bug: #2038798 Change-Id: Iec1f8f8f61d77640d4a33d7ffb62925f7e860286 (cherry picked from commit 333f035)
1 parent 56e4545 commit f548fa5

File tree

3 files changed

+67
-12
lines changed

3 files changed

+67
-12
lines changed

octavia/db/repositories.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from octavia.common import data_models
4141
from octavia.common import exceptions
4242
from octavia.common import validate
43+
from octavia.db import api as db_api
4344
from octavia.db import models
4445

4546
CONF = cfg.CONF
@@ -390,13 +391,8 @@ def check_quota_met(self, session, lock_session, _class, project_id,
390391
if not project_id:
391392
raise exceptions.MissingProjectID()
392393

393-
quotas = self.quotas.get(session, project_id=project_id)
394-
if not quotas:
395-
# Make sure we have a record to lock
396-
self.quotas.update(
397-
session,
398-
project_id,
399-
quota={})
394+
self.quotas.ensure_project_exists(project_id)
395+
400396
# Lock the project record in the database to block other quota checks
401397
#
402398
# Note: You cannot just use the current count as the in-use
@@ -1883,11 +1879,6 @@ def delete(self, session, id, **filters):
18831879
class QuotasRepository(BaseRepository):
18841880
model_class = models.Quotas
18851881

1886-
# Since this is for the initial quota record creation it locks the table
1887-
# which can lead to recoverable deadlocks. Thus we use the deadlock
1888-
# retry wrapper here. This may not be appropriate for other sessions
1889-
# and or queries. Use with caution.
1890-
@oslo_db_api.wrap_db_retry(max_retries=5, retry_on_deadlock=True)
18911882
def update(self, session, project_id, **model_kwargs):
18921883
kwargs_quota = model_kwargs['quota']
18931884
quotas = (
@@ -1904,6 +1895,19 @@ def update(self, session, project_id, **model_kwargs):
19041895
session.flush()
19051896
return self.get(session, project_id=project_id)
19061897

1898+
# Since this is for the initial quota record creation it locks the table
1899+
# which can lead to recoverable deadlocks. Thus we use the deadlock
1900+
# retry wrapper here. This may not be appropriate for other sessions
1901+
# and or queries. Use with caution.
1902+
@oslo_db_api.wrap_db_retry(max_retries=5, retry_on_deadlock=True)
1903+
def ensure_project_exists(self, project_id):
1904+
with db_api.session().begin() as session:
1905+
quotas = self.get(session, project_id=project_id)
1906+
if not quotas:
1907+
# Make sure we have a record to lock
1908+
self.update(session, project_id, quota={})
1909+
session.commit()
1910+
19071911
def delete(self, session, project_id):
19081912
quotas = (
19091913
session.query(self.model_class)

octavia/tests/functional/db/base.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030

3131
class OctaviaDBTestBase(test_base.BaseTestCase):
3232

33+
facade = None
34+
3335
def setUp(self, connection_string='sqlite://'):
3436
super().setUp()
3537

@@ -73,11 +75,18 @@ def _get_db_engine_session(self):
7375
sqlite_fk=True)
7476
engine = facade.get_engine()
7577
session = facade.get_session(expire_on_commit=True)
78+
self.facade = facade
7679
else:
7780
engine = db_api.get_engine()
7881
session = db_api.get_session()
7982
return engine, session
8083

84+
def get_session(self):
85+
if 'sqlite:///' in self.connection_string:
86+
return self.facade.get_session(expire_on_commit=True)
87+
else:
88+
return db_api.get_session()
89+
8190
def _seed_lookup_tables(self, session):
8291
self._seed_lookup_table(
8392
session, constants.SUPPORTED_PROVISIONING_STATUSES,

octavia/tests/functional/db/test_repositories.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# License for the specific language governing permissions and limitations
1313
# under the License.
1414

15+
from concurrent.futures import ThreadPoolExecutor
1516
import datetime
1617
import random
1718
from unittest import mock
@@ -509,6 +510,47 @@ def test_sqlite_transactions_broken(self):
509510
project_id=project_id).all()
510511
self.assertEqual(1, len(lbs)) # After rollback: 1 (broken!)
511512

513+
def test_check_quota_met_check_deadlock(self):
514+
# This test doesn't work with sqlite, using another backend is not
515+
# straighforward, we need to update the connection_string passed to the
516+
# __init__ func and also change some calls in the constructor (don't
517+
# create the DB objects if we use a DB that was deployed for Octavia)
518+
if 'sqlite://' in self.connection_string:
519+
self.skipTest("The test for checking potential deadlocks "
520+
"doesn't work with the sqlite backend")
521+
522+
conf = self.useFixture(oslo_fixture.Config(cfg.CONF))
523+
conf.config(group='api_settings', auth_strategy=constants.TESTING)
524+
conf.config(group='quotas', default_load_balancer_quota=-1)
525+
526+
# Calling check_quota_met concurrently from many threads may
527+
# have triggered a deadlock in the DB
528+
# (Note: we run the test 8 times because it's not 100% reproducible)
529+
# https://bugs.launchpad.net/octavia/+bug/2038798
530+
for _ in range(8):
531+
number_of_projects = 8
532+
project_ids = (
533+
uuidutils.generate_uuid()
534+
for _ in range(number_of_projects))
535+
536+
with ThreadPoolExecutor(
537+
max_workers=number_of_projects) as executor:
538+
def _test_check_quota_met(project_id):
539+
session = self.get_session()
540+
session.begin()
541+
self.assertFalse(self.repos.check_quota_met(
542+
session, data_models.LoadBalancer,
543+
project_id))
544+
session.commit()
545+
546+
futs = []
547+
for project_id in project_ids:
548+
future = executor.submit(_test_check_quota_met, project_id)
549+
futs.append(future)
550+
551+
for fut in futs:
552+
fut.result()
553+
512554
def test_check_quota_met(self):
513555

514556
project_id = uuidutils.generate_uuid()

0 commit comments

Comments
 (0)