Skip to content

Commit d1fd3f7

Browse files
authored
PYTHON-2363 Rate limit new connection creations via maxConnecting (#511)
At most 2 connections can be in the pending state per connection pool. The pending state covers all the work required to set up a new connection including TCP, TLS, and MongoDB authentication. For example, if two threads are currently creating connections, a third thread will wait for either an existing connection to be checked back into the pool or for one of the two threads to finish creating a connection. The change reduces the likelihood of connection storms and improves the driver's ability to reuse existing connections.
1 parent 86d5811 commit d1fd3f7

9 files changed

+477
-23
lines changed

doc/faq.rst

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,32 @@ to 100. If there are ``maxPoolSize`` connections to a server and all are in
5858
use, the next request to that server will wait until one of the connections
5959
becomes available.
6060

61-
The client instance opens one additional socket per server in your MongoDB
61+
The client instance opens two additional sockets per server in your MongoDB
6262
topology for monitoring the server's state.
6363

64-
For example, a client connected to a 3-node replica set opens 3 monitoring
64+
For example, a client connected to a 3-node replica set opens 6 monitoring
6565
sockets. It also opens as many sockets as needed to support a multi-threaded
6666
application's concurrent operations on each server, up to ``maxPoolSize``. With
6767
a ``maxPoolSize`` of 100, if the application only uses the primary (the
6868
default), then only the primary connection pool grows and the total connections
69-
is at most 103. If the application uses a
69+
is at most 106. If the application uses a
7070
:class:`~pymongo.read_preferences.ReadPreference` to query the secondaries,
71-
their pools also grow and the total connections can reach 303.
71+
their pools also grow and the total connections can reach 306.
72+
73+
Additionally, the pools are rate limited such that each connection pool can
74+
create at most 2 connections in parallel at any time. The connection
75+
creation covers all the work required to set up a new connection
76+
including DNS, TCP, SSL/TLS, MongoDB handshake, and MongoDB authentication.
77+
For example, if three threads concurrently attempt to check out a connection
78+
from an empty pool, the first two threads will begin creating new connections
79+
while the third thread will wait. The third thread stops waiting when either:
80+
81+
- one of the first two threads finishes creating a connection, or
82+
- an existing connection is checked back into the pool.
83+
84+
Rate limiting concurrent connection creation reduces the likelihood of
85+
connection storms and improves the driver's ability to reuse existing
86+
connections.
7287

7388
It is possible to set the minimum number of concurrent connections to each
7489
server with ``minPoolSize``, which defaults to 0. The connection pool will be

pymongo/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@
8989
# Default value for minPoolSize.
9090
MIN_POOL_SIZE = 0
9191

92+
# The maximum number of concurrent connection creation attempts per pool.
93+
MAX_CONNECTING = 2
94+
9295
# Default value for maxIdleTimeMS.
9396
MAX_IDLE_TIME_MS = None
9497

pymongo/pool.py

Lines changed: 81 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@
2828
IPADDR_SAFE as _IPADDR_SAFE)
2929

3030
from bson import DEFAULT_CODEC_OPTIONS
31-
from bson.py3compat import imap, itervalues, _unicode
31+
from bson.py3compat import imap, itervalues, _unicode, PY3
3232
from bson.son import SON
3333
from pymongo import auth, helpers, thread_util, __version__
3434
from pymongo.client_session import _validate_session_write_concern
3535
from pymongo.common import (MAX_BSON_SIZE,
36+
MAX_CONNECTING,
3637
MAX_IDLE_TIME_SEC,
3738
MAX_MESSAGE_SIZE,
3839
MAX_POOL_SIZE,
@@ -285,6 +286,20 @@ def _raise_connection_failure(address, error, msg_prefix=None):
285286
else:
286287
raise AutoReconnect(msg)
287288

289+
if PY3:
290+
def _cond_wait(condition, deadline):
291+
timeout = deadline - _time() if deadline else None
292+
return condition.wait(timeout)
293+
else:
294+
def _cond_wait(condition, deadline):
295+
timeout = deadline - _time() if deadline else None
296+
condition.wait(timeout)
297+
# Python 2.7 always returns None for wait(),
298+
# manually check for a timeout.
299+
if timeout and _time() >= deadline:
300+
return False
301+
return True
302+
288303

289304
class PoolOptions(object):
290305

@@ -294,7 +309,7 @@ class PoolOptions(object):
294309
'__wait_queue_timeout', '__wait_queue_multiple',
295310
'__ssl_context', '__ssl_match_hostname', '__socket_keepalive',
296311
'__event_listeners', '__appname', '__driver', '__metadata',
297-
'__compression_settings')
312+
'__compression_settings', '__max_connecting')
298313

299314
def __init__(self, max_pool_size=MAX_POOL_SIZE,
300315
min_pool_size=MIN_POOL_SIZE,
@@ -303,7 +318,7 @@ def __init__(self, max_pool_size=MAX_POOL_SIZE,
303318
wait_queue_multiple=None, ssl_context=None,
304319
ssl_match_hostname=True, socket_keepalive=True,
305320
event_listeners=None, appname=None, driver=None,
306-
compression_settings=None):
321+
compression_settings=None, max_connecting=MAX_CONNECTING):
307322

308323
self.__max_pool_size = max_pool_size
309324
self.__min_pool_size = min_pool_size
@@ -319,6 +334,7 @@ def __init__(self, max_pool_size=MAX_POOL_SIZE,
319334
self.__appname = appname
320335
self.__driver = driver
321336
self.__compression_settings = compression_settings
337+
self.__max_connecting = max_connecting
322338
self.__metadata = copy.deepcopy(_METADATA)
323339
if appname:
324340
self.__metadata['application'] = {'name': appname}
@@ -357,6 +373,8 @@ def non_default_options(self):
357373
opts['maxIdleTimeMS'] = self.__max_idle_time_seconds * 1000
358374
if self.__wait_queue_timeout != WAIT_QUEUE_TIMEOUT:
359375
opts['waitQueueTimeoutMS'] = self.__wait_queue_timeout * 1000
376+
if self.__max_connecting != MAX_CONNECTING:
377+
opts['maxConnecting'] = self.__max_connecting
360378
return opts
361379

362380
@property
@@ -381,6 +399,13 @@ def min_pool_size(self):
381399
"""
382400
return self.__min_pool_size
383401

402+
@property
403+
def max_connecting(self):
404+
"""The maximum number of concurrent connection creation attempts per
405+
pool. Defaults to 2.
406+
"""
407+
return self.__max_connecting
408+
384409
@property
385410
def max_idle_time_seconds(self):
386411
"""The maximum number of seconds that a connection can remain
@@ -1080,6 +1105,9 @@ def __init__(self, address, options, handshake=True):
10801105

10811106
self._socket_semaphore = thread_util.create_semaphore(
10821107
self.opts.max_pool_size, max_waiters)
1108+
self._max_connecting_cond = threading.Condition(self.lock)
1109+
self._max_connecting = self.opts.max_connecting
1110+
self._pending = 0
10831111
if self.enabled_for_cmap:
10841112
self.opts.event_listeners.publish_pool_created(
10851113
self.address, self.opts.non_default_options)
@@ -1143,21 +1171,34 @@ def remove_stale_sockets(self, reference_generation, all_credentials):
11431171
if (len(self.sockets) + self.active_sockets >=
11441172
self.opts.min_pool_size):
11451173
# There are enough sockets in the pool.
1146-
break
1174+
return
11471175

11481176
# We must acquire the semaphore to respect max_pool_size.
11491177
if not self._socket_semaphore.acquire(False):
1150-
break
1178+
return
1179+
incremented = False
11511180
try:
1181+
with self._max_connecting_cond:
1182+
# If maxConnecting connections are already being created
1183+
# by this pool then try again later instead of waiting.
1184+
if self._pending >= self._max_connecting:
1185+
return
1186+
self._pending += 1
1187+
incremented = True
11521188
sock_info = self.connect(all_credentials)
11531189
with self.lock:
11541190
# Close connection and return if the pool was reset during
11551191
# socket creation or while acquiring the pool lock.
11561192
if self.generation != reference_generation:
11571193
sock_info.close_socket(ConnectionClosedReason.STALE)
1158-
break
1194+
return
11591195
self.sockets.appendleft(sock_info)
11601196
finally:
1197+
if incremented:
1198+
# Notify after adding the socket to the pool.
1199+
with self._max_connecting_cond:
1200+
self._pending -= 1
1201+
self._max_connecting_cond.notify()
11611202
self._socket_semaphore.release()
11621203

11631204
def connect(self, all_credentials=None):
@@ -1260,28 +1301,53 @@ def _get_socket(self, all_credentials):
12601301
'pool')
12611302

12621303
# Get a free socket or create one.
1304+
if self.opts.wait_queue_timeout:
1305+
deadline = _time() + self.opts.wait_queue_timeout
1306+
else:
1307+
deadline = None
12631308
if not self._socket_semaphore.acquire(
12641309
True, self.opts.wait_queue_timeout):
12651310
self._raise_wait_queue_timeout()
12661311

12671312
# We've now acquired the semaphore and must release it on error.
12681313
sock_info = None
12691314
incremented = False
1315+
emitted_event = False
12701316
try:
12711317
with self.lock:
12721318
self.active_sockets += 1
12731319
incremented = True
12741320

12751321
while sock_info is None:
1276-
try:
1277-
with self.lock:
1322+
# CMAP: we MUST wait for either maxConnecting OR for a socket
1323+
# to be checked back into the pool.
1324+
with self._max_connecting_cond:
1325+
while not (self.sockets or
1326+
self._pending < self._max_connecting):
1327+
if not _cond_wait(self._max_connecting_cond, deadline):
1328+
# Timed out, notify the next thread to ensure a
1329+
# timeout doesn't consume the condition.
1330+
if (self.sockets or
1331+
self._pending < self._max_connecting):
1332+
self._max_connecting_cond.notify()
1333+
emitted_event = True
1334+
self._raise_wait_queue_timeout()
1335+
1336+
try:
12781337
sock_info = self.sockets.popleft()
1279-
except IndexError:
1280-
# Can raise ConnectionFailure or CertificateError.
1281-
sock_info = self.connect(all_credentials)
1282-
else:
1338+
except IndexError:
1339+
self._pending += 1
1340+
if sock_info: # We got a socket from the pool
12831341
if self._perished(sock_info):
12841342
sock_info = None
1343+
continue
1344+
else: # We need to create a new connection
1345+
try:
1346+
sock_info = self.connect(all_credentials)
1347+
finally:
1348+
with self._max_connecting_cond:
1349+
self._pending -= 1
1350+
self._max_connecting_cond.notify()
12851351
sock_info.check_auth(all_credentials)
12861352
except Exception:
12871353
if sock_info:
@@ -1293,7 +1359,7 @@ def _get_socket(self, all_credentials):
12931359
with self.lock:
12941360
self.active_sockets -= 1
12951361

1296-
if self.enabled_for_cmap:
1362+
if self.enabled_for_cmap and not emitted_event:
12971363
self.opts.event_listeners.publish_connection_check_out_failed(
12981364
self.address, ConnectionCheckOutFailedReason.CONN_ERROR)
12991365
raise
@@ -1324,6 +1390,8 @@ def return_socket(self, sock_info):
13241390
sock_info.update_last_checkin_time()
13251391
sock_info.update_is_writable(self.is_writable)
13261392
self.sockets.appendleft(sock_info)
1393+
# Notify any threads waiting to create a connection.
1394+
self._max_connecting_cond.notify()
13271395

13281396
self._socket_semaphore.release()
13291397
with self.lock:
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
{
2+
"version": 1,
3+
"style": "integration",
4+
"description": "maxConnecting is enforced",
5+
"runOn": [
6+
{
7+
"minServerVersion": "4.4.0"
8+
}
9+
],
10+
"failPoint": {
11+
"configureFailPoint": "failCommand",
12+
"mode": {
13+
"times": 50
14+
},
15+
"data": {
16+
"failCommands": [
17+
"isMaster"
18+
],
19+
"closeConnection": false,
20+
"blockConnection": true,
21+
"blockTimeMS": 750
22+
}
23+
},
24+
"poolOptions": {
25+
"maxPoolSize": 10,
26+
"waitQueueTimeoutMS": 5000
27+
},
28+
"operations": [
29+
{
30+
"name": "start",
31+
"target": "thread1"
32+
},
33+
{
34+
"name": "checkOut",
35+
"thread": "thread1"
36+
},
37+
{
38+
"name": "start",
39+
"target": "thread2"
40+
},
41+
{
42+
"name": "wait",
43+
"thread": "thread2",
44+
"ms": 100
45+
},
46+
{
47+
"name": "checkOut",
48+
"thread": "thread2"
49+
},
50+
{
51+
"name": "start",
52+
"target": "thread3"
53+
},
54+
{
55+
"name": "wait",
56+
"thread": "thread3",
57+
"ms": 100
58+
},
59+
{
60+
"name": "checkOut",
61+
"thread": "thread3"
62+
},
63+
{
64+
"name": "waitForEvent",
65+
"event": "ConnectionReady",
66+
"count": 3
67+
}
68+
],
69+
"events": [
70+
{
71+
"type": "ConnectionCreated",
72+
"address": 42,
73+
"connectionId": 1
74+
},
75+
{
76+
"type": "ConnectionCreated",
77+
"address": 42
78+
},
79+
{
80+
"type": "ConnectionReady",
81+
"address": 42,
82+
"connectionId": 1
83+
},
84+
{
85+
"type": "ConnectionCreated",
86+
"address": 42
87+
},
88+
{
89+
"type": "ConnectionReady",
90+
"address": 42
91+
},
92+
{
93+
"type": "ConnectionReady",
94+
"address": 42
95+
}
96+
],
97+
"ignore": [
98+
"ConnectionCheckOutStarted",
99+
"ConnectionCheckedIn",
100+
"ConnectionCheckedOut",
101+
"ConnectionClosed",
102+
"ConnectionPoolCreated"
103+
]
104+
}

0 commit comments

Comments
 (0)