Skip to content

Commit ead96be

Browse files
committed
PYTHON-2363 Rate limit new connection creations via maxConnecting
At most 2 connections can be in the pending state per connection pool. The pending state covers all the work required to set up a new connection including TCP, TLS, and MongoDB authentication. For example, if two threads are currently creating connections, a third thread will wait for either an existing connection to be checked back into the pool or for one of the two threads to finish creating a connection. The change reduces the likelihood of connection storms and improves the driver's ability to reuse existing connections.
1 parent 98205b8 commit ead96be

9 files changed

+470
-23
lines changed

doc/faq.rst

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,32 @@ to 100. If there are ``maxPoolSize`` connections to a server and all are in
5858
use, the next request to that server will wait until one of the connections
5959
becomes available.
6060

61-
The client instance opens one additional socket per server in your MongoDB
61+
The client instance opens two additional sockets per server in your MongoDB
6262
topology for monitoring the server's state.
6363

64-
For example, a client connected to a 3-node replica set opens 3 monitoring
64+
For example, a client connected to a 3-node replica set opens 6 monitoring
6565
sockets. It also opens as many sockets as needed to support a multi-threaded
6666
application's concurrent operations on each server, up to ``maxPoolSize``. With
6767
a ``maxPoolSize`` of 100, if the application only uses the primary (the
6868
default), then only the primary connection pool grows and the total connections
69-
is at most 103. If the application uses a
69+
is at most 106. If the application uses a
7070
:class:`~pymongo.read_preferences.ReadPreference` to query the secondaries,
71-
their pools also grow and the total connections can reach 303.
71+
their pools also grow and the total connections can reach 306.
72+
73+
Additionally, the pools are rate limited such that each connection pool can
74+
create at most 2 connections in parallel at any time. The connection
75+
creation covers all the work required to set up a new connection
76+
including DNS, TCP, SSL/TLS, MongoDB handshake, and MongoDB authentication.
77+
For example, if three threads concurrently attempt to check out a connection
78+
from an empty pool, the first two threads will begin creating new connections
79+
while the third thread will wait. The third thread stops waiting when either:
80+
81+
- one of the first two threads finishes creating a connection, or
82+
- an existing connection is checked back into the pool.
83+
84+
Rate limiting concurrent connection creation reduces the likelihood of
85+
connection storms and improves the driver's ability to reuse existing
86+
connections.
7287

7388
It is possible to set the minimum number of concurrent connections to each
7489
server with ``minPoolSize``, which defaults to 0. The connection pool will be

pymongo/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@
8989
# Default value for minPoolSize.
9090
MIN_POOL_SIZE = 0
9191

92+
# The maximum number of concurrent connection creation attempts per pool.
93+
MAX_CONNECTING = 2
94+
9295
# Default value for maxIdleTimeMS.
9396
MAX_IDLE_TIME_MS = None
9497

pymongo/pool.py

Lines changed: 82 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@
2828
IPADDR_SAFE as _IPADDR_SAFE)
2929

3030
from bson import DEFAULT_CODEC_OPTIONS
31-
from bson.py3compat import imap, itervalues, _unicode
31+
from bson.py3compat import imap, itervalues, _unicode, PY3
3232
from bson.son import SON
3333
from pymongo import auth, helpers, thread_util, __version__
3434
from pymongo.client_session import _validate_session_write_concern
3535
from pymongo.common import (MAX_BSON_SIZE,
36+
MAX_CONNECTING,
3637
MAX_IDLE_TIME_SEC,
3738
MAX_MESSAGE_SIZE,
3839
MAX_POOL_SIZE,
@@ -285,6 +286,18 @@ def _raise_connection_failure(address, error, msg_prefix=None):
285286
else:
286287
raise AutoReconnect(msg)
287288

289+
if PY3:
290+
def _cond_wait(condition, timeout, deadline):
291+
return condition.wait(timeout)
292+
else:
293+
def _cond_wait(condition, timeout, deadline):
294+
condition.wait(timeout)
295+
# Python 2.7 always returns None for wait(),
296+
# manually check for a timeout.
297+
if timeout and _time() >= deadline:
298+
return False
299+
return True
300+
288301

289302
class PoolOptions(object):
290303

@@ -294,7 +307,7 @@ class PoolOptions(object):
294307
'__wait_queue_timeout', '__wait_queue_multiple',
295308
'__ssl_context', '__ssl_match_hostname', '__socket_keepalive',
296309
'__event_listeners', '__appname', '__driver', '__metadata',
297-
'__compression_settings')
310+
'__compression_settings', '__max_connecting')
298311

299312
def __init__(self, max_pool_size=MAX_POOL_SIZE,
300313
min_pool_size=MIN_POOL_SIZE,
@@ -303,7 +316,7 @@ def __init__(self, max_pool_size=MAX_POOL_SIZE,
303316
wait_queue_multiple=None, ssl_context=None,
304317
ssl_match_hostname=True, socket_keepalive=True,
305318
event_listeners=None, appname=None, driver=None,
306-
compression_settings=None):
319+
compression_settings=None, max_connecting=MAX_CONNECTING):
307320

308321
self.__max_pool_size = max_pool_size
309322
self.__min_pool_size = min_pool_size
@@ -319,6 +332,7 @@ def __init__(self, max_pool_size=MAX_POOL_SIZE,
319332
self.__appname = appname
320333
self.__driver = driver
321334
self.__compression_settings = compression_settings
335+
self.__max_connecting = max_connecting
322336
self.__metadata = copy.deepcopy(_METADATA)
323337
if appname:
324338
self.__metadata['application'] = {'name': appname}
@@ -357,6 +371,8 @@ def non_default_options(self):
357371
opts['maxIdleTimeMS'] = self.__max_idle_time_seconds * 1000
358372
if self.__wait_queue_timeout != WAIT_QUEUE_TIMEOUT:
359373
opts['waitQueueTimeoutMS'] = self.__wait_queue_timeout * 1000
374+
if self.__max_connecting != MAX_CONNECTING:
375+
opts['maxConnecting'] = self.__max_connecting
360376
return opts
361377

362378
@property
@@ -381,6 +397,13 @@ def min_pool_size(self):
381397
"""
382398
return self.__min_pool_size
383399

400+
@property
401+
def max_connecting(self):
402+
"""The maximum number of concurrent connection creation attempts per
403+
pool. Defaults to 2.
404+
"""
405+
return self.__max_connecting
406+
384407
@property
385408
def max_idle_time_seconds(self):
386409
"""The maximum number of seconds that a connection can remain
@@ -1080,6 +1103,9 @@ def __init__(self, address, options, handshake=True):
10801103

10811104
self._socket_semaphore = thread_util.create_semaphore(
10821105
self.opts.max_pool_size, max_waiters)
1106+
self._max_connecting_cond = threading.Condition(self.lock)
1107+
self._max_connecting = self.opts.max_connecting
1108+
self._pending = 0
10831109
if self.enabled_for_cmap:
10841110
self.opts.event_listeners.publish_pool_created(
10851111
self.address, self.opts.non_default_options)
@@ -1143,21 +1169,34 @@ def remove_stale_sockets(self, reference_generation, all_credentials):
11431169
if (len(self.sockets) + self.active_sockets >=
11441170
self.opts.min_pool_size):
11451171
# There are enough sockets in the pool.
1146-
break
1172+
return
11471173

11481174
# We must acquire the semaphore to respect max_pool_size.
11491175
if not self._socket_semaphore.acquire(False):
1150-
break
1176+
return
1177+
incremented = False
11511178
try:
1179+
with self._max_connecting_cond:
1180+
# If maxConnecting connections are already being created
1181+
# by this pool then try again later instead of waiting.
1182+
if self._pending >= self._max_connecting:
1183+
return
1184+
self._pending += 1
1185+
incremented = True
11521186
sock_info = self.connect(all_credentials)
11531187
with self.lock:
11541188
# Close connection and return if the pool was reset during
11551189
# socket creation or while acquiring the pool lock.
11561190
if self.generation != reference_generation:
11571191
sock_info.close_socket(ConnectionClosedReason.STALE)
1158-
break
1192+
return
11591193
self.sockets.appendleft(sock_info)
11601194
finally:
1195+
if incremented:
1196+
# Notify after adding the socket to the pool.
1197+
with self._max_connecting_cond:
1198+
self._pending -= 1
1199+
self._max_connecting_cond.notify()
11611200
self._socket_semaphore.release()
11621201

11631202
def connect(self, all_credentials=None):
@@ -1260,28 +1299,56 @@ def _get_socket(self, all_credentials):
12601299
'pool')
12611300

12621301
# Get a free socket or create one.
1302+
if self.opts.wait_queue_timeout:
1303+
deadline = _time() + self.opts.wait_queue_timeout
1304+
else:
1305+
deadline = None
12631306
if not self._socket_semaphore.acquire(
12641307
True, self.opts.wait_queue_timeout):
12651308
self._raise_wait_queue_timeout()
12661309

12671310
# We've now acquired the semaphore and must release it on error.
12681311
sock_info = None
12691312
incremented = False
1313+
emitted_event = False
12701314
try:
12711315
with self.lock:
12721316
self.active_sockets += 1
12731317
incremented = True
12741318

12751319
while sock_info is None:
1276-
try:
1277-
with self.lock:
1320+
# CMAP: we MUST wait for either maxConnecting OR for a socket
1321+
# to be checked back into the pool.
1322+
with self._max_connecting_cond:
1323+
while (self._pending >= self._max_connecting and
1324+
not self.sockets):
1325+
if self.opts.wait_queue_timeout:
1326+
# TODO: What if timeout is <= zero here?
1327+
# timeout = max(deadline - _time(), .001)
1328+
timeout = deadline - _time()
1329+
else:
1330+
timeout = None
1331+
if not _cond_wait(self._max_connecting_cond,
1332+
timeout, deadline):
1333+
# timeout
1334+
emitted_event = True
1335+
self._raise_wait_queue_timeout()
1336+
1337+
try:
12781338
sock_info = self.sockets.popleft()
1279-
except IndexError:
1280-
# Can raise ConnectionFailure or CertificateError.
1281-
sock_info = self.connect(all_credentials)
1282-
else:
1339+
except IndexError:
1340+
self._pending += 1
1341+
if sock_info: # We got a socket from the pool
12831342
if self._perished(sock_info):
12841343
sock_info = None
1344+
continue
1345+
else: # We need to create a new connection
1346+
try:
1347+
sock_info = self.connect(all_credentials)
1348+
finally:
1349+
with self._max_connecting_cond:
1350+
self._pending -= 1
1351+
self._max_connecting_cond.notify()
12851352
sock_info.check_auth(all_credentials)
12861353
except Exception:
12871354
if sock_info:
@@ -1293,7 +1360,7 @@ def _get_socket(self, all_credentials):
12931360
with self.lock:
12941361
self.active_sockets -= 1
12951362

1296-
if self.enabled_for_cmap:
1363+
if self.enabled_for_cmap and not emitted_event:
12971364
self.opts.event_listeners.publish_connection_check_out_failed(
12981365
self.address, ConnectionCheckOutFailedReason.CONN_ERROR)
12991366
raise
@@ -1324,6 +1391,8 @@ def return_socket(self, sock_info):
13241391
sock_info.update_last_checkin_time()
13251392
sock_info.update_is_writable(self.is_writable)
13261393
self.sockets.appendleft(sock_info)
1394+
# Notify any threads waiting to create a connection.
1395+
self._max_connecting_cond.notify()
13271396

13281397
self._socket_semaphore.release()
13291398
with self.lock:
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
{
2+
"version": 1,
3+
"style": "integration",
4+
"description": "maxConnecting is enforced",
5+
"runOn": [
6+
{
7+
"minServerVersion": "4.4.0"
8+
}
9+
],
10+
"failPoint": {
11+
"configureFailPoint": "failCommand",
12+
"mode": {
13+
"times": 50
14+
},
15+
"data": {
16+
"failCommands": [
17+
"isMaster"
18+
],
19+
"closeConnection": false,
20+
"blockConnection": true,
21+
"blockTimeMS": 750
22+
}
23+
},
24+
"poolOptions": {
25+
"maxPoolSize": 10,
26+
"waitQueueTimeoutMS": 5000
27+
},
28+
"operations": [
29+
{
30+
"name": "start",
31+
"target": "thread1"
32+
},
33+
{
34+
"name": "checkOut",
35+
"thread": "thread1"
36+
},
37+
{
38+
"name": "start",
39+
"target": "thread2"
40+
},
41+
{
42+
"name": "wait",
43+
"thread": "thread2",
44+
"ms": 100
45+
},
46+
{
47+
"name": "checkOut",
48+
"thread": "thread2"
49+
},
50+
{
51+
"name": "start",
52+
"target": "thread3"
53+
},
54+
{
55+
"name": "wait",
56+
"thread": "thread3",
57+
"ms": 100
58+
},
59+
{
60+
"name": "checkOut",
61+
"thread": "thread3"
62+
},
63+
{
64+
"name": "waitForEvent",
65+
"event": "ConnectionReady",
66+
"count": 3
67+
}
68+
],
69+
"events": [
70+
{
71+
"type": "ConnectionCreated",
72+
"address": 42,
73+
"connectionId": 1
74+
},
75+
{
76+
"type": "ConnectionCreated",
77+
"address": 42
78+
},
79+
{
80+
"type": "ConnectionReady",
81+
"address": 42,
82+
"connectionId": 1
83+
},
84+
{
85+
"type": "ConnectionCreated",
86+
"address": 42
87+
},
88+
{
89+
"type": "ConnectionReady",
90+
"address": 42
91+
},
92+
{
93+
"type": "ConnectionReady",
94+
"address": 42
95+
}
96+
],
97+
"ignore": [
98+
"ConnectionCheckOutStarted",
99+
"ConnectionCheckedIn",
100+
"ConnectionCheckedOut",
101+
"ConnectionClosed",
102+
"ConnectionPoolCreated"
103+
]
104+
}

0 commit comments

Comments
 (0)