Skip to content

Commit 0aefe09

Browse files
kevinAlbseramongodb
andcommitted
CDRIVER-3645 Close operation connections to removed servers (#1618)
* add regression test: check connection counts of servers removed by SRV polling * prune clients on push Track last known server IDs in the client pool. Before doing an expensive prune, do a cheaper check to see if server IDs changed. * test direct topology changes Servers may be removed from the topology in other scenarios aside from SRV polling (e.g. during SDAM) * document expected use of pop/push --------- Co-authored-by: Ezra Chung <[email protected]>
1 parent ad8e82b commit 0aefe09

File tree

7 files changed

+476
-0
lines changed

7 files changed

+476
-0
lines changed

src/libmongoc/doc/mongoc_client_pool_pop.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ Retrieve a :symbol:`mongoc_client_t` from the client pool, or create one. The to
1616

1717
The returned :symbol:`mongoc_client_t` must be returned to the pool with :symbol:`mongoc_client_pool_push()`.
1818

19+
.. note::
20+
21+
Return a checked out :symbol:`mongoc_client_t` to the pool with :symbol:`mongoc_client_pool_push` quickly to encourage reuse of clients among threads.
22+
1923
Parameters
2024
----------
2125

src/libmongoc/doc/mongoc_client_pool_try_pop.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ Synopsis
1414
1515
This function is identical to :symbol:`mongoc_client_pool_pop()` except it will return ``NULL`` instead of blocking for a client to become available.
1616

17+
.. note::
18+
19+
Return a checked out :symbol:`mongoc_client_t` to the pool with :symbol:`mongoc_client_pool_push` quickly to encourage reuse of clients among threads.
20+
1721
Parameters
1822
----------
1923

src/libmongoc/src/mongoc/mongoc-client-pool.c

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "mongoc.h"
1919
#include "mongoc-apm-private.h"
20+
#include "mongoc-array-private.h"
2021
#include "mongoc-counters-private.h"
2122
#include "mongoc-client-pool-private.h"
2223
#include "mongoc-client-pool.h"
@@ -52,6 +53,8 @@ struct _mongoc_client_pool_t {
5253
bool error_api_set;
5354
mongoc_server_api_t *api;
5455
bool client_initialized;
56+
// `last_known_serverids` is a sorted array of uint32_t.
57+
mongoc_array_t last_known_serverids;
5558
};
5659

5760

@@ -146,6 +149,7 @@ mongoc_client_pool_new_with_error (const mongoc_uri_t *uri, bson_error_t *error)
146149
}
147150

148151
pool = (mongoc_client_pool_t *) bson_malloc0 (sizeof *pool);
152+
_mongoc_array_init (&pool->last_known_serverids, sizeof (uint32_t));
149153
bson_mutex_init (&pool->mutex);
150154
mongoc_cond_init (&pool->cond);
151155
_mongoc_queue_init (&pool->queue);
@@ -229,6 +233,8 @@ mongoc_client_pool_destroy (mongoc_client_pool_t *pool)
229233
_mongoc_ssl_opts_cleanup (&pool->ssl_opts, true);
230234
#endif
231235

236+
_mongoc_array_destroy (&pool->last_known_serverids);
237+
232238
bson_free (pool);
233239

234240
mongoc_counter_client_pools_active_dec ();
@@ -357,6 +363,53 @@ mongoc_client_pool_try_pop (mongoc_client_pool_t *pool)
357363
RETURN (client);
358364
}
359365

366+
typedef struct {
367+
mongoc_array_t *known_server_ids;
368+
mongoc_cluster_t *cluster;
369+
} prune_ctx;
370+
371+
static int
372+
server_id_cmp (const void *a_, const void *b_)
373+
{
374+
const uint32_t *const a = (const uint32_t *) a_;
375+
const uint32_t *const b = (const uint32_t *) b_;
376+
377+
if (*a == *b) {
378+
return 0;
379+
}
380+
381+
return *a < *b ? -1 : 1;
382+
}
383+
384+
// `maybe_prune` removes a `mongoc_cluster_node_t` if the node refers to a removed server.
385+
static bool
386+
maybe_prune (void *item, void *ctx_)
387+
{
388+
mongoc_cluster_node_t *cn = (mongoc_cluster_node_t *) item;
389+
prune_ctx *ctx = (prune_ctx *) ctx_;
390+
// Get the server ID from the cluster node.
391+
uint32_t server_id = cn->handshake_sd->id;
392+
393+
// Check if the cluster node's server ID references a removed server.
394+
if (!bsearch (
395+
&server_id, ctx->known_server_ids->data, ctx->known_server_ids->len, sizeof (uint32_t), server_id_cmp)) {
396+
mongoc_cluster_disconnect_node (ctx->cluster, server_id);
397+
}
398+
return true;
399+
}
400+
401+
// `prune_client` closes connections from `client` to servers not contained in `known_server_ids`.
402+
static void
403+
prune_client (mongoc_client_t *client, mongoc_array_t *known_server_ids)
404+
{
405+
BSON_ASSERT_PARAM (client);
406+
BSON_ASSERT_PARAM (known_server_ids);
407+
408+
mongoc_cluster_t *cluster = &client->cluster;
409+
prune_ctx ctx = {.cluster = cluster, .known_server_ids = known_server_ids};
410+
mongoc_set_for_each (cluster->nodes, maybe_prune, &ctx);
411+
}
412+
360413

361414
void
362415
mongoc_client_pool_push (mongoc_client_pool_t *pool, mongoc_client_t *client)
@@ -370,6 +423,48 @@ mongoc_client_pool_push (mongoc_client_pool_t *pool, mongoc_client_t *client)
370423
mongoc_cluster_reset_sockettimeoutms (&client->cluster);
371424

372425
bson_mutex_lock (&pool->mutex);
426+
// Check if `last_known_server_ids` needs update.
427+
bool serverids_have_changed = false;
428+
{
429+
mongoc_array_t current_serverids;
430+
_mongoc_array_init (&current_serverids, sizeof (uint32_t));
431+
432+
{
433+
mc_shared_tpld td = mc_tpld_take_ref (pool->topology);
434+
const mongoc_set_t *servers = mc_tpld_servers_const (td.ptr);
435+
for (size_t i = 0; i < servers->items_len; i++) {
436+
_mongoc_array_append_val (&current_serverids, servers->items[i].id);
437+
}
438+
mc_tpld_drop_ref (&td);
439+
}
440+
441+
serverids_have_changed = (current_serverids.len != pool->last_known_serverids.len) ||
442+
memcmp (current_serverids.data,
443+
pool->last_known_serverids.data,
444+
current_serverids.len * current_serverids.element_size) != 0;
445+
446+
if (serverids_have_changed) {
447+
_mongoc_array_destroy (&pool->last_known_serverids);
448+
pool->last_known_serverids = current_serverids; // Ownership transfer.
449+
} else {
450+
_mongoc_array_destroy (&current_serverids);
451+
}
452+
}
453+
454+
// Check if pooled clients need to be pruned.
455+
if (serverids_have_changed) {
456+
// The set of last known server IDs has changed. Prune all clients in pool.
457+
mongoc_queue_item_t *ptr = pool->queue.head;
458+
while (ptr != NULL) {
459+
prune_client ((mongoc_client_t *) ptr->data, &pool->last_known_serverids);
460+
ptr = ptr->next;
461+
}
462+
}
463+
464+
// Always prune incoming client. The topology may have changed while client was checked out.
465+
prune_client (client, &pool->last_known_serverids);
466+
467+
// Push client back into pool.
373468
_mongoc_queue_push_head (&pool->queue, client);
374469

375470
if (pool->min_pool_size && _mongoc_queue_get_length (&pool->queue) > pool->min_pool_size) {

src/libmongoc/tests/TestSuite.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,3 +1226,35 @@ bson_value_eq (const bson_value_t *a, const bson_value_t *b)
12261226
bson_destroy (tmp_a);
12271227
return ret;
12281228
}
1229+
1230+
int32_t
1231+
get_current_connection_count (const char *host_and_port)
1232+
{
1233+
char *uri_str = bson_strdup_printf ("mongodb://%s\n", host_and_port);
1234+
char *uri_str_with_auth = test_framework_add_user_password_from_env (uri_str);
1235+
mongoc_client_t *client = mongoc_client_new (uri_str_with_auth);
1236+
test_framework_set_ssl_opts (client);
1237+
bson_t *cmd = BCON_NEW ("serverStatus", BCON_INT32 (1));
1238+
bson_t reply;
1239+
bson_error_t error;
1240+
bool ok = mongoc_client_command_simple (client, "admin", cmd, NULL, &reply, &error);
1241+
if (!ok) {
1242+
printf ("serverStatus failed: %s\n", error.message);
1243+
abort ();
1244+
}
1245+
int32_t conns;
1246+
// Get `connections.current` from the reply.
1247+
{
1248+
bson_iter_t iter;
1249+
BSON_ASSERT (bson_iter_init_find (&iter, &reply, "connections"));
1250+
BSON_ASSERT (bson_iter_recurse (&iter, &iter));
1251+
BSON_ASSERT (bson_iter_find (&iter, "current"));
1252+
conns = bson_iter_int32 (&iter);
1253+
}
1254+
bson_destroy (&reply);
1255+
bson_destroy (cmd);
1256+
mongoc_client_destroy (client);
1257+
bson_free (uri_str_with_auth);
1258+
bson_free (uri_str);
1259+
return conns;
1260+
}

src/libmongoc/tests/TestSuite.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,44 @@ bson_value_eq (const bson_value_t *a, const bson_value_t *b);
577577
} else \
578578
(void) 0
579579

580+
581+
// `get_current_connection_count` returns the server reported connection count.
582+
int32_t
583+
get_current_connection_count (const char *host_and_port);
584+
585+
#define ASSERT_CONN_COUNT(host, expect) \
586+
if (1) { \
587+
int32_t _got = get_current_connection_count (host); \
588+
if (_got != expect) { \
589+
test_error ("Got unexpected connection count to %s:\n" \
590+
" Expected %" PRId32 ", got %" PRId32 "\n", \
591+
host, \
592+
expect, \
593+
_got); \
594+
} \
595+
} else \
596+
(void) 0
597+
598+
#define ASSERT_EVENTUAL_CONN_COUNT(host, expect) \
599+
if (1) { \
600+
int64_t _start = bson_get_monotonic_time (); \
601+
while (true) { \
602+
int32_t _got = get_current_connection_count (host); \
603+
if (_got == expect) { \
604+
break; \
605+
} \
606+
int64_t _now = bson_get_monotonic_time (); \
607+
if (_now - _start > 5 * 1000 * 1000 /* five seconds */) { \
608+
test_error ("Timed out waiting for expected connection count to %s:\n" \
609+
" Expected %" PRId32 ", got %" PRId32 "\n", \
610+
host, \
611+
expect, \
612+
_got); \
613+
} \
614+
} \
615+
} else \
616+
(void) 0
617+
580618
#define MAX_TEST_NAME_LENGTH 500
581619
#define MAX_TEST_CHECK_FUNCS 10
582620

0 commit comments

Comments
 (0)