Skip to content

Commit 922b3f6

Browse files
committed
CDRIVER-699 obey cooldownTimeMS
Server Discovery And Monitoring Spec change: "After a single-threaded client gets a network error trying to check a server, the client skips re-checking the server until cooldownMS has passed." Additional timing fixes in topology: don't quit with a timeout until *after* checking if a suitable server was discovered, and don't sleep minHeartbeatFrequencyMS unless needed.
1 parent 73a3693 commit 922b3f6

File tree

6 files changed

+251
-22
lines changed

6 files changed

+251
-22
lines changed

src/mongoc/mongoc-topology-private.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#define MONGOC_TOPOLOGY_MIN_HEARTBEAT_FREQUENCY_MS 500
2828
#define MONGOC_TOPOLOGY_SOCKET_CHECK_INTERVAL_MS 5000
29+
#define MONGOC_TOPOLOGY_COOLDOWN_MS 5000
2930
#define MONGOC_TOPOLOGY_SERVER_SELECTION_TIMEOUT_MS 30000
3031
#define MONGOC_TOPOLOGY_HEARTBEAT_FREQUENCY_MS_MULTI_THREADED 10000
3132
#define MONGOC_TOPOLOGY_HEARTBEAT_FREQUENCY_MS_SINGLE_THREADED 60000

src/mongoc/mongoc-topology-scanner-private.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ typedef struct mongoc_topology_scanner_node
4545
mongoc_stream_t *stream;
4646
int64_t timestamp;
4747
int64_t last_used;
48+
int64_t last_failed;
4849
bool has_auth;
4950
mongoc_host_list_t host;
5051
struct addrinfo *dns_results;
@@ -93,7 +94,8 @@ mongoc_topology_scanner_node_destroy (mongoc_topology_scanner_node_t *node,
9394

9495
void
9596
mongoc_topology_scanner_start (mongoc_topology_scanner_t *ts,
96-
int32_t timeout_msec);
97+
int32_t timeout_msec,
98+
bool obey_cooldown);
9799

98100
bool
99101
mongoc_topology_scanner_work (mongoc_topology_scanner_t *ts,

src/mongoc/mongoc-topology-scanner.c

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "mongoc-async-private.h"
3030
#include "mongoc-async-cmd-private.h"
3131
#include "utlist.h"
32+
#include "mongoc-topology-private.h"
3233

3334
#undef MONGOC_LOG_DOMAIN
3435
#define MONGOC_LOG_DOMAIN "topology_scanner"
@@ -97,6 +98,7 @@ mongoc_topology_scanner_add (mongoc_topology_scanner_t *ts,
9798

9899
node->id = id;
99100
node->ts = ts;
101+
node->last_failed = -1;
100102

101103
DL_APPEND(ts->nodes, node);
102104

@@ -177,6 +179,7 @@ mongoc_topology_scanner_ismaster_handler (mongoc_async_cmd_result_t async_status
177179
bson_error_t *error)
178180
{
179181
mongoc_topology_scanner_node_t *node;
182+
int64_t now = bson_get_monotonic_time ();
180183

181184
bson_return_if_fail (data);
182185

@@ -189,9 +192,12 @@ mongoc_topology_scanner_ismaster_handler (mongoc_async_cmd_result_t async_status
189192
async_status == MONGOC_ASYNC_CMD_TIMEOUT) {
190193
mongoc_stream_failed (node->stream);
191194
node->stream = NULL;
195+
node->last_failed = now;
196+
} else {
197+
node->last_failed = -1;
192198
}
193199

194-
node->last_used = bson_get_monotonic_time ();
200+
node->last_used = now;
195201

196202
node->ts->cb (node->id, ismaster_response, rtt_msec,
197203
node->ts->cb_data, error);
@@ -372,28 +378,52 @@ mongoc_topology_scanner_node_setup (mongoc_topology_scanner_node_t *node)
372378
* should be called once before calling mongoc_topology_scanner_work()
373379
* repeatedly to complete the scan.
374380
*
381+
* If "obey_cooldown" is true, this is a single-threaded blocking scan
382+
* that must obey the Server Discovery And Monitoring Spec's cooldownMS:
383+
*
384+
* "After a single-threaded client gets a network error trying to check
385+
* a server, the client skips re-checking the server until cooldownMS has
386+
* passed.
387+
*
388+
* "This avoids spending connectTimeoutMS on each unavailable server
389+
* during each scan.
390+
*
391+
* "This value MUST be 5000 ms, and it MUST NOT be configurable."
392+
*
375393
*--------------------------------------------------------------------------
376394
*/
377395

378396
void
379397
mongoc_topology_scanner_start (mongoc_topology_scanner_t *ts,
380-
int32_t timeout_msec)
398+
int32_t timeout_msec,
399+
bool obey_cooldown)
381400
{
382401
mongoc_topology_scanner_node_t *node, *tmp;
383-
402+
int64_t cooldown;
384403
bson_return_if_fail (ts);
385404

386405
if (ts->in_progress) {
387406
return;
388407
}
389408

409+
if (obey_cooldown) {
410+
cooldown = bson_get_monotonic_time ()
411+
- 1000 * MONGOC_TOPOLOGY_COOLDOWN_MS;
412+
} else {
413+
cooldown = INT64_MIN;
414+
}
415+
390416
DL_FOREACH_SAFE (ts->nodes, node, tmp)
391417
{
392-
if (mongoc_topology_scanner_node_setup (node)) {
393-
node->cmd = mongoc_async_cmd (ts->async, node->stream, ts->setup,
394-
node->host.host, "admin", &ts->ismaster_cmd,
395-
&mongoc_topology_scanner_ismaster_handler, node,
396-
timeout_msec);
418+
if (node->last_failed < cooldown) {
419+
if (mongoc_topology_scanner_node_setup (node)) {
420+
node->cmd = mongoc_async_cmd (
421+
ts->async, node->stream, ts->setup,
422+
node->host.host, "admin",
423+
&ts->ismaster_cmd,
424+
&mongoc_topology_scanner_ismaster_handler,
425+
node, timeout_msec);
426+
}
397427
}
398428
}
399429
}

src/mongoc/mongoc-topology.c

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@
1616

1717
#include "mongoc-error.h"
1818
#include "mongoc-topology-private.h"
19-
#include "mongoc-topology-scanner-private.h"
2019
#include "mongoc-uri-private.h"
21-
#include "mongoc-trace.h"
2220

2321
#include "utlist.h"
2422

@@ -267,6 +265,31 @@ _mongoc_topology_time_to_scan (mongoc_topology_t *topology) {
267265
topology->heartbeat_msec * 1000;
268266
}
269267

268+
/*
269+
*--------------------------------------------------------------------------
270+
*
271+
* _mongoc_topology_sleep_min_heartbeat --
272+
*
273+
* Wait until we're allowed to rescan.
274+
*
275+
* Server Discovery And Monitoring Spec: "If a client frequently
276+
* rechecks a server, it MUST wait at least minHeartbeatFrequencyMS
277+
* milliseconds since the previous check to avoid pointless effort.
278+
* This value MUST be 500 ms, and it MUST NOT be configurable."
279+
*
280+
*--------------------------------------------------------------------------
281+
*/
282+
static void
283+
_mongoc_topology_sleep_min_heartbeat (mongoc_topology_t *topology) {
284+
int64_t next_scan = topology->last_scan
285+
+ MONGOC_TOPOLOGY_MIN_HEARTBEAT_FREQUENCY_MS * 1000;
286+
int64_t sleep_usec = next_scan - bson_get_monotonic_time ();
287+
288+
if (sleep_usec > 0) {
289+
usleep (sleep_usec);
290+
}
291+
}
292+
270293
/*
271294
*--------------------------------------------------------------------------
272295
*
@@ -317,7 +340,10 @@ _mongoc_topology_run_scanner (mongoc_topology_t *topology,
317340
*/
318341
static void
319342
_mongoc_topology_do_blocking_scan (mongoc_topology_t *topology) {
320-
mongoc_topology_scanner_start (topology->scanner, topology->timeout_msec);
343+
mongoc_topology_scanner_start (topology->scanner,
344+
topology->timeout_msec,
345+
true);
346+
321347
while (_mongoc_topology_run_scanner (topology, topology->timeout_msec)) {}
322348
topology->last_scan = bson_get_monotonic_time ();
323349
}
@@ -372,12 +398,6 @@ mongoc_topology_select (mongoc_topology_t *topology,
372398

373399
/* until we find a server or timeout */
374400
for (;;) {
375-
/* error if we've timed out */
376-
now = bson_get_monotonic_time();
377-
if (now >= expire_at) {
378-
goto TIMEOUT;
379-
}
380-
381401
/* attempt to select a server */
382402
selected_server = mongoc_topology_description_select(&topology->description,
383403
optype,
@@ -389,9 +409,15 @@ mongoc_topology_select (mongoc_topology_t *topology,
389409
return mongoc_server_description_new_copy(selected_server);
390410
}
391411

412+
/* error if we've timed out */
413+
now = bson_get_monotonic_time();
414+
if (now >= expire_at) {
415+
goto TIMEOUT;
416+
}
417+
392418
/* rescan */
393-
usleep (MONGOC_TOPOLOGY_MIN_HEARTBEAT_FREQUENCY_MS * 1000);
394-
_mongoc_topology_do_blocking_scan(topology);
419+
_mongoc_topology_sleep_min_heartbeat (topology);
420+
_mongoc_topology_do_blocking_scan (topology);
395421
}
396422
}
397423

@@ -628,7 +654,9 @@ void * _mongoc_topology_run_background (void *data)
628654

629655
/* if we can start scanning, do so immediately */
630656
if (timeout <= 0) {
631-
mongoc_topology_scanner_start (topology->scanner, topology->timeout_msec);
657+
mongoc_topology_scanner_start (topology->scanner,
658+
topology->timeout_msec,
659+
false);
632660
break;
633661
} else {
634662
/* otherwise wait until someone:

tests/test-mongoc-topology-scanner.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ _test_topology_scanner(bool with_ssl)
9090
}
9191

9292
for (i = 0; i < 3; i++) {
93-
mongoc_topology_scanner_start (topology_scanner, TIMEOUT);
93+
mongoc_topology_scanner_start (topology_scanner, TIMEOUT, false);
9494

9595
more_to_do = mongoc_topology_scanner_work (topology_scanner, TIMEOUT);
9696

0 commit comments

Comments
 (0)