Skip to content

Commit d934cd5

Browse files
CDRIVER-5517 retry KMS encrypt requests on transient errors (#1577)
--------- Co-authored-by: Kevin Albertson <[email protected]>
1 parent 8adcc14 commit d934cd5

File tree

5 files changed

+201
-23
lines changed

5 files changed

+201
-23
lines changed

.evergreen/config_generator/components/funcs/run_mock_kms_servers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class RunMockKMSServers(Function):
4747
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/expired.pem --port 9000 &
4848
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/wrong-host.pem --port 9001 &
4949
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/server.pem --require_client_cert --port 9002 &
50+
python -u kms_failpoint_server.py --port 9003 &
5051
python -u kms_kmip_server.py &
5152
deactivate
5253
echo "Starting mock KMS TLS servers... done."

.evergreen/generated_configs/functions.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,7 @@ functions:
384384
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/expired.pem --port 9000 &
385385
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/wrong-host.pem --port 9001 &
386386
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/server.pem --require_client_cert --port 9002 &
387+
python -u kms_failpoint_server.py --port 9003 &
387388
python -u kms_kmip_server.py &
388389
deactivate
389390
echo "Starting mock KMS TLS servers... done."

.evergreen/scripts/compile-libmongocrypt.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@ compile_libmongocrypt() {
99
# libmongocrypt's kms-message in `src/kms-message`. Run
1010
# `.evergreen/scripts/kms-divergence-check.sh` to ensure that there is no
1111
# divergence in the copied files.
12-
# TODO: once 1.12.0 is released replace the following with:
12+
13+
# TODO: once 1.12.0 is released (containing MONGOCRYPT-599) replace the following with:
1314
# git clone -q --depth=1 https://github.com/mongodb/libmongocrypt --branch 1.12.0 || return
1415
{
1516
git clone -q https://github.com/mongodb/libmongocrypt || return
16-
git -C libmongocrypt checkout bca8e7dc1ecb7b1c039132e07de5e0db2703c701
17+
# Check out commit containing MONGOCRYPT-599
18+
git -C libmongocrypt checkout 7aeaec4ae1369c7d3c5b3aea6f1da35c5e9478b0
1719
}
1820

1921
declare -a crypt_cmake_flags=(

src/libmongoc/src/mongoc/mongoc-crypt.c

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -542,8 +542,9 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
542542
mongocrypt_binary_t *http_reply = NULL;
543543
const char *endpoint;
544544
const int32_t sockettimeout = MONGOC_DEFAULT_SOCKETTIMEOUTMS;
545-
kms_ctx = mongocrypt_ctx_next_kms_ctx (state_machine->ctx);
546-
while (kms_ctx) {
545+
int64_t sleep_usec = 0;
546+
547+
while ((kms_ctx = mongocrypt_ctx_next_kms_ctx (state_machine->ctx))) {
547548
mongoc_iovec_t iov;
548549
const mongoc_ssl_opt_t *ssl_opt;
549550
const char *provider;
@@ -576,6 +577,11 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
576577
goto fail;
577578
}
578579

580+
sleep_usec = mongocrypt_kms_ctx_usleep (kms_ctx);
581+
if (sleep_usec > 0) {
582+
_mongoc_usleep (sleep_usec);
583+
}
584+
579585
mongoc_stream_destroy (tls_stream);
580586
tls_stream = _get_stream (endpoint, sockettimeout, ssl_opt, error);
581587
#ifdef MONGOC_ENABLE_SSL_SECURE_CHANNEL
@@ -585,14 +591,31 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
585591
}
586592
#endif
587593
if (!tls_stream) {
588-
goto fail;
594+
if (mongocrypt_kms_ctx_fail (kms_ctx)) {
595+
continue;
596+
} else {
597+
/* TLS errors are set in _get_stream */
598+
goto fail;
599+
}
589600
}
590601

591602
iov.iov_base = (char *) mongocrypt_binary_data (http_req);
592603
iov.iov_len = mongocrypt_binary_len (http_req);
593604

594605
if (!_mongoc_stream_writev_full (tls_stream, &iov, 1, sockettimeout, error)) {
595-
goto fail;
606+
if (mongocrypt_kms_ctx_fail (kms_ctx)) {
607+
continue;
608+
} else {
609+
bson_error_t kms_error;
610+
BSON_ASSERT (!_kms_ctx_check_error (kms_ctx, &kms_error, true));
611+
bson_set_error (error,
612+
MONGOC_ERROR_STREAM,
613+
MONGOC_ERROR_STREAM_SOCKET,
614+
"%s. Failed to write to KMS stream: %s",
615+
kms_error.message,
616+
endpoint);
617+
goto fail;
618+
}
596619
}
597620

598621
/* Read and feed reply. */
@@ -608,17 +631,21 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
608631
}
609632

610633
read_ret = mongoc_stream_read (tls_stream, buf, bytes_needed, 1 /* min_bytes. */, sockettimeout);
611-
if (read_ret == -1) {
612-
bson_set_error (
613-
error, MONGOC_ERROR_STREAM, MONGOC_ERROR_STREAM_SOCKET, "failed to read from KMS stream: %d", errno);
614-
goto fail;
634+
if (read_ret <= 0) {
635+
if (mongocrypt_kms_ctx_fail (kms_ctx)) {
636+
break; // Stop reading reply.
637+
} else {
638+
bson_error_t kms_error;
639+
BSON_ASSERT (!_kms_ctx_check_error (kms_ctx, &kms_error, true));
640+
bson_set_error (error,
641+
MONGOC_ERROR_STREAM,
642+
MONGOC_ERROR_STREAM_SOCKET,
643+
"%s. Failed to read from KMS stream to: %s",
644+
kms_error.message,
645+
endpoint);
646+
goto fail;
647+
}
615648
}
616-
617-
if (read_ret == 0) {
618-
bson_set_error (error, MONGOC_ERROR_STREAM, MONGOC_ERROR_STREAM_SOCKET, "unexpected EOF from KMS stream");
619-
goto fail;
620-
}
621-
622649
mongocrypt_binary_destroy (http_reply);
623650

624651
BSON_ASSERT (bson_in_range_signed (uint32_t, read_ret));
@@ -628,7 +655,6 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
628655
goto fail;
629656
}
630657
}
631-
kms_ctx = mongocrypt_ctx_next_kms_ctx (state_machine->ctx);
632658
}
633659
/* When NULL is returned by mongocrypt_ctx_next_kms_ctx, this can either be
634660
* an error or end-of-list. */
@@ -1366,6 +1392,7 @@ _mongoc_crypt_new (const bson_t *kms_providers,
13661392
crypt = bson_malloc0 (sizeof (*crypt));
13671393
crypt->kmsid_to_tlsopts = mcd_mapof_kmsid_to_tlsopts_new ();
13681394
crypt->handle = mongocrypt_new ();
1395+
mongocrypt_setopt_retry_kms (crypt->handle, true);
13691396

13701397
// Stash away a copy of the user's kmsProviders in case we need to lazily
13711398
// load credentials.

src/libmongoc/tests/test-mongoc-client-side-encryption.c

Lines changed: 153 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "mongoc/mongoc-client-side-encryption-private.h"
3232

3333
#include "mongoc/mongoc-uri.h"
34+
#include "mongoc/mongoc-http-private.h"
3435

3536
static void
3637
_before_test (json_test_ctx_t *ctx, const bson_t *test)
@@ -2596,7 +2597,7 @@ test_kms_tls_cert_wrong_host (void *unused)
25962597
mongoc_client_destroy (client);
25972598
}
25982599

2599-
typedef enum { NO_CLIENT_CERT, WITH_TLS, INVALID_HOSTNAME, EXPIRED, WITH_NAMES } tls_test_ce_t;
2600+
typedef enum { NO_CLIENT_CERT, WITH_TLS, INVALID_HOSTNAME, EXPIRED, WITH_NAMES, RETRY } tls_test_ce_t;
26002601

26012602
static mongoc_client_encryption_t *
26022603
_tls_test_make_client_encryption (mongoc_client_t *keyvault_client, tls_test_ce_t test_ce)
@@ -2619,7 +2620,9 @@ _tls_test_make_client_encryption (mongoc_client_t *keyvault_client, tls_test_ce_
26192620
char *ca_file = test_framework_getenv_required ("MONGOC_TEST_CSFLE_TLS_CA_FILE");
26202621
char *certificate_key_file = test_framework_getenv_required ("MONGOC_TEST_CSFLE_TLS_CERTIFICATE_KEY_FILE");
26212622

2622-
if (test_ce == WITH_TLS) {
2623+
if (test_ce == WITH_TLS || test_ce == RETRY) {
2624+
const char *port = test_ce == RETRY ? "9003" : "9002";
2625+
26232626
kms_providers = tmp_bson ("{'aws': {'accessKeyId': '%s', 'secretAccessKey': '%s' }}",
26242627
mongoc_test_aws_access_key_id,
26252628
mongoc_test_aws_secret_access_key);
@@ -2629,19 +2632,21 @@ _tls_test_make_client_encryption (mongoc_client_t *keyvault_client, tls_test_ce_
26292632
bson_concat (kms_providers,
26302633
tmp_bson ("{'azure': {'tenantId': '%s', 'clientId': '%s', "
26312634
"'clientSecret': '%s', "
2632-
"'identityPlatformEndpoint': '127.0.0.1:9002' }}",
2635+
"'identityPlatformEndpoint': '127.0.0.1:%s' }}",
26332636
mongoc_test_azure_tenant_id,
26342637
mongoc_test_azure_client_id,
2635-
mongoc_test_azure_client_secret));
2638+
mongoc_test_azure_client_secret,
2639+
port));
26362640
bson_concat (
26372641
tls_opts,
26382642
tmp_bson ("{'azure': {'tlsCaFile': '%s', 'tlsCertificateKeyFile': '%s' }}", ca_file, certificate_key_file));
26392643

26402644
bson_concat (kms_providers,
26412645
tmp_bson ("{'gcp': { 'email': '%s', 'privateKey': '%s', "
2642-
"'endpoint': '127.0.0.1:9002' }}",
2646+
"'endpoint': '127.0.0.1:%s' }}",
26432647
mongoc_test_gcp_email,
2644-
mongoc_test_gcp_privatekey));
2648+
mongoc_test_gcp_privatekey,
2649+
port));
26452650
bson_concat (
26462651
tls_opts,
26472652
tmp_bson ("{'gcp': {'tlsCaFile': '%s', 'tlsCertificateKeyFile': '%s' }}", ca_file, certificate_key_file));
@@ -3228,6 +3233,60 @@ test_kms_tls_options_extra_rejected (void *unused)
32283233
mongoc_client_destroy (keyvault_client);
32293234
}
32303235

3236+
static const char *failpoint_server_ip = "127.0.0.1";
3237+
static const int failpoint_server_port = 9003;
3238+
3239+
static void
3240+
reset_failpoints (mongoc_ssl_opt_t *ssl_opts)
3241+
{
3242+
mongoc_http_request_t req;
3243+
mongoc_http_response_t res;
3244+
bool r;
3245+
bson_error_t error = {0};
3246+
3247+
_mongoc_http_request_init (&req);
3248+
_mongoc_http_response_init (&res);
3249+
3250+
req.method = "POST";
3251+
req.host = failpoint_server_ip;
3252+
req.port = failpoint_server_port;
3253+
req.path = "/reset";
3254+
3255+
r = _mongoc_http_send (&req, 10000, true, ssl_opts, &res, &error);
3256+
ASSERT_OR_PRINT (r, error);
3257+
_mongoc_http_response_cleanup (&res);
3258+
}
3259+
3260+
static void
3261+
set_retry_failpoint (mongoc_ssl_opt_t *ssl_opts, bool network, uint32_t count)
3262+
{
3263+
mongoc_http_request_t req;
3264+
mongoc_http_response_t res;
3265+
bool r;
3266+
bson_error_t error = {0};
3267+
3268+
_mongoc_http_request_init (&req);
3269+
_mongoc_http_response_init (&res);
3270+
3271+
req.method = "POST";
3272+
req.host = failpoint_server_ip;
3273+
req.port = failpoint_server_port;
3274+
if (network) {
3275+
req.path = "/set_failpoint/network";
3276+
} else {
3277+
req.path = "/set_failpoint/http";
3278+
}
3279+
req.extra_headers = "Content-Type: application/json\r\n";
3280+
char count_json[25];
3281+
sprintf (count_json, "{\"count\": %" PRIu32 "}", count);
3282+
req.body = count_json;
3283+
req.body_len = strlen (count_json);
3284+
3285+
r = _mongoc_http_send (&req, 10000, true, ssl_opts, &res, &error);
3286+
ASSERT_OR_PRINT (r, error);
3287+
_mongoc_http_response_cleanup (&res);
3288+
}
3289+
32313290
/* ee_fixture is a fixture for the Explicit Encryption prose test. */
32323291
typedef struct {
32333292
bson_value_t key1ID;
@@ -6232,6 +6291,88 @@ test_range_explicit_encryption_applies_defaults (void *unused)
62326291
mongoc_client_destroy (keyVaultClient);
62336292
}
62346293

6294+
static void
6295+
_test_retry_with_masterkey (const char *provider, bson_t *masterkey)
6296+
{
6297+
mongoc_client_t *keyvault_client = test_framework_new_default_client ();
6298+
mongoc_client_encryption_t *client_encryption = _tls_test_make_client_encryption (keyvault_client, RETRY);
6299+
bson_error_t error = {0};
6300+
bson_value_t keyid;
6301+
mongoc_client_encryption_datakey_opts_t *dkopts;
6302+
char *ca_file = test_framework_getenv_required ("MONGOC_TEST_CSFLE_TLS_CA_FILE");
6303+
char *pem_file = test_framework_getenv_required ("MONGOC_TEST_CSFLE_TLS_CERTIFICATE_KEY_FILE");
6304+
mongoc_ssl_opt_t ssl_opts = {.ca_file = ca_file, .pem_file = pem_file};
6305+
bool res;
6306+
6307+
bson_value_t to_encrypt = {.value_type = BSON_TYPE_INT32, .value.v_int32 = 123};
6308+
bson_value_t encrypted_field = {0};
6309+
mongoc_client_encryption_encrypt_opts_t *encrypt_opts = mongoc_client_encryption_encrypt_opts_new ();
6310+
mongoc_client_encryption_encrypt_opts_set_algorithm (encrypt_opts,
6311+
MONGOC_AEAD_AES_256_CBC_HMAC_SHA_512_DETERMINISTIC);
6312+
6313+
reset_failpoints (&ssl_opts);
6314+
6315+
// Case 1: createDataKey and encrypt with TCP retry
6316+
dkopts = mongoc_client_encryption_datakey_opts_new ();
6317+
mongoc_client_encryption_datakey_opts_set_masterkey (dkopts, masterkey);
6318+
set_retry_failpoint (&ssl_opts, true, 1);
6319+
res = mongoc_client_encryption_create_datakey (client_encryption, provider, dkopts, &keyid, &error);
6320+
ASSERT_OR_PRINT (res, error);
6321+
6322+
set_retry_failpoint (&ssl_opts, true, 1);
6323+
mongoc_client_encryption_encrypt_opts_set_keyid (encrypt_opts, &keyid);
6324+
res = mongoc_client_encryption_encrypt (client_encryption, &to_encrypt, encrypt_opts, &encrypted_field, &error);
6325+
ASSERT_OR_PRINT (res, error);
6326+
bson_value_destroy (&keyid);
6327+
bson_value_destroy (&encrypted_field);
6328+
mongoc_client_encryption_datakey_opts_destroy (dkopts);
6329+
6330+
// Case 2: createDataKey and encrypt with HTTP retry
6331+
dkopts = mongoc_client_encryption_datakey_opts_new ();
6332+
mongoc_client_encryption_datakey_opts_set_masterkey (dkopts, masterkey);
6333+
set_retry_failpoint (&ssl_opts, false, 1);
6334+
res = mongoc_client_encryption_create_datakey (client_encryption, provider, dkopts, &keyid, &error);
6335+
ASSERT_OR_PRINT (res, error);
6336+
6337+
set_retry_failpoint (&ssl_opts, false, 1);
6338+
mongoc_client_encryption_encrypt_opts_set_keyid (encrypt_opts, &keyid);
6339+
res = mongoc_client_encryption_encrypt (client_encryption, &to_encrypt, encrypt_opts, &encrypted_field, &error);
6340+
ASSERT_OR_PRINT (res, error);
6341+
bson_value_destroy (&keyid);
6342+
bson_value_destroy (&encrypted_field);
6343+
mongoc_client_encryption_datakey_opts_destroy (dkopts);
6344+
6345+
// Case 3: createDataKey fails after too many retries
6346+
dkopts = mongoc_client_encryption_datakey_opts_new ();
6347+
mongoc_client_encryption_datakey_opts_set_masterkey (dkopts, masterkey);
6348+
set_retry_failpoint (&ssl_opts, true, 4);
6349+
res = mongoc_client_encryption_create_datakey (client_encryption, provider, dkopts, &keyid, &error);
6350+
ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_STREAM, MONGOC_ERROR_STREAM_SOCKET, "KMS request failed after");
6351+
6352+
bson_value_destroy (&keyid);
6353+
mongoc_client_encryption_datakey_opts_destroy (dkopts);
6354+
6355+
bson_free (ca_file);
6356+
bson_free (pem_file);
6357+
mongoc_client_encryption_encrypt_opts_destroy (encrypt_opts);
6358+
mongoc_client_encryption_destroy (client_encryption);
6359+
mongoc_client_destroy (keyvault_client);
6360+
}
6361+
6362+
/* Prose test 23: KMS Retry Tests */
6363+
static void
6364+
test_kms_retry (void *unused)
6365+
{
6366+
bson_t *aws_masterkey = tmp_bson (BSON_STR ({"region" : "r", "key" : "k", "endpoint" : "127.0.0.1:9003"}));
6367+
bson_t *azure_masterkey = tmp_bson (BSON_STR ({"keyVaultEndpoint" : "127.0.0.1:9003", "keyName" : "foo"}));
6368+
bson_t *gcp_masterkey = tmp_bson (BSON_STR (
6369+
{"projectId" : "foo", "location" : "bar", "keyRing" : "baz", "keyName" : "qux", "endpoint" : "127.0.0.1:9003"}));
6370+
6371+
_test_retry_with_masterkey ("aws", aws_masterkey);
6372+
_test_retry_with_masterkey ("azure", azure_masterkey);
6373+
_test_retry_with_masterkey ("gcp", gcp_masterkey);
6374+
}
6375+
62356376
void
62366377
test_client_side_encryption_install (TestSuite *suite)
62376378
{
@@ -6412,6 +6553,12 @@ test_client_side_encryption_install (TestSuite *suite)
64126553
NULL,
64136554
NULL,
64146555
test_framework_skip_if_no_client_side_encryption);
6556+
TestSuite_AddFull (suite,
6557+
"/client_side_encryption/kms_retry",
6558+
test_kms_retry,
6559+
NULL,
6560+
NULL,
6561+
test_framework_skip_if_no_client_side_encryption);
64156562

64166563
TestSuite_AddFull (suite,
64176564
"/client_side_encryption/explicit_encryption/case1",

0 commit comments

Comments
 (0)