Skip to content

CDRIVER-5517 retry KMS encrypt requests on transient errors #1577

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 39 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
7766613
Driver-side retry support
adriandole Apr 4, 2024
6466030
Retry prose tests
adriandole Apr 4, 2024
09b6124
Run retry mock server on port 9003
adriandole Apr 12, 2024
5135e25
evg config generator
adriandole Apr 16, 2024
ec65479
Erase test datakeys
adriandole Apr 17, 2024
82cccb0
Set port in kms_providers
adriandole Apr 17, 2024
5f89053
Use new config
adriandole Apr 17, 2024
1a1c2c6
BSON_STR syntax
adriandole Apr 19, 2024
df621fb
Remove unnecessary cleanup
adriandole Apr 19, 2024
57523dc
TCP retry draft
adriandole Apr 26, 2024
1066d4a
Use failpoint server to test TCP errors
adriandole May 7, 2024
3115561
Retry TCP writes
adriandole May 7, 2024
5929686
mock server renamed
adriandole May 7, 2024
5df640e
strict prototype
adriandole May 8, 2024
23979b2
retry logic revision
adriandole May 8, 2024
327ec95
Don't manually set TLS errors
adriandole May 8, 2024
5f6bcbb
Add retry sleep, refactor retry control flow
adriandole May 17, 2024
ec66aee
extra break
adriandole May 20, 2024
f7bb67b
Field names from spec
adriandole May 20, 2024
a7b119f
Reorder to prose test number
adriandole May 20, 2024
e1d8966
Combine TCP and HTTP retry logic
adriandole May 28, 2024
2ceffe4
Use POST requests for mock server
adriandole May 28, 2024
ada77f5
Remove unused sleep time
adriandole May 28, 2024
811befb
Simplified network error handling
adriandole Jul 18, 2024
23e58f5
warning fix
adriandole Jul 18, 2024
c6bd720
Apply suggestions from code review
adriandole Sep 9, 2024
c78802f
set errors from kms
adriandole Sep 10, 2024
b15bdde
fix test leaks
adriandole Sep 10, 2024
82d3fa0
don't set KMS error for TLS
adriandole Sep 10, 2024
fc99a75
checkout libmongocrypt commit
adriandole Sep 12, 2024
f1b3241
Merge branch 'master' into retryable
adriandole Sep 12, 2024
6e58205
new prose test cases
adriandole Oct 3, 2024
fd47229
failpoint before createDataKey
adriandole Oct 7, 2024
03cb283
bump libmongocrypt
adriandole Oct 7, 2024
161735e
failpoint reset
adriandole Oct 7, 2024
5172d11
updated prose test
adriandole Oct 7, 2024
4c462fc
Update src/libmongoc/tests/test-mongoc-client-side-encryption.c
adriandole Oct 9, 2024
42ab78e
Update src/libmongoc/tests/test-mongoc-client-side-encryption.c with …
adriandole Oct 10, 2024
d59e65c
Merge branch 'master' into retryable
adriandole Oct 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class RunMockKMSServers(Function):
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/expired.pem --port 9000 &
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/wrong-host.pem --port 9001 &
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/server.pem --require_client_cert --port 9002 &
python -u kms_failpoint_server.py --port 9003 &
python -u kms_kmip_server.py &
deactivate
echo "Starting mock KMS TLS servers... done."
Expand Down
1 change: 1 addition & 0 deletions .evergreen/generated_configs/functions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ functions:
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/expired.pem --port 9000 &
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/wrong-host.pem --port 9001 &
python -u kms_http_server.py --ca_file ../x509gen/ca.pem --cert_file ../x509gen/server.pem --require_client_cert --port 9002 &
python -u kms_failpoint_server.py --port 9003 &
python -u kms_kmip_server.py &
deactivate
echo "Starting mock KMS TLS servers... done."
Expand Down
6 changes: 4 additions & 2 deletions .evergreen/scripts/compile-libmongocrypt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ compile_libmongocrypt() {
# libmongocrypt's kms-message in `src/kms-message`. Run
# `.evergreen/scripts/kms-divergence-check.sh` to ensure that there is no
# divergence in the copied files.
# TODO: once 1.12.0 is released replace the following with:

# TODO: once 1.12.0 is released (containing MONGOCRYPT-599) replace the following with:
# git clone -q --depth=1 https://github.com/mongodb/libmongocrypt --branch 1.12.0 || return
{
git clone -q https://github.com/mongodb/libmongocrypt || return
git -C libmongocrypt checkout bca8e7dc1ecb7b1c039132e07de5e0db2703c701
# Check out commit containing MONGOCRYPT-599
git -C libmongocrypt checkout 7aeaec4ae1369c7d3c5b3aea6f1da35c5e9478b0
}

declare -a crypt_cmake_flags=(
Expand Down
57 changes: 42 additions & 15 deletions src/libmongoc/src/mongoc/mongoc-crypt.c
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,9 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
mongocrypt_binary_t *http_reply = NULL;
const char *endpoint;
const int32_t sockettimeout = MONGOC_DEFAULT_SOCKETTIMEOUTMS;
kms_ctx = mongocrypt_ctx_next_kms_ctx (state_machine->ctx);
while (kms_ctx) {
int64_t sleep_usec = 0;

while ((kms_ctx = mongocrypt_ctx_next_kms_ctx (state_machine->ctx))) {
mongoc_iovec_t iov;
const mongoc_ssl_opt_t *ssl_opt;
const char *provider;
Expand Down Expand Up @@ -576,6 +577,11 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
goto fail;
}

sleep_usec = mongocrypt_kms_ctx_usleep (kms_ctx);
if (sleep_usec > 0) {
_mongoc_usleep (sleep_usec);
}

mongoc_stream_destroy (tls_stream);
tls_stream = _get_stream (endpoint, sockettimeout, ssl_opt, error);
#ifdef MONGOC_ENABLE_SSL_SECURE_CHANNEL
Expand All @@ -585,14 +591,31 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
}
#endif
if (!tls_stream) {
goto fail;
if (mongocrypt_kms_ctx_fail (kms_ctx)) {
continue;
} else {
/* TLS errors are set in _get_stream */
goto fail;
}
}

iov.iov_base = (char *) mongocrypt_binary_data (http_req);
iov.iov_len = mongocrypt_binary_len (http_req);

if (!_mongoc_stream_writev_full (tls_stream, &iov, 1, sockettimeout, error)) {
goto fail;
if (mongocrypt_kms_ctx_fail (kms_ctx)) {
continue;
} else {
bson_error_t kms_error;
BSON_ASSERT (!_kms_ctx_check_error (kms_ctx, &kms_error, true));
bson_set_error (error,
MONGOC_ERROR_STREAM,
MONGOC_ERROR_STREAM_SOCKET,
"%s. Failed to write to KMS stream: %s",
kms_error.message,
endpoint);
goto fail;
}
}

/* Read and feed reply. */
Expand All @@ -608,17 +631,21 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
}

read_ret = mongoc_stream_read (tls_stream, buf, bytes_needed, 1 /* min_bytes. */, sockettimeout);
if (read_ret == -1) {
bson_set_error (
error, MONGOC_ERROR_STREAM, MONGOC_ERROR_STREAM_SOCKET, "failed to read from KMS stream: %d", errno);
goto fail;
if (read_ret <= 0) {
if (mongocrypt_kms_ctx_fail (kms_ctx)) {
break; // Stop reading reply.
} else {
bson_error_t kms_error;
BSON_ASSERT (!_kms_ctx_check_error (kms_ctx, &kms_error, true));
bson_set_error (error,
MONGOC_ERROR_STREAM,
MONGOC_ERROR_STREAM_SOCKET,
"%s. Failed to read from KMS stream to: %s",
kms_error.message,
endpoint);
goto fail;
}
}

if (read_ret == 0) {
bson_set_error (error, MONGOC_ERROR_STREAM, MONGOC_ERROR_STREAM_SOCKET, "unexpected EOF from KMS stream");
goto fail;
}

mongocrypt_binary_destroy (http_reply);

BSON_ASSERT (bson_in_range_signed (uint32_t, read_ret));
Expand All @@ -628,7 +655,6 @@ _state_need_kms (_state_machine_t *state_machine, bson_error_t *error)
goto fail;
}
}
kms_ctx = mongocrypt_ctx_next_kms_ctx (state_machine->ctx);
}
/* When NULL is returned by mongocrypt_ctx_next_kms_ctx, this can either be
* an error or end-of-list. */
Expand Down Expand Up @@ -1366,6 +1392,7 @@ _mongoc_crypt_new (const bson_t *kms_providers,
crypt = bson_malloc0 (sizeof (*crypt));
crypt->kmsid_to_tlsopts = mcd_mapof_kmsid_to_tlsopts_new ();
crypt->handle = mongocrypt_new ();
mongocrypt_setopt_retry_kms (crypt->handle, true);

// Stash away a copy of the user's kmsProviders in case we need to lazily
// load credentials.
Expand Down
159 changes: 153 additions & 6 deletions src/libmongoc/tests/test-mongoc-client-side-encryption.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "mongoc/mongoc-client-side-encryption-private.h"

#include "mongoc/mongoc-uri.h"
#include "mongoc/mongoc-http-private.h"

static void
_before_test (json_test_ctx_t *ctx, const bson_t *test)
Expand Down Expand Up @@ -2596,7 +2597,7 @@ test_kms_tls_cert_wrong_host (void *unused)
mongoc_client_destroy (client);
}

typedef enum { NO_CLIENT_CERT, WITH_TLS, INVALID_HOSTNAME, EXPIRED, WITH_NAMES } tls_test_ce_t;
typedef enum { NO_CLIENT_CERT, WITH_TLS, INVALID_HOSTNAME, EXPIRED, WITH_NAMES, RETRY } tls_test_ce_t;

static mongoc_client_encryption_t *
_tls_test_make_client_encryption (mongoc_client_t *keyvault_client, tls_test_ce_t test_ce)
Expand All @@ -2619,7 +2620,9 @@ _tls_test_make_client_encryption (mongoc_client_t *keyvault_client, tls_test_ce_
char *ca_file = test_framework_getenv_required ("MONGOC_TEST_CSFLE_TLS_CA_FILE");
char *certificate_key_file = test_framework_getenv_required ("MONGOC_TEST_CSFLE_TLS_CERTIFICATE_KEY_FILE");

if (test_ce == WITH_TLS) {
if (test_ce == WITH_TLS || test_ce == RETRY) {
const char *port = test_ce == RETRY ? "9003" : "9002";

kms_providers = tmp_bson ("{'aws': {'accessKeyId': '%s', 'secretAccessKey': '%s' }}",
mongoc_test_aws_access_key_id,
mongoc_test_aws_secret_access_key);
Expand All @@ -2629,19 +2632,21 @@ _tls_test_make_client_encryption (mongoc_client_t *keyvault_client, tls_test_ce_
bson_concat (kms_providers,
tmp_bson ("{'azure': {'tenantId': '%s', 'clientId': '%s', "
"'clientSecret': '%s', "
"'identityPlatformEndpoint': '127.0.0.1:9002' }}",
"'identityPlatformEndpoint': '127.0.0.1:%s' }}",
mongoc_test_azure_tenant_id,
mongoc_test_azure_client_id,
mongoc_test_azure_client_secret));
mongoc_test_azure_client_secret,
port));
bson_concat (
tls_opts,
tmp_bson ("{'azure': {'tlsCaFile': '%s', 'tlsCertificateKeyFile': '%s' }}", ca_file, certificate_key_file));

bson_concat (kms_providers,
tmp_bson ("{'gcp': { 'email': '%s', 'privateKey': '%s', "
"'endpoint': '127.0.0.1:9002' }}",
"'endpoint': '127.0.0.1:%s' }}",
mongoc_test_gcp_email,
mongoc_test_gcp_privatekey));
mongoc_test_gcp_privatekey,
port));
bson_concat (
tls_opts,
tmp_bson ("{'gcp': {'tlsCaFile': '%s', 'tlsCertificateKeyFile': '%s' }}", ca_file, certificate_key_file));
Expand Down Expand Up @@ -3228,6 +3233,60 @@ test_kms_tls_options_extra_rejected (void *unused)
mongoc_client_destroy (keyvault_client);
}

/* Host and port of the mock KMS failpoint server that reset_failpoints() and
 * set_retry_failpoint() send their HTTP requests to. The Evergreen setup
 * starts kms_failpoint_server.py with `--port 9003`. */
static const char *failpoint_server_ip = "127.0.0.1";
static const int failpoint_server_port = 9003;

/* POST to the "/reset" path of the mock KMS failpoint server (presumably
 * clearing any failpoints configured by earlier tests -- confirm against
 * kms_failpoint_server.py).
 *
 * ssl_opts is forwarded unchanged to _mongoc_http_send. The result of the
 * send is checked with ASSERT_OR_PRINT. */
static void
reset_failpoints (mongoc_ssl_opt_t *ssl_opts)
{
   bson_error_t error = {0};
   mongoc_http_request_t request;
   mongoc_http_response_t response;

   /* Describe the request: POST <failpoint server>/reset with no body. */
   _mongoc_http_request_init (&request);
   request.host = failpoint_server_ip;
   request.port = failpoint_server_port;
   request.method = "POST";
   request.path = "/reset";

   _mongoc_http_response_init (&response);

   /* NOTE(review): 10000 is presumably a timeout in milliseconds and `true`
    * presumably selects TLS -- confirm against _mongoc_http_send. */
   const bool r = _mongoc_http_send (&request, 10000, true, ssl_opts, &response, &error);
   ASSERT_OR_PRINT (r, error);

   _mongoc_http_response_cleanup (&response);
}

/* Configure a failpoint on the mock KMS failpoint server.
 *
 * ssl_opts: forwarded unchanged to _mongoc_http_send.
 * network:  when true the request is POSTed to "/set_failpoint/network",
 *           otherwise to "/set_failpoint/http".
 * count:    sent to the server as the JSON body {"count": <count>}
 *           (presumably the number of upcoming requests that should fail --
 *           confirm against kms_failpoint_server.py).
 *
 * The result of the send is checked with ASSERT_OR_PRINT. */
static void
set_retry_failpoint (mongoc_ssl_opt_t *ssl_opts, bool network, uint32_t count)
{
   mongoc_http_request_t req;
   mongoc_http_response_t res;
   bool r;
   bson_error_t error = {0};
   /* The longest possible body is {"count": 4294967295}, i.e. 21 characters
    * plus the terminating NUL, so 25 bytes is always sufficient. */
   char count_json[25];

   _mongoc_http_request_init (&req);
   _mongoc_http_response_init (&res);

   req.method = "POST";
   req.host = failpoint_server_ip;
   req.port = failpoint_server_port;
   if (network) {
      req.path = "/set_failpoint/network";
   } else {
      req.path = "/set_failpoint/http";
   }
   req.extra_headers = "Content-Type: application/json\r\n";

   /* Size-bounded formatting: snprintf never writes past the end of the
    * buffer and always NUL-terminates it, unlike the unchecked sprintf. */
   snprintf (count_json, sizeof count_json, "{\"count\": %" PRIu32 "}", count);
   req.body = count_json;
   req.body_len = strlen (count_json);

   r = _mongoc_http_send (&req, 10000, true, ssl_opts, &res, &error);
   ASSERT_OR_PRINT (r, error);
   _mongoc_http_response_cleanup (&res);
}

/* ee_fixture is a fixture for the Explicit Encryption prose test. */
typedef struct {
bson_value_t key1ID;
Expand Down Expand Up @@ -6232,6 +6291,88 @@ test_range_explicit_encryption_applies_defaults (void *unused)
mongoc_client_destroy (keyVaultClient);
}

/* Run the three KMS retry cases against the mock failpoint server for one KMS
 * provider.
 *
 * provider:  KMS provider name passed to
 *            mongoc_client_encryption_create_datakey (e.g. "aws").
 * masterkey: master key document for that provider; it is set on the data key
 *            options for every case.
 *
 * Case 1 sets a network failpoint with count 1 before createDataKey and again
 * before encrypt; both operations must succeed.
 * Case 2 does the same with an HTTP failpoint.
 * Case 3 sets a network failpoint with count 4 and expects createDataKey to
 * fail with a "KMS request failed after" stream/socket error. */
static void
_test_retry_with_masterkey (const char *provider, bson_t *masterkey)
{
   mongoc_client_t *keyvault_client = test_framework_new_default_client ();
   mongoc_client_encryption_t *client_encryption = _tls_test_make_client_encryption (keyvault_client, RETRY);
   bson_error_t error = {0};
   bson_value_t keyid;
   mongoc_client_encryption_datakey_opts_t *dkopts;
   char *ca_file = test_framework_getenv_required ("MONGOC_TEST_CSFLE_TLS_CA_FILE");
   char *pem_file = test_framework_getenv_required ("MONGOC_TEST_CSFLE_TLS_CERTIFICATE_KEY_FILE");
   /* TLS options used only for the control requests to the failpoint server. */
   mongoc_ssl_opt_t ssl_opts = {.ca_file = ca_file, .pem_file = pem_file};
   bool res;

   bson_value_t to_encrypt = {.value_type = BSON_TYPE_INT32, .value.v_int32 = 123};
   bson_value_t encrypted_field = {0};
   mongoc_client_encryption_encrypt_opts_t *encrypt_opts = mongoc_client_encryption_encrypt_opts_new ();
   mongoc_client_encryption_encrypt_opts_set_algorithm (encrypt_opts,
                                                        MONGOC_AEAD_AES_256_CBC_HMAC_SHA_512_DETERMINISTIC);

   /* Start from a clean server state so earlier tests cannot interfere. */
   reset_failpoints (&ssl_opts);

   // Case 1: createDataKey and encrypt with TCP retry
   dkopts = mongoc_client_encryption_datakey_opts_new ();
   mongoc_client_encryption_datakey_opts_set_masterkey (dkopts, masterkey);
   set_retry_failpoint (&ssl_opts, true, 1);
   res = mongoc_client_encryption_create_datakey (client_encryption, provider, dkopts, &keyid, &error);
   ASSERT_OR_PRINT (res, error);

   /* Re-arm the failpoint so the encrypt call also has to retry. */
   set_retry_failpoint (&ssl_opts, true, 1);
   mongoc_client_encryption_encrypt_opts_set_keyid (encrypt_opts, &keyid);
   res = mongoc_client_encryption_encrypt (client_encryption, &to_encrypt, encrypt_opts, &encrypted_field, &error);
   ASSERT_OR_PRINT (res, error);
   bson_value_destroy (&keyid);
   bson_value_destroy (&encrypted_field);
   mongoc_client_encryption_datakey_opts_destroy (dkopts);

   // Case 2: createDataKey and encrypt with HTTP retry
   dkopts = mongoc_client_encryption_datakey_opts_new ();
   mongoc_client_encryption_datakey_opts_set_masterkey (dkopts, masterkey);
   set_retry_failpoint (&ssl_opts, false, 1);
   res = mongoc_client_encryption_create_datakey (client_encryption, provider, dkopts, &keyid, &error);
   ASSERT_OR_PRINT (res, error);

   /* Re-arm the failpoint so the encrypt call also has to retry. */
   set_retry_failpoint (&ssl_opts, false, 1);
   mongoc_client_encryption_encrypt_opts_set_keyid (encrypt_opts, &keyid);
   res = mongoc_client_encryption_encrypt (client_encryption, &to_encrypt, encrypt_opts, &encrypted_field, &error);
   ASSERT_OR_PRINT (res, error);
   bson_value_destroy (&keyid);
   bson_value_destroy (&encrypted_field);
   mongoc_client_encryption_datakey_opts_destroy (dkopts);

   // Case 3: createDataKey fails after too many retries
   dkopts = mongoc_client_encryption_datakey_opts_new ();
   mongoc_client_encryption_datakey_opts_set_masterkey (dkopts, masterkey);
   set_retry_failpoint (&ssl_opts, true, 4);
   res = mongoc_client_encryption_create_datakey (client_encryption, provider, dkopts, &keyid, &error);
   /* The call itself must report failure, not merely leave an error behind. */
   BSON_ASSERT (!res);
   ASSERT_ERROR_CONTAINS (error, MONGOC_ERROR_STREAM, MONGOC_ERROR_STREAM_SOCKET, "KMS request failed after");

   /* NOTE(review): keyid was already destroyed at the end of case 2; this
    * relies on create_datakey resetting keyid when it fails -- confirm. */
   bson_value_destroy (&keyid);
   mongoc_client_encryption_datakey_opts_destroy (dkopts);

   bson_free (ca_file);
   bson_free (pem_file);
   mongoc_client_encryption_encrypt_opts_destroy (encrypt_opts);
   mongoc_client_encryption_destroy (client_encryption);
   mongoc_client_destroy (keyvault_client);
}

/* Prose test 23: KMS Retry Tests
 *
 * Builds one master key document per KMS provider, each with its endpoint set
 * to 127.0.0.1:9003, and runs the retry cases for aws, azure and gcp in that
 * order. */
static void
test_kms_retry (void *unused)
{
   const char *const providers[] = {"aws", "azure", "gcp"};
   bson_t *masterkeys[3];

   /* All three documents are created up front, before any provider runs. */
   masterkeys[0] = tmp_bson (BSON_STR ({"region" : "r", "key" : "k", "endpoint" : "127.0.0.1:9003"}));
   masterkeys[1] = tmp_bson (BSON_STR ({"keyVaultEndpoint" : "127.0.0.1:9003", "keyName" : "foo"}));
   masterkeys[2] = tmp_bson (BSON_STR (
      {"projectId" : "foo", "location" : "bar", "keyRing" : "baz", "keyName" : "qux", "endpoint" : "127.0.0.1:9003"}));

   for (size_t i = 0; i < sizeof providers / sizeof providers[0]; i++) {
      _test_retry_with_masterkey (providers[i], masterkeys[i]);
   }
}

void
test_client_side_encryption_install (TestSuite *suite)
{
Expand Down Expand Up @@ -6412,6 +6553,12 @@ test_client_side_encryption_install (TestSuite *suite)
NULL,
NULL,
test_framework_skip_if_no_client_side_encryption);
TestSuite_AddFull (suite,
"/client_side_encryption/kms_retry",
test_kms_retry,
NULL,
NULL,
test_framework_skip_if_no_client_side_encryption);

TestSuite_AddFull (suite,
"/client_side_encryption/explicit_encryption/case1",
Expand Down