Skip to content

Commit 84ac397

Browse files
kevinAlbseramongodbrcsanchez97
authored
CDRIVER-5669 improve string function handling of large strings (#1696)
* add `bson_string_ensure_space` to check addition and casts. * add private `bson_next_power_of_two_u32` to avoid casts. * document length expectations * add test for maximum capacity of `bson_string_t` Skip test by default. Large allocations fail on TSan tasks, and are slow on ASan tasks. Test may be used to verify behavior locally. --------- Co-authored-by: Ezra Chung <[email protected]> Co-authored-by: Roberto C. Sánchez <[email protected]>
1 parent dc3747c commit 84ac397

File tree

8 files changed

+132
-43
lines changed

8 files changed

+132
-43
lines changed

CONTRIBUTING.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,10 @@ To test a load balanced deployment, set the following environment variables:
324324
* `SINGLE_MONGOS_LB_URI=<string>` to a MongoDB URI with a host of a load balancer fronting one mongos.
325325
* `MULTI_MONGOS_LB_URI=<string>` to a MongoDB URI with a host of a load balancer fronting multiple mongos processes.
326326

327+
To run test cases with large allocations, set:
328+
329+
* `MONGOC_TEST_LARGE_ALLOCATIONS=on` This may result in sudden test suite termination due to allocation failure. Use with caution.
330+
327331
All tests should pass before submitting a patch.
328332

329333
## Configuring the test runner

src/libbson/doc/bson_string_append.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ Description
2222

2323
Appends the ASCII or UTF-8 encoded string ``str`` to ``string``. This is not suitable for embedding NULLs in strings.
2424

25+
.. warning:: This function will abort if the length of the resulting string (including the NULL terminator) would exceed ``UINT32_MAX``.

src/libbson/doc/bson_string_append_c.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ Description
2222

2323
Appends ``c`` to the string builder ``string``.
2424

25+
.. warning:: This function will abort if the length of the resulting string (including the NULL terminator) would exceed ``UINT32_MAX``.

src/libbson/doc/bson_string_append_printf.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ Description
2323

2424
Like bson_string_append() but formats a printf style string and then appends that to ``string``.
2525

26+
.. warning:: This function will abort if the length of the resulting string (including the NULL terminator) would exceed ``UINT32_MAX``.

src/libbson/doc/bson_string_append_unichar.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ Description
2222

2323
Appends a unicode character to string. The character will be encoded into its multi-byte UTF-8 representation.
2424

25+
.. warning:: This function will abort if the length of the resulting string (including the NULL terminator) would exceed ``UINT32_MAX``.

src/libbson/doc/bson_string_new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ Description
2121

2222
Creates a new string builder, which uses power-of-two growth of buffers. Use the various bson_string_append*() functions to append to the string.
2323

24+
.. warning:: This function will abort if the length of the resulting string (including the NULL terminator) would exceed ``UINT32_MAX``.
25+
2426
Returns
2527
-------
2628

src/libbson/src/bson/bson-string.c

Lines changed: 57 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,48 @@
3131
#include <string.h>
3232
#endif
3333

34+
// `bson_next_power_of_two_u32` returns 0 on overflow.
35+
static BSON_INLINE uint32_t
36+
bson_next_power_of_two_u32 (uint32_t v)
37+
{
38+
BSON_ASSERT (v > 0);
39+
40+
// https://graphics.stanford.edu/%7Eseander/bithacks.html#RoundUpPowerOf2
41+
v--;
42+
v |= v >> 1;
43+
v |= v >> 2;
44+
v |= v >> 4;
45+
v |= v >> 8;
46+
v |= v >> 16;
47+
v++;
48+
49+
return v;
50+
}
51+
52+
// `bson_string_ensure_space` ensures `string` has enough room for `needed` + a null terminator.
53+
static void
54+
bson_string_ensure_space (bson_string_t *string, uint32_t needed)
55+
{
56+
BSON_ASSERT_PARAM (string);
57+
BSON_ASSERT (needed <= UINT32_MAX - 1u);
58+
needed += 1u; // Add one for trailing NULL byte.
59+
if (string->alloc >= needed) {
60+
return;
61+
}
62+
// Get the next largest power of 2 if possible.
63+
uint32_t alloc = bson_next_power_of_two_u32 (needed);
64+
if (alloc == 0) {
65+
// Overflowed: saturate at UINT32_MAX.
66+
alloc = UINT32_MAX;
67+
}
68+
if (!string->str) {
69+
string->str = bson_malloc (alloc);
70+
} else {
71+
string->str = bson_realloc (string->str, alloc);
72+
}
73+
string->alloc = alloc;
74+
}
75+
3476
/*
3577
*--------------------------------------------------------------------------
3678
*
@@ -62,33 +104,18 @@ bson_string_t *
62104
bson_string_new (const char *str) /* IN */
63105
{
64106
bson_string_t *ret;
65-
size_t len_sz;
66107

67108
ret = bson_malloc0 (sizeof *ret);
109+
const size_t len_sz = str == NULL ? 0u : strlen (str);
110+
BSON_ASSERT (bson_in_range_unsigned (uint32_t, len_sz));
111+
const uint32_t len_u32 = (uint32_t) len_sz;
112+
bson_string_ensure_space (ret, len_u32);
68113
if (str) {
69-
len_sz = strlen (str);
70-
BSON_ASSERT (len_sz <= UINT32_MAX);
71-
ret->len = (uint32_t) len_sz;
72-
} else {
73-
ret->len = 0;
74-
}
75-
ret->alloc = ret->len + 1;
76-
77-
if (!bson_is_power_of_two (ret->alloc)) {
78-
len_sz = bson_next_power_of_two ((size_t) ret->alloc);
79-
BSON_ASSERT (len_sz <= UINT32_MAX);
80-
ret->alloc = (uint32_t) len_sz;
81-
}
82-
83-
BSON_ASSERT (ret->alloc >= ret->len + 1);
84-
85-
ret->str = bson_malloc (ret->alloc);
86-
87-
if (str) {
88-
memcpy (ret->str, str, ret->len);
114+
memcpy (ret->str, str, len_sz);
89115
}
90116

91-
ret->str[ret->len] = '\0';
117+
ret->str[len_u32] = '\0';
118+
ret->len = len_u32;
92119

93120
return ret;
94121
}
@@ -135,31 +162,18 @@ void
135162
bson_string_append (bson_string_t *string, /* IN */
136163
const char *str) /* IN */
137164
{
138-
uint32_t len;
139-
size_t len_sz;
140-
141165
BSON_ASSERT (string);
142166
BSON_ASSERT (str);
143167

144-
len_sz = strlen (str);
168+
const size_t len_sz = strlen (str);
145169
BSON_ASSERT (bson_in_range_unsigned (uint32_t, len_sz));
146-
len = (uint32_t) len_sz;
147-
148-
if ((string->alloc - string->len - 1) < len) {
149-
BSON_ASSERT (string->alloc <= UINT32_MAX - len);
150-
string->alloc += len;
151-
if (!bson_is_power_of_two (string->alloc)) {
152-
len_sz = bson_next_power_of_two ((size_t) string->alloc);
153-
BSON_ASSERT (len_sz <= UINT32_MAX);
154-
string->alloc = (uint32_t) len_sz;
155-
}
156-
BSON_ASSERT (string->alloc >= string->len + len);
157-
string->str = bson_realloc (string->str, string->alloc);
158-
}
159-
160-
memcpy (string->str + string->len, str, len);
161-
string->len += len;
162-
string->str[string->len] = '\0';
170+
const uint32_t len_u32 = (uint32_t) len_sz;
171+
BSON_ASSERT (len_u32 <= UINT32_MAX - string->len);
172+
const uint32_t new_len = len_u32 + string->len;
173+
bson_string_ensure_space (string, new_len);
174+
memcpy (string->str + string->len, str, len_sz);
175+
string->str[new_len] = '\0';
176+
string->len = new_len;
163177
}
164178

165179

src/libbson/tests/test-string.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <bson/bson.h>
1919

2020
#include "TestSuite.h"
21+
#include "test-libmongoc.h"
2122

2223

2324
static void
@@ -303,6 +304,68 @@ test_bson_strcasecmp (void)
303304
BSON_ASSERT (bson_strcasecmp ("FoZ", "foo") > 0);
304305
}
305306

307+
static void
308+
test_bson_string_capacity (void *unused)
309+
{
310+
BSON_UNUSED (unused);
311+
312+
char *large_str = bson_malloc (UINT32_MAX);
313+
memset (large_str, 's', UINT32_MAX); // Do not NULL terminate. Each test case sets NULL byte.
314+
315+
// Test the largest possible string that can be constructed.
316+
{
317+
large_str[UINT32_MAX - 1u] = '\0'; // Set size.
318+
bson_string_t *str = bson_string_new (large_str);
319+
bson_string_free (str, true);
320+
large_str[UINT32_MAX - 1u] = 's'; // Restore.
321+
}
322+
323+
// Test appending with `bson_string_append` to get maximum size.
324+
{
325+
large_str[UINT32_MAX - 1u] = '\0'; // Set size.
326+
bson_string_t *str = bson_string_new ("");
327+
bson_string_append (str, large_str);
328+
bson_string_free (str, true);
329+
large_str[UINT32_MAX - 1u] = 's'; // Restore.
330+
}
331+
332+
// Test appending with `bson_string_append_c` to get maximum size.
333+
{
334+
large_str[UINT32_MAX - 2u] = '\0'; // Set size.
335+
bson_string_t *str = bson_string_new (large_str);
336+
bson_string_append_c (str, 'c');
337+
bson_string_free (str, true);
338+
large_str[UINT32_MAX - 2u] = 's'; // Restore.
339+
}
340+
341+
// Test appending with `bson_string_append_printf` to get maximum size.
342+
{
343+
large_str[UINT32_MAX - 2u] = '\0'; // Set size.
344+
bson_string_t *str = bson_string_new (large_str);
345+
bson_string_append_printf (str, "c");
346+
bson_string_free (str, true);
347+
large_str[UINT32_MAX - 2u] = 's'; // Restore.
348+
}
349+
350+
// Test appending with single characters.
351+
{
352+
large_str[UINT32_MAX - 2u] = '\0'; // Set size.
353+
bson_string_t *str = bson_string_new (large_str);
354+
bson_string_append_unichar (str, (bson_unichar_t) 's');
355+
bson_string_free (str, true);
356+
large_str[UINT32_MAX - 2u] = 's'; // Restore.
357+
}
358+
359+
bson_free (large_str);
360+
}
361+
362+
static int
363+
skip_if_no_large_allocations (void)
364+
{
365+
// Skip tests requiring large allocations.
366+
// Large allocations were observed to fail when run with TSan, and are time consuming with ASan.
367+
return test_framework_getenv_bool ("MONGOC_TEST_LARGE_ALLOCATIONS");
368+
}
306369

307370
void
308371
test_string_install (TestSuite *suite)
@@ -320,4 +383,6 @@ test_string_install (TestSuite *suite)
320383
TestSuite_Add (suite, "/bson/string/snprintf", test_bson_snprintf);
321384
TestSuite_Add (suite, "/bson/string/strnlen", test_bson_strnlen);
322385
TestSuite_Add (suite, "/bson/string/strcasecmp", test_bson_strcasecmp);
386+
TestSuite_AddFull (
387+
suite, "/bson/string/capacity", test_bson_string_capacity, NULL, NULL, skip_if_no_large_allocations);
323388
}

0 commit comments

Comments
 (0)