Skip to content

PYTHON-3718 Faster INT2STRING #1221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 99 additions & 2 deletions bson/_cbsonmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,99 @@ struct module_state {
#define DATETIME_MS 3
#define DATETIME_AUTO 4

/* Converts integer to its string representation in decimal notation. */
extern int cbson_long_long_to_str(long long num, char* str, size_t size) {
// Buffer should fit 64-bit signed integer
if (size < 21) {
PyErr_Format(
PyExc_RuntimeError,
"Buffer too small to hold long long: %d < 21", size);
return -1;
}
int index = 0;
int sign = 1;
// Convert to unsigned to handle -LLONG_MIN overflow
unsigned long long absNum;
// Handle the case of 0
if (num == 0) {
str[index++] = '0';
str[index] = '\0';
return 0;
}
// Handle negative numbers
if (num < 0) {
sign = -1;
absNum = 0ULL - (unsigned long long)num;
} else {
absNum = (unsigned long long)num;
}
// Convert the number to string
unsigned long long digit;
while (absNum > 0) {
digit = absNum % 10ULL;
str[index++] = (char)digit + '0'; // Convert digit to character
absNum /= 10;
}
// Add minus sign if negative
if (sign == -1) {
str[index++] = '-';
}
str[index] = '\0'; // Null terminator
// Reverse the string
int start = 0;
int end = index - 1;
while (start < end) {
char temp = str[start];
str[start++] = str[end];
str[end--] = temp;
}
return 0;
}

static PyObject* _test_long_long_to_str(PyObject* self, PyObject* args) {
// Test extreme values
Py_ssize_t maxNum = PY_SSIZE_T_MAX;
Py_ssize_t minNum = PY_SSIZE_T_MIN;
Py_ssize_t num;
char str_1[BUF_SIZE];
char str_2[BUF_SIZE];
int res = LL2STR(str_1, (long long)minNum);
if (res == -1) {
return NULL;
}
INT2STRING(str_2, (long long)minNum);
if (strcmp(str_1, str_2) != 0) {
PyErr_Format(
PyExc_RuntimeError,
"LL2STR != INT2STRING: %s != %s", str_1, str_2);
return NULL;
}
LL2STR(str_1, (long long)maxNum);
INT2STRING(str_2, (long long)maxNum);
if (strcmp(str_1, str_2) != 0) {
PyErr_Format(
PyExc_RuntimeError,
"LL2STR != INT2STRING: %s != %s", str_1, str_2);
return NULL;
}

// Test common values
for (num = 0; num < 10000; num++) {
char str_1[BUF_SIZE];
char str_2[BUF_SIZE];
LL2STR(str_1, (long long)num);
INT2STRING(str_2, (long long)num);
if (strcmp(str_1, str_2) != 0) {
PyErr_Format(
PyExc_RuntimeError,
"LL2STR != INT2STRING: %s != %s", str_1, str_2);
return NULL;
}
}

return args;
}

/* Get an error class from the bson.errors module.
*
* Returns a new ref */
Expand Down Expand Up @@ -1030,13 +1123,16 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
}
for(i = 0; i < items; i++) {
int list_type_byte = pymongo_buffer_save_space(buffer, 1);
char name[16];
char name[BUF_SIZE];
PyObject* item_value;

if (list_type_byte == -1) {
return 0;
}
INT2STRING(name, (int)i);
int res = LL2STR(name, (long long)i);
if (res == -1) {
return 0;
}
if (!buffer_write_bytes(buffer, name, (int)strlen(name) + 1)) {
return 0;
}
Expand Down Expand Up @@ -2935,6 +3031,7 @@ static PyMethodDef _CBSONMethods[] = {
{"_element_to_dict", _cbson_element_to_dict, METH_VARARGS,
"Decode a single key, value pair."},
{"_array_of_documents_to_buffer", _cbson_array_of_documents_to_buffer, METH_VARARGS, "Convert raw array of documents to a stream of BSON documents"},
{"_test_long_long_to_str", _test_long_long_to_str, METH_VARARGS, "Test conversion of extreme and common Py_ssize_t values to str."},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great idea!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Full credit to @1fish2! Are the Evergreen tests failing due to an issue in the PR? Can't see the details.

{NULL, NULL, 0, NULL}
};

Expand Down
19 changes: 13 additions & 6 deletions bson/_cbsonmodule.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,35 @@
/*
* This macro is basically an implementation of asprintf for win32
* We print to the provided buffer to get the string value as an int.
* USE LL2STR. This is kept only to test LL2STR.
*/
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#define INT2STRING(buffer, i) \
_snprintf_s((buffer), \
_scprintf("%d", (i)) + 1, \
_scprintf("%d", (i)) + 1, \
"%d", \
_scprintf("%lld", (i)) + 1, \
_scprintf("%lld", (i)) + 1, \
"%lld", \
(i))
#define STRCAT(dest, n, src) strcat_s((dest), (n), (src))
#else
#define INT2STRING(buffer, i) \
_snprintf((buffer), \
_scprintf("%d", (i)) + 1, \
"%d", \
_scprintf("%lld", (i)) + 1, \
"%lld", \
(i))
#define STRCAT(dest, n, src) strcat((dest), (src))
#endif
#else
#define INT2STRING(buffer, i) snprintf((buffer), sizeof((buffer)), "%d", (i))
#define INT2STRING(buffer, i) snprintf((buffer), sizeof((buffer)), "%lld", (i))
#define STRCAT(dest, n, src) strcat((dest), (src))
#endif

/* Just enough space in char array to hold LLONG_MIN and null terminator */
#define BUF_SIZE 21
/* Converts integer to its string representation in decimal notation. */
extern int cbson_long_long_to_str(long long int num, char* str, size_t size);
#define LL2STR(buffer, i) cbson_long_long_to_str((i), (buffer), sizeof(buffer))

typedef struct type_registry_t {
PyObject* encoder_map;
PyObject* decoder_map;
Expand Down
1 change: 1 addition & 0 deletions doc/contributors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,4 @@ The following is a list of people who have contributed to
- Arie Bovenberg (ariebovenberg)
- Ben Warner (bcwarner)
- Jean-Christophe Fillion-Robin (jcfr)
- Sean Cheah (thalassemia)
7 changes: 5 additions & 2 deletions pymongo/_cmessagemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -761,8 +761,11 @@ _batched_write_command(
int cur_doc_begin;
int cur_size;
int enough_data = 0;
char key[16];
INT2STRING(key, idx);
char key[BUF_SIZE];
int res = LL2STR(key, (long long)idx);
if (res == -1) {
return 0;
}
if (!buffer_write_bytes(buffer, "\x03", 1) ||
!buffer_write_bytes(buffer, key, (int)strlen(key) + 1)) {
goto fail;
Expand Down
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,12 @@ def build_extension(self, ext):
Extension(
"pymongo._cmessage",
include_dirs=["bson"],
sources=["pymongo/_cmessagemodule.c", "bson/buffer.c"],
sources=[
"pymongo/_cmessagemodule.c",
"bson/_cbsonmodule.c",
"bson/time64.c",
"bson/buffer.c",
],
),
]

Expand Down
10 changes: 10 additions & 0 deletions test/test_bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -1310,5 +1310,15 @@ def __int__(self):
encode({"x": float_ms})


class TestLongLongToString(unittest.TestCase):
def test_long_long_to_string(self):
try:
from bson import _cbson

_cbson._test_long_long_to_str()
except ImportError:
print("_cbson was not imported. Check compilation logs.")


if __name__ == "__main__":
unittest.main()