Skip to content

Commit 1ba4c0b

Browse files
authored
PYTHON-3718 Faster INT2STRING (#1221)
1 parent 5831934 commit 1ba4c0b

File tree

6 files changed

+134
-11
lines changed

6 files changed

+134
-11
lines changed

bson/_cbsonmodule.c

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,99 @@ struct module_state {
8282
#define DATETIME_MS 3
8383
#define DATETIME_AUTO 4
8484

85+
/* Converts integer to its string representation in decimal notation. */
86+
extern int cbson_long_long_to_str(long long num, char* str, size_t size) {
87+
// Buffer should fit 64-bit signed integer
88+
if (size < 21) {
89+
PyErr_Format(
90+
PyExc_RuntimeError,
91+
"Buffer too small to hold long long: %d < 21", size);
92+
return -1;
93+
}
94+
int index = 0;
95+
int sign = 1;
96+
// Convert to unsigned to handle -LLONG_MIN overflow
97+
unsigned long long absNum;
98+
// Handle the case of 0
99+
if (num == 0) {
100+
str[index++] = '0';
101+
str[index] = '\0';
102+
return 0;
103+
}
104+
// Handle negative numbers
105+
if (num < 0) {
106+
sign = -1;
107+
absNum = 0ULL - (unsigned long long)num;
108+
} else {
109+
absNum = (unsigned long long)num;
110+
}
111+
// Convert the number to string
112+
unsigned long long digit;
113+
while (absNum > 0) {
114+
digit = absNum % 10ULL;
115+
str[index++] = (char)digit + '0'; // Convert digit to character
116+
absNum /= 10;
117+
}
118+
// Add minus sign if negative
119+
if (sign == -1) {
120+
str[index++] = '-';
121+
}
122+
str[index] = '\0'; // Null terminator
123+
// Reverse the string
124+
int start = 0;
125+
int end = index - 1;
126+
while (start < end) {
127+
char temp = str[start];
128+
str[start++] = str[end];
129+
str[end--] = temp;
130+
}
131+
return 0;
132+
}
133+
134+
static PyObject* _test_long_long_to_str(PyObject* self, PyObject* args) {
135+
// Test extreme values
136+
Py_ssize_t maxNum = PY_SSIZE_T_MAX;
137+
Py_ssize_t minNum = PY_SSIZE_T_MIN;
138+
Py_ssize_t num;
139+
char str_1[BUF_SIZE];
140+
char str_2[BUF_SIZE];
141+
int res = LL2STR(str_1, (long long)minNum);
142+
if (res == -1) {
143+
return NULL;
144+
}
145+
INT2STRING(str_2, (long long)minNum);
146+
if (strcmp(str_1, str_2) != 0) {
147+
PyErr_Format(
148+
PyExc_RuntimeError,
149+
"LL2STR != INT2STRING: %s != %s", str_1, str_2);
150+
return NULL;
151+
}
152+
LL2STR(str_1, (long long)maxNum);
153+
INT2STRING(str_2, (long long)maxNum);
154+
if (strcmp(str_1, str_2) != 0) {
155+
PyErr_Format(
156+
PyExc_RuntimeError,
157+
"LL2STR != INT2STRING: %s != %s", str_1, str_2);
158+
return NULL;
159+
}
160+
161+
// Test common values
162+
for (num = 0; num < 10000; num++) {
163+
char str_1[BUF_SIZE];
164+
char str_2[BUF_SIZE];
165+
LL2STR(str_1, (long long)num);
166+
INT2STRING(str_2, (long long)num);
167+
if (strcmp(str_1, str_2) != 0) {
168+
PyErr_Format(
169+
PyExc_RuntimeError,
170+
"LL2STR != INT2STRING: %s != %s", str_1, str_2);
171+
return NULL;
172+
}
173+
}
174+
175+
return args;
176+
}
177+
85178
/* Get an error class from the bson.errors module.
86179
*
87180
* Returns a new ref */
@@ -1027,13 +1120,16 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
10271120
}
10281121
for(i = 0; i < items; i++) {
10291122
int list_type_byte = pymongo_buffer_save_space(buffer, 1);
1030-
char name[16];
1123+
char name[BUF_SIZE];
10311124
PyObject* item_value;
10321125

10331126
if (list_type_byte == -1) {
10341127
return 0;
10351128
}
1036-
INT2STRING(name, (int)i);
1129+
int res = LL2STR(name, (long long)i);
1130+
if (res == -1) {
1131+
return 0;
1132+
}
10371133
if (!buffer_write_bytes(buffer, name, (int)strlen(name) + 1)) {
10381134
return 0;
10391135
}
@@ -2934,6 +3030,7 @@ static PyMethodDef _CBSONMethods[] = {
29343030
{"_element_to_dict", _cbson_element_to_dict, METH_VARARGS,
29353031
"Decode a single key, value pair."},
29363032
{"_array_of_documents_to_buffer", _cbson_array_of_documents_to_buffer, METH_VARARGS, "Convert raw array of documents to a stream of BSON documents"},
3033+
{"_test_long_long_to_str", _test_long_long_to_str, METH_VARARGS, "Test conversion of extreme and common Py_ssize_t values to str."},
29373034
{NULL, NULL, 0, NULL}
29383035
};
29393036

bson/_cbsonmodule.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,28 +23,35 @@
2323
/*
2424
* This macro is basically an implementation of asprintf for win32
2525
* We print to the provided buffer to get the int value as a string.
26+
* USE LL2STR. This is kept only to test LL2STR.
2627
*/
2728
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
2829
#define INT2STRING(buffer, i) \
2930
_snprintf_s((buffer), \
30-
_scprintf("%d", (i)) + 1, \
31-
_scprintf("%d", (i)) + 1, \
32-
"%d", \
31+
_scprintf("%lld", (i)) + 1, \
32+
_scprintf("%lld", (i)) + 1, \
33+
"%lld", \
3334
(i))
3435
#define STRCAT(dest, n, src) strcat_s((dest), (n), (src))
3536
#else
3637
#define INT2STRING(buffer, i) \
3738
_snprintf((buffer), \
38-
_scprintf("%d", (i)) + 1, \
39-
"%d", \
39+
_scprintf("%lld", (i)) + 1, \
40+
"%lld", \
4041
(i))
4142
#define STRCAT(dest, n, src) strcat((dest), (src))
4243
#endif
4344
#else
44-
#define INT2STRING(buffer, i) snprintf((buffer), sizeof((buffer)), "%d", (i))
45+
#define INT2STRING(buffer, i) snprintf((buffer), sizeof((buffer)), "%lld", (i))
4546
#define STRCAT(dest, n, src) strcat((dest), (src))
4647
#endif
4748

49+
/* Just enough space in char array to hold LLONG_MIN and null terminator.
 * cbson_long_long_to_str rejects any buffer smaller than this. */
50+
#define BUF_SIZE 21
51+
/* Converts integer to its string representation in decimal notation.
 *
 * Writes the digits of `num` (with a leading '-' when negative) and a null
 * terminator into `str`; `size` is the capacity of `str` in bytes.
 * Returns 0 on success, or -1 with a Python RuntimeError set when `size`
 * is less than 21. */
52+
extern int cbson_long_long_to_str(long long int num, char* str, size_t size);
53+
/* Convenience wrapper that supplies the buffer capacity automatically.
 * `buffer` must be a char array, not a char*: the capacity comes from
 * sizeof(buffer), which for a pointer would be the pointer's own size
 * rather than the size of the storage it points to. */
#define LL2STR(buffer, i) cbson_long_long_to_str((i), (buffer), sizeof(buffer))
54+
4855
typedef struct type_registry_t {
4956
PyObject* encoder_map;
5057
PyObject* decoder_map;

doc/contributors.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,4 @@ The following is a list of people who have contributed to
9494
- Arie Bovenberg (ariebovenberg)
9595
- Ben Warner (bcwarner)
9696
- Jean-Christophe Fillion-Robin (jcfr)
97+
- Sean Cheah (thalassemia)

pymongo/_cmessagemodule.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -767,8 +767,11 @@ _batched_write_command(
767767
int cur_doc_begin;
768768
int cur_size;
769769
int enough_data = 0;
770-
char key[16];
771-
INT2STRING(key, idx);
770+
char key[BUF_SIZE];
771+
int res = LL2STR(key, (long long)idx);
772+
if (res == -1) {
773+
return 0;
774+
}
772775
if (!buffer_write_bytes(buffer, "\x03", 1) ||
773776
!buffer_write_bytes(buffer, key, (int)strlen(key) + 1)) {
774777
goto fail;

setup.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,12 @@ def build_extension(self, ext):
263263
Extension(
264264
"pymongo._cmessage",
265265
include_dirs=["bson"],
266-
sources=["pymongo/_cmessagemodule.c", "bson/buffer.c"],
266+
sources=[
267+
"pymongo/_cmessagemodule.c",
268+
"bson/_cbsonmodule.c",
269+
"bson/time64.c",
270+
"bson/buffer.c",
271+
],
267272
),
268273
]
269274

test/test_bson.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,5 +1310,15 @@ def __int__(self):
13101310
encode({"x": float_ms})
13111311

13121312

1313+
class TestLongLongToString(unittest.TestCase):
    """Runs the C extension's built-in self-test for long long formatting."""

    def test_long_long_to_string(self):
        # Report a skip instead of a silent pass when the C extension is
        # unavailable, so the missing coverage shows up in the test report.
        try:
            from bson import _cbson
        except ImportError:
            self.skipTest("_cbson was not imported. Check compilation logs.")

        # Raises RuntimeError if the C-side comparison finds a mismatch.
        _cbson._test_long_long_to_str()
1321+
1322+
13131323
if __name__ == "__main__":
13141324
unittest.main()

0 commit comments

Comments
 (0)