Skip to content

Commit a2059dc

Browse files
authored
PYTHON-4663 Fix compatibility with dateutil timezones (mongodb#1812)
1 parent c6967ab commit a2059dc

File tree

9 files changed

+205
-150
lines changed

9 files changed

+205
-150
lines changed

bson/_cbsonmodule.c

Lines changed: 158 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,10 @@ struct module_state {
5353
PyObject* Decimal128;
5454
PyObject* Mapping;
5555
PyObject* DatetimeMS;
56-
PyObject* _min_datetime_ms;
57-
PyObject* _max_datetime_ms;
56+
PyObject* min_datetime;
57+
PyObject* max_datetime;
58+
PyObject* replace_args;
59+
PyObject* replace_kwargs;
5860
PyObject* _type_marker_str;
5961
PyObject* _flags_str;
6062
PyObject* _pattern_str;
@@ -80,6 +82,8 @@ struct module_state {
8082
PyObject* _from_uuid_str;
8183
PyObject* _as_uuid_str;
8284
PyObject* _from_bid_str;
85+
int64_t min_millis;
86+
int64_t max_millis;
8387
};
8488

8589
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
@@ -253,7 +257,7 @@ static PyObject* datetime_from_millis(long long millis) {
253257
* 2. Multiply that by 1000: 253402300799000
254258
* 3. Add in microseconds divided by 1000 253402300799999
255259
*
256-
* (Note: BSON doesn't support microsecond accuracy, hence the rounding.)
260+
* (Note: BSON doesn't support microsecond accuracy, hence the truncation.)
257261
*
258262
* To decode we could do:
259263
* 1. Get seconds: timestamp / 1000: 253402300799
@@ -376,6 +380,118 @@ static int millis_from_datetime_ms(PyObject* dt, long long* out){
376380
return 1;
377381
}
378382

383+
/* Ask `tzinfo` for its UTC offset at `when` and convert it to milliseconds.
 *
 * tzinfo        - object whose utcoffset() method is called.
 * utcoffset_str - interned method name used for the call.
 * when          - datetime passed as the single argument to utcoffset().
 * out           - receives the offset in milliseconds; left at 0 when
 *                 utcoffset() returns None.
 *
 * Returns 1 on success. Returns 0 with a Python exception set when the call
 * fails or when the result is neither None nor a datetime.timedelta.
 */
static int tz_utcoffset_millis(PyObject* tzinfo, PyObject* utcoffset_str,
                               PyObject* when, int64_t* out) {
    PyObject* utcoffset = NULL;

    *out = 0;
    utcoffset = PyObject_CallMethodObjArgs(tzinfo, utcoffset_str, when, NULL);
    if (utcoffset == NULL) {
        return 0;
    }
    if (utcoffset != Py_None) {
        if (!PyDelta_Check(utcoffset)) {
            PyObject* BSONError = _error("BSONError");
            if (BSONError) {
                PyErr_SetString(BSONError, "tzinfo.utcoffset() did not return a datetime.timedelta");
                Py_DECREF(BSONError);
            }
            Py_DECREF(utcoffset);
            return 0;
        }
        /* Widen to 64 bits before multiplying: only the type of the result is
         * checked above, so the day count is not known to be small and
         * days * 86400 * 1000 could overflow a plain int. */
        *out = ((int64_t)PyDateTime_DELTA_GET_DAYS(utcoffset) * 86400 +
                (int64_t)PyDateTime_DELTA_GET_SECONDS(utcoffset)) * 1000 +
               (PyDateTime_DELTA_GET_MICROSECONDS(utcoffset) / 1000);
    }
    Py_DECREF(utcoffset);
    return 1;
}

/* Decode a BSON datetime (milliseconds since the epoch) into a Python object.
 *
 * self    - the module object; its state supplies cached names and bounds.
 * millis  - the decoded 64-bit millisecond value.
 * options - codec options; datetime_conversion, tz_aware and tzinfo are read.
 *
 * Behaviour by options->datetime_conversion:
 *   DATETIME_MS    - always returns datetime_ms_from_millis(self, millis).
 *   DATETIME_CLAMP - clamps millis into [min_millis, max_millis] first.
 *   DATETIME_AUTO  - returns datetime_ms_from_millis() when millis falls
 *                    outside [min_millis, max_millis].
 *   otherwise      - converts millis unchanged.
 *
 * Returns a new reference, or NULL with a Python exception set on failure.
 */
static PyObject* decode_datetime(PyObject* self, long long millis, const codec_options_t* options){
    PyObject* naive = NULL;
    PyObject* replace = NULL;
    PyObject* value = NULL;
    struct module_state *state = GETSTATE(self);
    if (!state) {
        /* Never dereference a missing module state; make sure the caller
         * sees an exception alongside the NULL return. */
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_RuntimeError, "_cbson module state is unavailable");
        }
        return NULL;
    }
    if (options->datetime_conversion == DATETIME_MS){
        return datetime_ms_from_millis(self, millis);
    }

    int dt_clamp = options->datetime_conversion == DATETIME_CLAMP;
    int dt_auto = options->datetime_conversion == DATETIME_AUTO;

    if (dt_clamp || dt_auto){
        int64_t min_millis = state->min_millis;
        int64_t max_millis = state->max_millis;
        int64_t min_millis_offset = 0;
        int64_t max_millis_offset = 0;
        if (options->tz_aware && options->tzinfo && options->tzinfo != Py_None) {
            /* Query the offset at both cached bounds (min first, then max). */
            if (!tz_utcoffset_millis(options->tzinfo, state->_utcoffset_str,
                                     state->min_datetime, &min_millis_offset) ||
                !tz_utcoffset_millis(options->tzinfo, state->_utcoffset_str,
                                     state->max_datetime, &max_millis_offset)) {
                return NULL;
            }
        }
        /* Narrow the allowed range by the offsets: a negative offset at the
         * lower bound raises min_millis, a positive offset at the upper
         * bound lowers max_millis. */
        if (min_millis_offset < 0) {
            min_millis -= min_millis_offset;
        }

        if (max_millis_offset > 0) {
            max_millis -= max_millis_offset;
        }

        if (dt_clamp) {
            if (millis < min_millis) {
                millis = min_millis;
            } else if (millis > max_millis) {
                millis = max_millis;
            }
            // Continues from here to return a datetime.
        } else { // dt_auto
            if (millis < min_millis || millis > max_millis){
                return datetime_ms_from_millis(self, millis);
            }
        }
    }

    naive = datetime_from_millis(millis);
    if (!naive) {
        goto done;
    }

    if (!options->tz_aware) { /* In the naive case, we're done here. */
        return naive;
    }
    replace = PyObject_GetAttr(naive, state->_replace_str);
    if (!replace) {
        goto done;
    }
    /* Call naive.replace(...) with the argument tuple and keyword dict cached
     * in the module state. */
    value = PyObject_Call(replace, state->replace_args, state->replace_kwargs);
    if (!value) {
        goto done;
    }

    /* convert to local time */
    if (options->tzinfo != Py_None) {
        PyObject* temp = PyObject_CallMethodObjArgs(value, state->_astimezone_str, options->tzinfo, NULL);
        Py_DECREF(value);
        value = temp; /* NULL here propagates the astimezone() failure. */
    }
done:
    /* Shared exit for success and failure: `value` is NULL on failure. */
    Py_XDECREF(naive);
    Py_XDECREF(replace);
    return value;
}
494+
379495
/* Just make this compatible w/ the old API. */
380496
int buffer_write_bytes(buffer_t buffer, const char* data, int size) {
381497
if (pymongo_buffer_write(buffer, data, size)) {
@@ -482,6 +598,8 @@ static int _load_python_objects(PyObject* module) {
482598
PyObject* empty_string = NULL;
483599
PyObject* re_compile = NULL;
484600
PyObject* compiled = NULL;
601+
PyObject* min_datetime_ms = NULL;
602+
PyObject* max_datetime_ms = NULL;
485603
struct module_state *state = GETSTATE(module);
486604
if (!state) {
487605
return 1;
@@ -530,10 +648,34 @@ static int _load_python_objects(PyObject* module) {
530648
_load_object(&state->UUID, "uuid", "UUID") ||
531649
_load_object(&state->Mapping, "collections.abc", "Mapping") ||
532650
_load_object(&state->DatetimeMS, "bson.datetime_ms", "DatetimeMS") ||
533-
_load_object(&state->_min_datetime_ms, "bson.datetime_ms", "_min_datetime_ms") ||
534-
_load_object(&state->_max_datetime_ms, "bson.datetime_ms", "_max_datetime_ms")) {
651+
_load_object(&min_datetime_ms, "bson.datetime_ms", "_MIN_UTC_MS") ||
652+
_load_object(&max_datetime_ms, "bson.datetime_ms", "_MAX_UTC_MS") ||
653+
_load_object(&state->min_datetime, "bson.datetime_ms", "_MIN_UTC") ||
654+
_load_object(&state->max_datetime, "bson.datetime_ms", "_MAX_UTC")) {
655+
return 1;
656+
}
657+
658+
state->min_millis = PyLong_AsLongLong(min_datetime_ms);
659+
state->max_millis = PyLong_AsLongLong(max_datetime_ms);
660+
Py_DECREF(min_datetime_ms);
661+
Py_DECREF(max_datetime_ms);
662+
if ((state->min_millis == -1 || state->max_millis == -1) && PyErr_Occurred()) {
663+
return 1;
664+
}
665+
666+
/* Speed up datetime.replace(tzinfo=utc) call */
667+
state->replace_args = PyTuple_New(0);
668+
if (!state->replace_args) {
669+
return 1;
670+
}
671+
state->replace_kwargs = PyDict_New();
672+
if (!state->replace_kwargs) {
535673
return 1;
536674
}
675+
if (PyDict_SetItem(state->replace_kwargs, state->_tzinfo_str, state->UTC) == -1) {
676+
return 1;
677+
}
678+
537679
/* Reload our REType hack too. */
538680
empty_string = PyBytes_FromString("");
539681
if (empty_string == NULL) {
@@ -1247,15 +1389,16 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
12471389
return 0;
12481390
if (utcoffset != Py_None) {
12491391
PyObject* result = PyNumber_Subtract(value, utcoffset);
1250-
Py_DECREF(utcoffset);
12511392
if (!result) {
1393+
Py_DECREF(utcoffset);
12521394
return 0;
12531395
}
12541396
millis = millis_from_datetime(result);
12551397
Py_DECREF(result);
12561398
} else {
12571399
millis = millis_from_datetime(value);
12581400
}
1401+
Py_DECREF(utcoffset);
12591402
*(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x09;
12601403
return buffer_write_int64(buffer, (int64_t)millis);
12611404
} else if (PyObject_TypeCheck(value, state->REType)) {
@@ -2043,11 +2186,6 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
20432186
}
20442187
case 9:
20452188
{
2046-
PyObject* naive;
2047-
PyObject* replace;
2048-
PyObject* args;
2049-
PyObject* kwargs;
2050-
PyObject* astimezone;
20512189
int64_t millis;
20522190
if (max < 8) {
20532191
goto invalid;
@@ -2056,120 +2194,7 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
20562194
millis = (int64_t)BSON_UINT64_FROM_LE(millis);
20572195
*position += 8;
20582196

2059-
if (options->datetime_conversion == DATETIME_MS){
2060-
value = datetime_ms_from_millis(self, millis);
2061-
break;
2062-
}
2063-
2064-
int dt_clamp = options->datetime_conversion == DATETIME_CLAMP;
2065-
int dt_auto = options->datetime_conversion == DATETIME_AUTO;
2066-
2067-
2068-
if (dt_clamp || dt_auto){
2069-
PyObject *min_millis_fn_res;
2070-
PyObject *max_millis_fn_res;
2071-
int64_t min_millis;
2072-
int64_t max_millis;
2073-
2074-
if (options->tz_aware){
2075-
PyObject* tzinfo = options->tzinfo;
2076-
if (tzinfo == Py_None) {
2077-
// Default to UTC.
2078-
tzinfo = state->UTC;
2079-
}
2080-
min_millis_fn_res = PyObject_CallFunctionObjArgs(state->_min_datetime_ms, tzinfo, NULL);
2081-
max_millis_fn_res = PyObject_CallFunctionObjArgs(state->_max_datetime_ms, tzinfo, NULL);
2082-
} else {
2083-
min_millis_fn_res = PyObject_CallObject(state->_min_datetime_ms, NULL);
2084-
max_millis_fn_res = PyObject_CallObject(state->_max_datetime_ms, NULL);
2085-
}
2086-
2087-
if (!min_millis_fn_res || !max_millis_fn_res){
2088-
Py_XDECREF(min_millis_fn_res);
2089-
Py_XDECREF(max_millis_fn_res);
2090-
goto invalid;
2091-
}
2092-
2093-
min_millis = PyLong_AsLongLong(min_millis_fn_res);
2094-
max_millis = PyLong_AsLongLong(max_millis_fn_res);
2095-
2096-
if ((min_millis == -1 || max_millis == -1) && PyErr_Occurred())
2097-
{
2098-
// min/max_millis check
2099-
goto invalid;
2100-
}
2101-
2102-
if (dt_clamp) {
2103-
if (millis < min_millis) {
2104-
millis = min_millis;
2105-
} else if (millis > max_millis) {
2106-
millis = max_millis;
2107-
}
2108-
// Continues from here to return a datetime.
2109-
} else { // dt_auto
2110-
if (millis < min_millis || millis > max_millis){
2111-
value = datetime_ms_from_millis(self, millis);
2112-
break; // Out-of-range so done.
2113-
}
2114-
}
2115-
}
2116-
2117-
naive = datetime_from_millis(millis);
2118-
if (!options->tz_aware) { /* In the naive case, we're done here. */
2119-
value = naive;
2120-
break;
2121-
}
2122-
2123-
if (!naive) {
2124-
goto invalid;
2125-
}
2126-
replace = PyObject_GetAttr(naive, state->_replace_str);
2127-
Py_DECREF(naive);
2128-
if (!replace) {
2129-
goto invalid;
2130-
}
2131-
args = PyTuple_New(0);
2132-
if (!args) {
2133-
Py_DECREF(replace);
2134-
goto invalid;
2135-
}
2136-
kwargs = PyDict_New();
2137-
if (!kwargs) {
2138-
Py_DECREF(replace);
2139-
Py_DECREF(args);
2140-
goto invalid;
2141-
}
2142-
if (PyDict_SetItem(kwargs, state->_tzinfo_str, state->UTC) == -1) {
2143-
Py_DECREF(replace);
2144-
Py_DECREF(args);
2145-
Py_DECREF(kwargs);
2146-
goto invalid;
2147-
}
2148-
value = PyObject_Call(replace, args, kwargs);
2149-
if (!value) {
2150-
Py_DECREF(replace);
2151-
Py_DECREF(args);
2152-
Py_DECREF(kwargs);
2153-
goto invalid;
2154-
}
2155-
2156-
/* convert to local time */
2157-
if (options->tzinfo != Py_None) {
2158-
astimezone = PyObject_GetAttr(value, state->_astimezone_str);
2159-
Py_DECREF(value);
2160-
if (!astimezone) {
2161-
Py_DECREF(replace);
2162-
Py_DECREF(args);
2163-
Py_DECREF(kwargs);
2164-
goto invalid;
2165-
}
2166-
value = PyObject_CallFunctionObjArgs(astimezone, options->tzinfo, NULL);
2167-
Py_DECREF(astimezone);
2168-
}
2169-
2170-
Py_DECREF(replace);
2171-
Py_DECREF(args);
2172-
Py_DECREF(kwargs);
2197+
value = decode_datetime(self, millis, options);
21732198
break;
21742199
}
21752200
case 11:
@@ -3053,6 +3078,10 @@ static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) {
30533078
Py_VISIT(state->_from_uuid_str);
30543079
Py_VISIT(state->_as_uuid_str);
30553080
Py_VISIT(state->_from_bid_str);
3081+
Py_VISIT(state->min_datetime);
3082+
Py_VISIT(state->max_datetime);
3083+
Py_VISIT(state->replace_args);
3084+
Py_VISIT(state->replace_kwargs);
30563085
return 0;
30573086
}
30583087

@@ -3097,6 +3126,10 @@ static int _cbson_clear(PyObject *m) {
30973126
Py_CLEAR(state->_from_uuid_str);
30983127
Py_CLEAR(state->_as_uuid_str);
30993128
Py_CLEAR(state->_from_bid_str);
3129+
Py_CLEAR(state->min_datetime);
3130+
Py_CLEAR(state->max_datetime);
3131+
Py_CLEAR(state->replace_args);
3132+
Py_CLEAR(state->replace_kwargs);
31003133
return 0;
31013134
}
31023135

0 commit comments

Comments
 (0)