Skip to content

Commit 2f48e53

Browse files
committed
Fix issue with non-UTF8 separator strings
It is possible to pass a non-UTF-8 string as a separator in datetime.isoformat, but the current implementation of datetime.fromisoformat starts by encoding the string to UTF-8, which will fail even for some valid strings. In the special case of non-UTF-8 separators, we take a performance hit by encoding the string as ASCII and replacing any invalid characters with ?.
1 parent 89487f5 commit 2f48e53

File tree

1 file changed

+40
-3
lines changed

1 file changed

+40
-3
lines changed

Modules/_datetimemodule.c

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4848,8 +4848,33 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
48484848
return NULL;
48494849
}
48504850

4851+
const PyObject * dtstr_bytes = NULL;
4852+
unsigned char bytes_needs_decref = 0;
4853+
48514854
Py_ssize_t len;
48524855
const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
4856+
4857+
if (dt_ptr == NULL) {
4858+
len = PyUnicode_GET_LENGTH(dtstr);
4859+
if (len == 10) {
4860+
goto invalid_string_error;
4861+
}
4862+
PyErr_Clear();
4863+
4864+
// If the datetime string cannot be encoded as UTF8 because the
4865+
// separator character is an invalid character, this could still
4866+
// be a valid isoformat, so we encode it as ASCII, replacing invalid characters.
4867+
dtstr_bytes = PyUnicode_AsEncodedString(dtstr, "ascii", "replace");
4868+
if (dtstr_bytes == NULL) {
4869+
goto finally;
4870+
}
4871+
bytes_needs_decref = 1;
4872+
dt_ptr = PyBytes_AS_STRING(dtstr_bytes);
4873+
if (dt_ptr == NULL) {
4874+
goto finally;
4875+
}
4876+
}
4877+
48534878
const char * p = dt_ptr;
48544879

48554880
int year = 0, month = 0, day = 0;
@@ -4883,20 +4908,32 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
48834908
&tzoffset, &tzusec);
48844909
}
48854910
if (rv < 0) {
4886-
PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr);
4887-
return NULL;
4911+
goto invalid_string_error;
48884912
}
48894913

48904914
PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, tzusec);
48914915
if (tzinfo == NULL) {
4892-
return NULL;
4916+
goto finally;
48934917
}
48944918

48954919
PyObject *dt = new_datetime_subclass_ex(year, month, day, hour, minute,
48964920
second, microsecond, tzinfo, cls);
48974921

48984922
Py_DECREF(tzinfo);
4923+
if (bytes_needs_decref) {
4924+
Py_DECREF(dtstr_bytes);
4925+
}
48994926
return dt;
4927+
4928+
invalid_string_error:
4929+
PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", dtstr);
4930+
4931+
finally:
4932+
if (bytes_needs_decref) {
4933+
Py_DECREF(dtstr_bytes);
4934+
}
4935+
4936+
return NULL;
49004937
}
49014938

49024939

0 commit comments

Comments
 (0)