Skip to content

Commit 6df3ca9

Browse files
committed
bpo-34454: avoid internally encoding fromisoformat() input to UTF-8
Encoding the input to UTF-8 breaks if a surrogate Unicode code point is used as the separator character in the input string.
1 parent 28853a2 commit 6df3ca9

File tree

1 file changed

+67
-37
lines changed

1 file changed

+67
-37
lines changed

Modules/_datetimemodule.c

Lines changed: 67 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2880,26 +2880,34 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) {
28802880
return NULL;
28812881
}
28822882

2883-
Py_ssize_t len;
2883+
Py_ssize_t len = PyUnicode_GET_LENGTH(dtstr);
2884+
if (len != 10) {
2885+
goto invalid_string_error;
2886+
}
28842887

2885-
const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
2888+
PyObject * bytes = PyUnicode_AsASCIIString(dtstr);
2889+
if (bytes == NULL) {
2890+
return NULL;
2891+
}
2892+
const char * p = PyBytes_AS_STRING(bytes);
2893+
Py_DECREF(bytes);
2894+
if (p == NULL) {
2895+
goto invalid_string_error;
2896+
}
28862897

28872898
int year = 0, month = 0, day = 0;
28882899

2889-
int rv;
2890-
if (len == 10) {
2891-
rv = parse_isoformat_date(dt_ptr, &year, &month, &day);
2892-
} else {
2893-
rv = -1;
2894-
}
2900+
int rv = parse_isoformat_date(p, &year, &month, &day);
28952901

28962902
if (rv < 0) {
2897-
PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s",
2898-
dt_ptr);
2899-
return NULL;
2903+
goto invalid_string_error;
29002904
}
29012905

29022906
return new_date_subclass_ex(year, month, day, cls);
2907+
2908+
invalid_string_error:
2909+
PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", dtstr);
2910+
return NULL;
29032911
}
29042912

29052913

@@ -4848,43 +4856,61 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
48484856
return NULL;
48494857
}
48504858

4851-
Py_ssize_t len;
4852-
const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
4853-
const char * p = dt_ptr;
4859+
Py_ssize_t len = PyUnicode_GET_LENGTH(dtstr);
4860+
if (len < 10) {
4861+
goto invalid_string_error;
4862+
}
48544863

48554864
int year = 0, month = 0, day = 0;
48564865
int hour = 0, minute = 0, second = 0, microsecond = 0;
48574866
int tzoffset = 0, tzusec = 0;
4867+
int rv;
4868+
PyObject *substr, *substr_bytes;
4869+
const char * p;
48584870

48594871
// date has a fixed length of 10
4860-
int rv = parse_isoformat_date(p, &year, &month, &day);
4872+
substr = PyUnicode_Substring(dtstr, 0, 10);
4873+
if (substr == NULL) {
4874+
return NULL;
4875+
}
4876+
substr_bytes = PyUnicode_AsASCIIString(substr);
4877+
Py_DECREF(substr);
4878+
if (substr_bytes == NULL) {
4879+
goto invalid_string_error;
4880+
}
4881+
p = PyBytes_AS_STRING(substr_bytes);
4882+
Py_DECREF(substr_bytes);
4883+
if (p == NULL) {
4884+
return NULL;
4885+
}
48614886

4862-
if (!rv && len > 10) {
4863-
// In UTF-8, the length of multi-byte characters is encoded in the MSB
4864-
if ((p[10] & 0x80) == 0) {
4865-
p += 11;
4866-
} else {
4867-
switch(p[10] & 0xf0) {
4868-
case 0xe0:
4869-
p += 13;
4870-
break;
4871-
case 0xf0:
4872-
p += 14;
4873-
break;
4874-
default:
4875-
p += 12;
4876-
break;
4877-
}
4887+
rv = parse_isoformat_date(p, &year, &month, &day);
4888+
if (rv != 0) {
4889+
goto invalid_string_error;
4890+
}
4891+
4892+
if (len > 10) {
4893+
substr = PyUnicode_Substring(dtstr, 11, len);
4894+
if (substr == NULL) {
4895+
return NULL;
4896+
}
4897+
substr_bytes = PyUnicode_AsASCIIString(substr);
4898+
Py_DECREF(substr);
4899+
if (substr_bytes == NULL) {
4900+
goto invalid_string_error;
4901+
}
4902+
p = PyBytes_AS_STRING(substr_bytes);
4903+
Py_DECREF(substr_bytes);
4904+
if (p == NULL) {
4905+
return NULL;
48784906
}
48794907

4880-
len -= (p - dt_ptr);
4881-
rv = parse_isoformat_time(p, len,
4908+
rv = parse_isoformat_time(p, len - 11,
48824909
&hour, &minute, &second, &microsecond,
48834910
&tzoffset, &tzusec);
4884-
}
4885-
if (rv < 0) {
4886-
PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr);
4887-
return NULL;
4911+
if (rv < 0) {
4912+
goto invalid_string_error;
4913+
}
48884914
}
48894915

48904916
PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, tzusec);
@@ -4897,6 +4923,10 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
48974923

48984924
Py_DECREF(tzinfo);
48994925
return dt;
4926+
4927+
invalid_string_error:
4928+
PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %R", dtstr);
4929+
return NULL;
49004930
}
49014931

49024932

0 commit comments

Comments
 (0)