@@ -4852,6 +4852,40 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw)
4852
4852
return result ;
4853
4853
}
4854
4854
4855
+ static PyObject *
4856
+ _sanitize_isoformat_str (PyObject * dtstr , int * needs_decref ) {
4857
+ // `fromisoformat` allows surrogate characters in exactly one position,
4858
+ // the separator; to allow datetime_fromisoformat to make the simplifying
4859
+ // assumption that all valid strings can be encoded in UTF-8, this function
4860
+ // replaces any surrogate character separators with `T`.
4861
+ Py_ssize_t len = PyUnicode_GetLength (dtstr );
4862
+ * needs_decref = 0 ;
4863
+ if (len <= 10 || !Py_UNICODE_IS_SURROGATE (PyUnicode_READ_CHAR (dtstr , 10 ))) {
4864
+ return dtstr ;
4865
+ }
4866
+
4867
+ PyObject * left = PyUnicode_Substring (dtstr , 0 , 10 );
4868
+ if (left == NULL ) {
4869
+ return NULL ;
4870
+ }
4871
+
4872
+ PyObject * right = PyUnicode_Substring (dtstr , 11 , len );
4873
+ if (right == NULL ) {
4874
+ Py_DECREF (left );
4875
+ return NULL ;
4876
+ }
4877
+
4878
+ PyObject * str_out = PyUnicode_FromFormat ("%UT%U" , left , right );
4879
+ Py_DECREF (left );
4880
+ Py_DECREF (right );
4881
+ if (str_out == NULL ) {
4882
+ return NULL ;
4883
+ }
4884
+
4885
+ * needs_decref = 1 ;
4886
+ return str_out ;
4887
+ }
4888
+
4855
4889
static PyObject *
4856
4890
datetime_fromisoformat (PyObject * cls , PyObject * dtstr ) {
4857
4891
assert (dtstr != NULL );
@@ -4861,34 +4895,20 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
4861
4895
return NULL ;
4862
4896
}
4863
4897
4864
- const PyObject * dtstr_bytes = NULL ;
4865
- unsigned char bytes_needs_decref = 0 ;
4898
+ int needs_decref = 0 ;
4899
+ dtstr = _sanitize_isoformat_str (dtstr , & needs_decref );
4900
+ if (dtstr == NULL ) {
4901
+ goto error ;
4902
+ }
4866
4903
4867
4904
Py_ssize_t len ;
4868
4905
const char * dt_ptr = PyUnicode_AsUTF8AndSize (dtstr , & len );
4869
4906
4870
4907
if (dt_ptr == NULL ) {
4871
- len = PyUnicode_GET_LENGTH (dtstr );
4872
- if (len == 10 ) {
4873
- goto invalid_string_error ;
4874
- }
4875
- PyErr_Clear ();
4876
-
4877
- // If the datetime string cannot be encoded as UTF8 because the
4878
- // separator character is an invalid character, this could still
4879
- // be a valid isoformat, so we decode it and ignore.
4880
- dtstr_bytes = PyUnicode_AsEncodedString (dtstr , "ascii" , "replace" );
4881
- if (dtstr_bytes == NULL ) {
4882
- goto finally ;
4883
- }
4884
- bytes_needs_decref = 1 ;
4885
- dt_ptr = PyBytes_AS_STRING (dtstr_bytes );
4886
- if (dt_ptr == NULL ) {
4887
- goto finally ;
4888
- }
4908
+ goto invalid_string_error ;
4889
4909
}
4890
4910
4891
- const char * p = dt_ptr ;
4911
+ const char * p = dt_ptr ;
4892
4912
4893
4913
int year = 0 , month = 0 , day = 0 ;
4894
4914
int hour = 0 , minute = 0 , second = 0 , microsecond = 0 ;
@@ -4926,24 +4946,24 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {
4926
4946
4927
4947
PyObject * tzinfo = tzinfo_from_isoformat_results (rv , tzoffset , tzusec );
4928
4948
if (tzinfo == NULL ) {
4929
- goto finally ;
4949
+ goto error ;
4930
4950
}
4931
4951
4932
4952
PyObject * dt = new_datetime_subclass_ex (year , month , day , hour , minute ,
4933
4953
second , microsecond , tzinfo , cls );
4934
4954
4935
4955
Py_DECREF (tzinfo );
4936
- if (bytes_needs_decref ) {
4937
- Py_DECREF (dtstr_bytes );
4956
+ if (needs_decref ) {
4957
+ Py_DECREF (dtstr );
4938
4958
}
4939
4959
return dt ;
4940
4960
4941
4961
invalid_string_error :
4942
4962
PyErr_Format (PyExc_ValueError , "Invalid isoformat string: %R" , dtstr );
4943
4963
4944
- finally :
4945
- if (bytes_needs_decref ) {
4946
- Py_DECREF (dtstr_bytes );
4964
+ error :
4965
+ if (needs_decref ) {
4966
+ Py_DECREF (dtstr );
4947
4967
}
4948
4968
4949
4969
return NULL ;
0 commit comments