@@ -47,14 +47,18 @@ from numpy cimport (
47
47
)
48
48
49
49
from pandas._libs.missing cimport checknull_with_nat_and_na
50
- from pandas._libs.tslibs.conversion cimport get_datetime64_nanos
50
+ from pandas._libs.tslibs.conversion cimport (
51
+ get_datetime64_nanos,
52
+ parse_pydatetime,
53
+ )
51
54
from pandas._libs.tslibs.dtypes cimport (
52
55
get_supported_reso,
53
56
npy_unit_to_abbrev,
54
57
npy_unit_to_attrname,
55
58
)
56
59
from pandas._libs.tslibs.nattype cimport (
57
60
NPY_NAT,
61
+ c_NaT as NaT,
58
62
c_nat_strings as nat_strings,
59
63
)
60
64
from pandas._libs.tslibs.np_datetime cimport (
@@ -65,7 +69,6 @@ from pandas._libs.tslibs.np_datetime cimport (
65
69
npy_datetimestruct,
66
70
npy_datetimestruct_to_datetime,
67
71
pydate_to_dt64,
68
- pydatetime_to_dt64,
69
72
string_to_dts,
70
73
)
71
74
@@ -82,6 +85,8 @@ from pandas._libs.util cimport (
82
85
83
86
from pandas._libs.tslibs.timestamps import Timestamp
84
87
88
+ from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single
89
+
85
90
cnp.import_array()
86
91
87
92
@@ -314,11 +319,13 @@ def array_strptime(
314
319
Py_ssize_t i, n = len (values)
315
320
npy_datetimestruct dts
316
321
int64_t[::1 ] iresult
317
- object [::1 ] result_timezone
318
322
object val, tz
323
+ bint seen_datetime_offset = False
319
324
bint is_raise = errors== " raise"
320
325
bint is_ignore = errors== " ignore"
321
326
bint is_coerce = errors== " coerce"
327
+ bint is_same_offsets
328
+ set out_tzoffset_vals = set ()
322
329
tzinfo tz_out = None
323
330
bint iso_format = format_is_iso(fmt)
324
331
NPY_DATETIMEUNIT out_bestunit, item_reso
@@ -338,7 +345,6 @@ def array_strptime(
338
345
abbrev = npy_unit_to_abbrev(creso)
339
346
result = np.empty(n, dtype = f" M8[{abbrev}]" )
340
347
iresult = result.view(" i8" )
341
- result_timezone = np.empty(n, dtype = " object" )
342
348
343
349
dts.us = dts.ps = dts.as = 0
344
350
@@ -361,23 +367,18 @@ def array_strptime(
361
367
if infer_reso:
362
368
creso = state.creso
363
369
tz_out = state.process_datetime(val, tz_out, utc)
364
- if isinstance (val, _Timestamp):
365
- val = (< _Timestamp> val)._as_creso(creso)
366
- iresult[i] = val.tz_localize(None )._value
367
- else :
368
- iresult[i] = pydatetime_to_dt64(
369
- val.replace(tzinfo = None ), & dts, reso = creso
370
- )
371
- result_timezone[i] = val.tzinfo
370
+ iresult[i] = parse_pydatetime(val, & dts, state.creso)
372
371
continue
373
372
elif PyDate_Check(val):
373
+ state.found_other = True
374
374
item_reso = NPY_DATETIMEUNIT.NPY_FR_s
375
375
state.update_creso(item_reso)
376
376
if infer_reso:
377
377
creso = state.creso
378
378
iresult[i] = pydate_to_dt64(val, & dts, reso = creso)
379
379
continue
380
380
elif cnp.is_datetime64_object(val):
381
+ state.found_other = True
381
382
item_reso = get_supported_reso(get_datetime64_unit(val))
382
383
state.update_creso(item_reso)
383
384
if infer_reso:
@@ -418,13 +419,17 @@ def array_strptime(
418
419
f" Out of bounds {attrname} timestamp: {val}"
419
420
) from err
420
421
if out_local == 1 :
421
- # Store the out_tzoffset in seconds
422
- # since we store the total_seconds of
423
- # dateutil.tz.tzoffset objects
422
+ nsecs = out_tzoffset * 60
423
+ out_tzoffset_vals.add(nsecs)
424
+ seen_datetime_offset = True
424
425
tz = timezone(timedelta(minutes = out_tzoffset))
425
- result_timezone[i] = tz
426
- out_local = 0
427
- out_tzoffset = 0
426
+ value = tz_localize_to_utc_single(
427
+ value, tz, ambiguous = " raise" , nonexistent = None , creso = creso
428
+ )
429
+ else :
430
+ tz = None
431
+ out_tzoffset_vals.add(" naive" )
432
+ state.found_naive_str = True
428
433
iresult[i] = value
429
434
continue
430
435
@@ -450,14 +455,34 @@ def array_strptime(
450
455
state.update_creso(item_reso)
451
456
if infer_reso:
452
457
creso = state.creso
458
+
453
459
try :
454
460
iresult[i] = npy_datetimestruct_to_datetime(creso, & dts)
455
461
except OverflowError as err:
456
462
attrname = npy_unit_to_attrname[creso]
457
463
raise OutOfBoundsDatetime(
458
464
f" Out of bounds {attrname} timestamp: {val}"
459
465
) from err
460
- result_timezone[i] = tz
466
+
467
+ if tz is not None :
468
+ ival = iresult[i]
469
+ iresult[i] = tz_localize_to_utc_single(
470
+ ival, tz, ambiguous = " raise" , nonexistent = None , creso = creso
471
+ )
472
+ nsecs = (ival - iresult[i])
473
+ if creso == NPY_FR_ns:
474
+ nsecs = nsecs // 10 ** 9
475
+ elif creso == NPY_DATETIMEUNIT.NPY_FR_us:
476
+ nsecs = nsecs // 10 ** 6
477
+ elif creso == NPY_DATETIMEUNIT.NPY_FR_ms:
478
+ nsecs = nsecs // 10 ** 3
479
+
480
+ out_tzoffset_vals.add(nsecs)
481
+ seen_datetime_offset = True
482
+ else :
483
+ state.found_naive_str = True
484
+ tz = None
485
+ out_tzoffset_vals.add(" naive" )
461
486
462
487
except (ValueError , OutOfBoundsDatetime) as ex:
463
488
ex.args = (
@@ -474,7 +499,37 @@ def array_strptime(
474
499
continue
475
500
elif is_raise:
476
501
raise
477
- return values, []
502
+ return values, None
503
+
504
+ if seen_datetime_offset and not utc:
505
+ is_same_offsets = len (out_tzoffset_vals) == 1
506
+ if not is_same_offsets or (state.found_naive or state.found_other):
507
+ result2 = _array_strptime_object_fallback(
508
+ values, fmt = fmt, exact = exact, errors = errors, utc = utc
509
+ )
510
+ return result2, None
511
+ elif tz_out is not None :
512
+ # GH#55693
513
+ tz_offset = out_tzoffset_vals.pop()
514
+ tz_out2 = timezone(timedelta(seconds = tz_offset))
515
+ if not tz_compare(tz_out, tz_out2):
516
+ # e.g. test_to_datetime_mixed_offsets_with_utc_false_deprecated
517
+ result2 = _array_strptime_object_fallback(
518
+ values, fmt = fmt, exact = exact, errors = errors, utc = utc
519
+ )
520
+ return result2, None
521
+ # e.g. test_guess_datetime_format_with_parseable_formats
522
+ else :
523
+ # e.g. test_to_datetime_iso8601_with_timezone_valid
524
+ tz_offset = out_tzoffset_vals.pop()
525
+ tz_out = timezone(timedelta(seconds = tz_offset))
526
+ elif not utc:
527
+ if tz_out and (state.found_other or state.found_naive_str):
528
+ # found_other indicates a tz-naive int, float, dt64, or date
529
+ result2 = _array_strptime_object_fallback(
530
+ values, fmt = fmt, exact = exact, errors = errors, utc = utc
531
+ )
532
+ return result2, None
478
533
479
534
if infer_reso:
480
535
if state.creso_ever_changed:
@@ -488,7 +543,6 @@ def array_strptime(
488
543
utc = utc,
489
544
creso = state.creso,
490
545
)
491
-
492
546
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
493
547
# i.e. we never encountered anything non-NaT, default to "s". This
494
548
# ensures that insert and concat-like operations with NaT
@@ -499,7 +553,7 @@ def array_strptime(
499
553
# a second pass.
500
554
abbrev = npy_unit_to_abbrev(state.creso)
501
555
result = iresult.base.view(f" M8[{abbrev}]" )
502
- return result, result_timezone.base
556
+ return result, tz_out
503
557
504
558
505
559
cdef tzinfo _parse_with_format(
@@ -737,6 +791,157 @@ cdef tzinfo _parse_with_format(
737
791
return tz
738
792
739
793
794
def _array_strptime_object_fallback(
    ndarray[object] values,
    str fmt,
    bint exact=True,
    errors="raise",
    bint utc=False,
):
    """
    Parse ``values`` element by element into an object-dtype ndarray.

    Each element becomes its own ``Timestamp`` (keeping whatever timezone
    was parsed for that element) or ``NaT``, so elements with differing
    UTC offsets, or a mix of tz-aware and tz-naive entries, can coexist
    in the result.  ``array_strptime`` delegates here when it detects
    such a mix and ``utc`` is False.

    Parameters
    ----------
    values : ndarray[object]
        Items to parse.  Strings are parsed according to ``fmt``;
        datetime, date and datetime64 objects are wrapped with
        ``Timestamp``; null-like values become ``NaT``; anything else is
        converted with ``str`` and then parsed.
    fmt : str
        A strptime-style format, or the literal ``"ISO8601"``.
    exact : bool, default True
        Forwarded to ``string_to_dts`` and ``_parse_with_format``.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        What to do when an element fails to parse: re-raise with the
        position appended to the message, store ``NaT``, or return
        ``values`` unchanged.
    utc : bool, default False
        Forwarded to ``parse_today_now``.

    Returns
    -------
    ndarray[object]
        The parsed ``Timestamp``/``NaT`` objects, or the original
        ``values`` when ``errors="ignore"`` and an element failed to
        parse.

    Raises
    ------
    ValueError, OutOfBoundsDatetime
        If an element cannot be parsed and ``errors="raise"``.

    Warns
    -----
    FutureWarning
        Emitted whenever the full result array is returned.
    """

    cdef:
        Py_ssize_t i, n = len(values)
        npy_datetimestruct dts
        int64_t iresult
        object val
        tzinfo tz
        bint is_raise = errors == "raise"
        bint is_ignore = errors == "ignore"
        bint is_coerce = errors == "coerce"
        bint iso_format = format_is_iso(fmt)
        NPY_DATETIMEUNIT creso, out_bestunit, item_reso
        int out_local = 0, out_tzoffset = 0
        bint string_to_dts_succeeded = 0

    assert is_raise or is_ignore or is_coerce

    item_reso = NPY_DATETIMEUNIT.NPY_FR_GENERIC
    format_regex, locale_time = _get_format_regex(fmt)

    result = np.empty(n, dtype=object)

    dts.us = dts.ps = dts.as = 0

    for i in range(n):
        val = values[i]
        try:
            if isinstance(val, str):
                if len(val) == 0 or val in nat_strings:
                    result[i] = NaT
                    continue
            elif checknull_with_nat_and_na(val):
                result[i] = NaT
                continue
            elif PyDateTime_Check(val):
                result[i] = Timestamp(val)
                continue
            elif PyDate_Check(val):
                result[i] = Timestamp(val)
                continue
            elif cnp.is_datetime64_object(val):
                result[i] = Timestamp(val)
                continue
            elif (
                    (is_integer_object(val) or is_float_object(val))
                    and (val != val or val == NPY_NAT)
            ):
                # NaN (val != val) or the integer NaT sentinel
                result[i] = NaT
                continue
            else:
                val = str(val)

            if fmt == "ISO8601":
                string_to_dts_succeeded = not string_to_dts(
                    val, &dts, &out_bestunit, &out_local,
                    &out_tzoffset, False, None, False
                )
            elif iso_format:
                string_to_dts_succeeded = not string_to_dts(
                    val, &dts, &out_bestunit, &out_local,
                    &out_tzoffset, False, fmt, exact
                )
            if string_to_dts_succeeded:
                # No error reported by string_to_dts, pick back up
                # where we left off
                creso = get_supported_reso(out_bestunit)
                try:
                    value = npy_datetimestruct_to_datetime(creso, &dts)
                except OverflowError as err:
                    # Report the resolution actually used for the conversion
                    # rather than always claiming "nanosecond".
                    attrname = npy_unit_to_attrname[creso]
                    raise OutOfBoundsDatetime(
                        f"Out of bounds {attrname} timestamp: {val}"
                    ) from err
                if out_local == 1:
                    # out_tzoffset is in minutes; convert wall time to UTC
                    tz = timezone(timedelta(minutes=out_tzoffset))
                    value = tz_localize_to_utc_single(
                        value, tz, ambiguous="raise", nonexistent=None, creso=creso
                    )
                else:
                    tz = None
                ts = Timestamp._from_value_and_reso(value, creso, tz)
                result[i] = ts
                continue

            if parse_today_now(val, &iresult, utc, NPY_FR_ns):
                result[i] = Timestamp(val)
                continue

            # Some ISO formats can't be parsed by string_to_dts
            # For example, 6-digit YYYYMD. So, if there's an error, and a format
            # was specified, then try the string-matching code below. If the format
            # specified was 'ISO8601', then we need to error, because
            # only string_to_dts handles mixed ISO8601 formats.
            if not string_to_dts_succeeded and fmt == "ISO8601":
                raise ValueError(f"Time data {val} is not ISO8601 format")

            tz = _parse_with_format(
                val, fmt, exact, format_regex, locale_time, &dts, &item_reso
            )
            try:
                iresult = npy_datetimestruct_to_datetime(item_reso, &dts)
            except OverflowError as err:
                # Report the resolution actually used for the conversion
                # rather than always claiming "nanosecond".
                attrname = npy_unit_to_attrname[item_reso]
                raise OutOfBoundsDatetime(
                    f"Out of bounds {attrname} timestamp: {val}"
                ) from err
            if tz is not None:
                iresult = tz_localize_to_utc_single(
                    iresult, tz, ambiguous="raise", nonexistent=None, creso=item_reso
                )
            ts = Timestamp._from_value_and_reso(iresult, item_reso, tz)
            result[i] = ts

        except (ValueError, OutOfBoundsDatetime) as ex:
            ex.args = (
                f"{str(ex)}, at position {i}. You might want to try:\n"
                "    - passing `format` if your strings have a consistent format;\n"
                "    - passing `format='ISO8601'` if your strings are "
                "all ISO8601 but not necessarily in exactly the same format;\n"
                "    - passing `format='mixed'`, and the format will be "
                "inferred for each element individually. "
                "You might want to use `dayfirst` alongside this.",
            )
            if is_coerce:
                result[i] = NaT
                continue
            elif is_raise:
                raise
            # errors="ignore": hand the input back untouched
            return values

    import warnings

    from pandas.util._exceptions import find_stack_level
    warnings.warn(
        "In a future version of pandas, parsing datetimes with mixed time "
        "zones will raise an error unless `utc=True`. Please specify `utc=True` "
        "to opt in to the new behaviour and silence this warning. "
        "To create a `Series` with mixed offsets and `object` dtype, "
        "please use `apply` and `datetime.datetime.strptime`",
        FutureWarning,
        stacklevel=find_stack_level(),
    )

    return result
943
+
944
+
740
945
class TimeRE (_TimeRE ):
741
946
"""
742
947
Handle conversion from format directives to regexes.
0 commit comments