Skip to content

ENH: support non-nano in Localizer #47246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
return obj

cdef:
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, NPY_FR_ns)

# Infer fold from offset-adjusted obj.value
# see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
Expand Down Expand Up @@ -584,7 +584,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
cdef:
int64_t local_val
Py_ssize_t outpos = -1
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, NPY_FR_ns)

assert obj.tzinfo is None

Expand Down
7 changes: 4 additions & 3 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1988,10 +1988,11 @@ default 'raise'
ambiguous = [ambiguous]
value = tz_localize_to_utc_single(self.value, tz,
ambiguous=ambiguous,
nonexistent=nonexistent)
nonexistent=nonexistent,
reso=self._reso)
elif tz is None:
# reset tz
value = tz_convert_from_utc_single(self.value, self.tz)
value = tz_convert_from_utc_single(self.value, self.tz, reso=self._reso)

else:
raise TypeError(
Expand Down Expand Up @@ -2139,7 +2140,7 @@ default 'raise'
fold = self.fold

if tzobj is not None:
value = tz_convert_from_utc_single(value, tzobj)
value = tz_convert_from_utc_single(value, tzobj, reso=self._reso)

# setup components
dt64_to_dtstruct(value, &dts)
Expand Down
11 changes: 9 additions & 2 deletions pandas/_libs/tslibs/tzconversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,25 @@ from numpy cimport (
ndarray,
)

from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT


cpdef int64_t tz_convert_from_utc_single(
int64_t utc_val, tzinfo tz
int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=*
) except? -1
cdef int64_t tz_localize_to_utc_single(
int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=*
int64_t val,
tzinfo tz,
object ambiguous=*,
object nonexistent=*,
NPY_DATETIMEUNIT reso=*,
) except? -1


cdef class Localizer:
cdef:
tzinfo tz
NPY_DATETIMEUNIT _reso
bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz
ndarray trans
Py_ssize_t ntrans
Expand Down
5 changes: 4 additions & 1 deletion pandas/_libs/tslibs/tzconversion.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@ import numpy as np
from pandas._typing import npt

# tz_convert_from_utc_single exposed for testing
def tz_convert_from_utc_single(val: np.int64, tz: tzinfo) -> np.int64: ...
def tz_convert_from_utc_single(
val: np.int64, tz: tzinfo, reso: int = ...
) -> np.int64: ...
def tz_localize_to_utc(
vals: npt.NDArray[np.int64],
tz: tzinfo | None,
ambiguous: str | bool | Iterable[bool] | None = ...,
nonexistent: str | timedelta | np.timedelta64 | None = ...,
reso: int = ..., # NPY_DATETIMEUNIT
) -> npt.NDArray[np.int64]: ...
72 changes: 56 additions & 16 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ from pandas._libs.tslibs.ccalendar cimport (
DAY_NANOS,
HOUR_NANOS,
)
from pandas._libs.tslibs.dtypes cimport periods_per_second
from pandas._libs.tslibs.nattype cimport NPY_NAT
from pandas._libs.tslibs.np_datetime cimport (
dt64_to_dtstruct,
NPY_DATETIMEUNIT,
npy_datetimestruct,
pandas_datetime_to_datetimestruct,
)
from pandas._libs.tslibs.timezones cimport (
get_dst_info,
Expand All @@ -54,6 +56,7 @@ cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64)
cdef class Localizer:
# cdef:
# tzinfo tz
# NPY_DATETIMEUNIT _reso
# bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz
# ndarray trans
# Py_ssize_t ntrans
Expand All @@ -63,8 +66,9 @@ cdef class Localizer:

@cython.initializedcheck(False)
@cython.boundscheck(False)
def __cinit__(self, tzinfo tz):
def __cinit__(self, tzinfo tz, NPY_DATETIMEUNIT reso):
self.tz = tz
self._reso = reso
self.use_utc = self.use_tzlocal = self.use_fixed = False
self.use_dst = self.use_pytz = False
self.ntrans = -1 # placeholder
Expand All @@ -80,19 +84,35 @@ cdef class Localizer:

else:
trans, deltas, typ = get_dst_info(tz)
if reso != NPY_DATETIMEUNIT.NPY_FR_ns:
# NB: using floordiv here is implicitly assuming we will
# never see trans or deltas that are not an integer number
# of seconds.
if reso == NPY_DATETIMEUNIT.NPY_FR_us:
trans = trans // 1_000
deltas = deltas // 1_000
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
trans = trans // 1_000_000
deltas = deltas // 1_000_000
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
trans = trans // 1_000_000_000
deltas = deltas // 1_000_000_000
else:
raise NotImplementedError(reso)

self.trans = trans
self.ntrans = self.trans.shape[0]
self.deltas = deltas

if typ != "pytz" and typ != "dateutil":
# static/fixed; in this case we know that len(delta) == 1
self.use_fixed = True
self.delta = self.deltas[0]
self.delta = deltas[0]
else:
self.use_dst = True
if typ == "pytz":
self.use_pytz = True
self.tdata = <int64_t*>cnp.PyArray_DATA(self.trans)
self.tdata = <int64_t*>cnp.PyArray_DATA(trans)

@cython.boundscheck(False)
cdef inline int64_t utc_val_to_local_val(
Expand All @@ -102,7 +122,7 @@ cdef class Localizer:
return utc_val
elif self.use_tzlocal:
return utc_val + _tz_localize_using_tzinfo_api(
utc_val, self.tz, to_utc=False, fold=fold
utc_val, self.tz, to_utc=False, reso=self._reso, fold=fold
)
elif self.use_fixed:
return utc_val + self.delta
Expand All @@ -117,7 +137,11 @@ cdef class Localizer:


cdef int64_t tz_localize_to_utc_single(
int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None,
int64_t val,
tzinfo tz,
object ambiguous=None,
object nonexistent=None,
NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns,
) except? -1:
"""See tz_localize_to_utc.__doc__"""
cdef:
Expand All @@ -131,7 +155,7 @@ cdef int64_t tz_localize_to_utc_single(
return val

elif is_tzlocal(tz) or is_zoneinfo(tz):
return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True)
return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, reso=reso)

elif is_fixed_offset(tz):
_, deltas, _ = get_dst_info(tz)
Expand All @@ -144,13 +168,19 @@ cdef int64_t tz_localize_to_utc_single(
tz,
ambiguous=ambiguous,
nonexistent=nonexistent,
reso=reso,
)[0]


@cython.boundscheck(False)
@cython.wraparound(False)
def tz_localize_to_utc(ndarray[int64_t] vals, tzinfo tz, object ambiguous=None,
object nonexistent=None):
def tz_localize_to_utc(
ndarray[int64_t] vals,
tzinfo tz,
object ambiguous=None,
object nonexistent=None,
NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns,
):
"""
Localize tzinfo-naive i8 to given time zone (using pytz). If
there are ambiguities in the values, raise AmbiguousTimeError.
Expand All @@ -177,6 +207,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, tzinfo tz, object ambiguous=None,
nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \
timedelta-like}
How to handle non-existent times when converting wall times to UTC
reso : NPY_DATETIMEUNIT, default NPY_FR_ns

Returns
-------
Expand All @@ -196,7 +227,7 @@ timedelta-like}
bint shift_forward = False, shift_backward = False
bint fill_nonexist = False
str stamp
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, reso=reso)

# Vectorized version of DstTzInfo.localize
if info.use_utc:
Expand All @@ -210,7 +241,7 @@ timedelta-like}
if v == NPY_NAT:
result[i] = NPY_NAT
else:
result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True)
result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True, reso=reso)
return result.base # to return underlying ndarray

elif info.use_fixed:
Expand Down Expand Up @@ -512,7 +543,9 @@ cdef ndarray[int64_t] _get_dst_hours(
# ----------------------------------------------------------------------
# Timezone Conversion

cpdef int64_t tz_convert_from_utc_single(int64_t utc_val, tzinfo tz) except? -1:
cpdef int64_t tz_convert_from_utc_single(
int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns
) except? -1:
"""
Convert the val (in i8) from UTC to tz

Expand All @@ -522,13 +555,14 @@ cpdef int64_t tz_convert_from_utc_single(int64_t utc_val, tzinfo tz) except? -1:
----------
utc_val : int64
tz : tzinfo
reso : NPY_DATETIMEUNIT, default NPY_FR_ns

Returns
-------
converted: int64
"""
cdef:
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, reso=reso)
Py_ssize_t pos

# Note: caller is responsible for ensuring utc_val != NPY_NAT
Expand All @@ -538,7 +572,11 @@ cpdef int64_t tz_convert_from_utc_single(int64_t utc_val, tzinfo tz) except? -1:
# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
cdef int64_t _tz_localize_using_tzinfo_api(
int64_t val, tzinfo tz, bint to_utc=True, bint* fold=NULL
int64_t val,
tzinfo tz,
bint to_utc=True,
NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns,
bint* fold=NULL,
) except? -1:
"""
Convert the i8 representation of a datetime from a general-case timezone to
Expand All @@ -552,6 +590,7 @@ cdef int64_t _tz_localize_using_tzinfo_api(
tz : tzinfo
to_utc : bint
True if converting _to_ UTC, False if going the other direction.
reso : NPY_DATETIMEUNIT
fold : bint*, default NULL
pointer to fold: whether datetime ends up in a fold or not
after adjustment.
Expand All @@ -571,8 +610,9 @@ cdef int64_t _tz_localize_using_tzinfo_api(
datetime dt
int64_t delta
timedelta td
int64_t pps = periods_per_second(reso)

dt64_to_dtstruct(val, &dts)
pandas_datetime_to_datetimestruct(val, reso, &dts)

# datetime_new is cython-optimized constructor
if not to_utc:
Expand All @@ -590,7 +630,7 @@ cdef int64_t _tz_localize_using_tzinfo_api(
dts.min, dts.sec, dts.us, None)

td = tz.utcoffset(dt)
delta = int(td.total_seconds() * 1_000_000_000)
delta = int(td.total_seconds() * pps)
return delta


Expand Down
12 changes: 6 additions & 6 deletions pandas/_libs/tslibs/vectorized.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def tz_convert_from_utc(ndarray stamps, tzinfo tz):
ndarray[int64]
"""
cdef:
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, reso=NPY_FR_ns)
int64_t utc_val, local_val
Py_ssize_t pos, i, n = stamps.size

Expand Down Expand Up @@ -130,7 +130,7 @@ def ints_to_pydatetime(
ndarray[object] of type specified by box
"""
cdef:
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, reso=NPY_FR_ns)
int64_t utc_val, local_val
Py_ssize_t i, n = stamps.size
Py_ssize_t pos = -1 # unused, avoid not-initialized warning
Expand Down Expand Up @@ -229,7 +229,7 @@ cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts):
def get_resolution(ndarray stamps, tzinfo tz=None) -> Resolution:
# stamps is int64_t, any ndim
cdef:
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, reso=NPY_FR_ns)
int64_t utc_val, local_val
Py_ssize_t i, n = stamps.size
Py_ssize_t pos = -1 # unused, avoid not-initialized warning
Expand Down Expand Up @@ -281,7 +281,7 @@ cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz, NPY_DATETIMEUNI
result : int64 ndarray of converted and normalized nanosecond timestamps
"""
cdef:
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, reso=reso)
int64_t utc_val, local_val, res_val
Py_ssize_t i, n = stamps.size
Py_ssize_t pos = -1 # unused, avoid not-initialized warning
Expand Down Expand Up @@ -328,7 +328,7 @@ def is_date_array_normalized(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso) -
is_normalized : bool True if all stamps are normalized
"""
cdef:
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, reso=reso)
int64_t utc_val, local_val
Py_ssize_t i, n = stamps.size
Py_ssize_t pos = -1 # unused, avoid not-initialized warning
Expand Down Expand Up @@ -357,7 +357,7 @@ def is_date_array_normalized(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso) -
def dt64arr_to_periodarr(ndarray stamps, int freq, tzinfo tz):
# stamps is int64_t, arbitrary ndim
cdef:
Localizer info = Localizer(tz)
Localizer info = Localizer(tz, reso=NPY_FR_ns)
Py_ssize_t i, n = stamps.size
Py_ssize_t pos = -1 # unused, avoid not-initialized warning
int64_t utc_val, local_val, res_val
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
"_libs.tslibs.tzconversion": {
"pyxfile": "_libs/tslibs/tzconversion",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
},
"_libs.tslibs.vectorized": {"pyxfile": "_libs/tslibs/vectorized"},
"_libs.testing": {"pyxfile": "_libs/testing"},
Expand Down