Skip to content

ENH: date_range support reso keyword #49106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Nov 18, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Other enhancements
- Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
- :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
- :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`)
- :func:`date_range` now supports a ``reso`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49106`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_200.notable_bug_fixes:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT


cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def periods_per_second(reso: int) -> int: ...
def is_supported_unit(reso: int) -> bool: ...
def npy_unit_to_abbrev(reso: int) -> str: ...
def get_supported_reso(reso: int) -> int: ...
def abbrev_to_npy_unit(abbrev: str) -> int: ...

class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
raise NotImplementedError(unit)


cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev):
cpdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev):
if abbrev == "Y":
return NPY_DATETIMEUNIT.NPY_FR_Y
elif abbrev == "M":
Expand Down
19 changes: 17 additions & 2 deletions pandas/core/arrays/_ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def generate_regular_range(
end: Timestamp | Timedelta | None,
periods: int | None,
freq: BaseOffset,
reso: str = "ns",
) -> npt.NDArray[np.intp]:
"""
Generate a range of dates or timestamps with the spans between dates
Expand All @@ -37,14 +38,28 @@ def generate_regular_range(
Number of periods in produced date range.
freq : Tick
Describes space between dates in produced date range.
reso : str, default "ns"
The resolution the output is meant to represent.

Returns
-------
ndarray[np.int64] Representing nanoseconds.
ndarray[np.int64]
Representing the given resolution.
"""
istart = start.value if start is not None else None
iend = end.value if end is not None else None
stride = freq.nanos
freq.nanos # raises if non-fixed frequency
td = Timedelta(freq)
try:
td = td._as_unit( # pyright: ignore[reportGeneralTypeIssues]
reso, round_ok=False
)
except ValueError as err:
raise ValueError(
f"freq={freq} is incompatible with reso={reso}. "
"Use a lower freq or a higher reso instead."
) from err
stride = td.value

if periods is None and istart is not None and iend is not None:
b = istart
Expand Down
27 changes: 23 additions & 4 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
tz_convert_from_utc,
tzconversion,
)
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
from pandas._typing import (
DateTimeErrorChoices,
IntervalClosedType,
Expand Down Expand Up @@ -369,6 +370,8 @@ def _generate_range( # type: ignore[override]
ambiguous: TimeAmbiguous = "raise",
nonexistent: TimeNonexistent = "raise",
inclusive: IntervalClosedType = "both",
*,
reso: str | None = None,
) -> DatetimeArray:

periods = dtl.validate_periods(periods)
Expand All @@ -391,6 +394,17 @@ def _generate_range( # type: ignore[override]
if start is NaT or end is NaT:
raise ValueError("Neither `start` nor `end` can be NaT")

if reso is not None:
if reso not in ["s", "ms", "us", "ns"]:
raise ValueError("'reso' must be one of 's', 'ms', 'us', 'ns'")
else:
reso = "ns"

if start is not None and reso is not None:
start = start._as_unit(reso)
if end is not None and reso is not None:
end = end._as_unit(reso)

left_inclusive, right_inclusive = validate_inclusive(inclusive)
start, end = _maybe_normalize_endpoints(start, end, normalize)
tz = _infer_tz_from_endpoints(start, end, tz)
Expand All @@ -416,7 +430,7 @@ def _generate_range( # type: ignore[override]
end = end.tz_localize(None)

if isinstance(freq, Tick):
i8values = generate_regular_range(start, end, periods, freq)
i8values = generate_regular_range(start, end, periods, freq, reso=reso)
else:
xdr = _generate_range(
start=start, end=end, periods=periods, offset=freq
Expand All @@ -430,8 +444,13 @@ def _generate_range( # type: ignore[override]
if not timezones.is_utc(tz):
# short-circuit tz_localize_to_utc which would make
# an unnecessary copy with UTC but be a no-op.
creso = abbrev_to_npy_unit(reso)
i8values = tzconversion.tz_localize_to_utc(
i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
i8values,
tz,
ambiguous=ambiguous,
nonexistent=nonexistent,
reso=creso,
)

# i8values is localized datetime64 array -> have to convert
Expand Down Expand Up @@ -466,8 +485,8 @@ def _generate_range( # type: ignore[override]
if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
i8values = i8values[:-1]

dt64_values = i8values.view("datetime64[ns]")
dtype = tz_to_dtype(tz)
dt64_values = i8values.view(f"datetime64[{reso}]")
dtype = tz_to_dtype(tz, unit=reso)
return cls._simple_new(dt64_values, freq=freq, dtype=dtype)

# -----------------------------------------------------------------
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,8 @@ def date_range(
name: Hashable = None,
closed: Literal["left", "right"] | None | lib.NoDefault = lib.no_default,
inclusive: IntervalClosedType | None = None,
*,
reso: str | None = None,
**kwargs,
) -> DatetimeIndex:
"""
Expand Down Expand Up @@ -993,6 +995,10 @@ def date_range(
Include boundaries; Whether to set each bound as closed or open.

.. versionadded:: 1.4.0
reso : str, default None
Specify the desired resolution of the result: one of "s", "ms", "us", or "ns". If None, nanosecond resolution ("ns") is used.

.. versionadded:: 2.0.0
**kwargs
For compatibility. Has no effect on the result.

Expand Down Expand Up @@ -1137,6 +1143,7 @@ def date_range(
tz=tz,
normalize=normalize,
inclusive=inclusive,
reso=reso,
**kwargs,
)
return DatetimeIndex._simple_new(dtarr, name=name)
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -1166,3 +1166,33 @@ def test_date_range_with_custom_holidays():
freq=freq,
)
tm.assert_index_equal(result, expected)


class TestDateRangeNonNano:
    """Tests for ``date_range`` called with the ``reso`` keyword."""

    def test_date_range_reso_validation(self):
        # "h" is not one of the abbreviations named in the error message
        expected_msg = "'reso' must be one of 's', 'ms', 'us', 'ns'"
        with pytest.raises(ValueError, match=expected_msg):
            date_range("2016-01-01", "2016-03-04", periods=3, reso="h")

    def test_date_range_freq_higher_than_reso(self):
        # a freq finer than the requested reso must be rejected
        expected_msg = "Use a lower freq or a higher reso instead"
        start, end = "2016-01-01", "2016-01-01 00:00:00.000001"
        with pytest.raises(ValueError, match=expected_msg):
            # TODO give a more useful or informative message?
            date_range(start, end, freq="ns", reso="ms")

    def test_date_range_non_nano(self):
        start = np.datetime64("1066-10-14")  # Battle of Hastings
        end = np.datetime64("2305-07-13")  # Jean-Luc Picard's birthday

        result = date_range(start, end, freq="D", reso="s")
        assert result.freq == "D"
        assert result.dtype == "M8[s]"

        # Build the expected values directly from integer seconds.
        # np.arange excludes its stop, so go one step past ``end``.
        seconds_per_day = 24 * 3600
        first_i8 = start.astype("M8[s]").view("i8")
        stop_i8 = (end + 1).astype("M8[s]").view("i8")
        expected = np.arange(first_i8, stop_i8, seconds_per_day).view("M8[s]")

        tm.assert_numpy_array_equal(result.to_numpy(), expected)