Skip to content

Commit bd508de

Browse files
authored
ENH: Allow dt accessor when using ArrowDtype with datetime types (#50954)
* Add methods
* Adjust accessor, add tests
* finished tests
* Whatsnew number
* Use hasattr
* Fix tests and typing
* Fix param
* Fix typing
* Clarify whatsnew
* Only for M
1 parent f9ce6f0 commit bd508de

File tree

6 files changed

+500
-7
lines changed

6 files changed

+500
-7
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ Alternatively, copy on write can be enabled locally through:
264264

265265
Other enhancements
266266
^^^^^^^^^^^^^^^^^^
267+
- Added support for ``dt`` accessor methods when using :class:`ArrowDtype` with a ``pyarrow.timestamp`` type (:issue:`50954`)
267268
- :func:`read_sas` now supports using ``encoding='infer'`` to correctly read and use the encoding specified by the sas file. (:issue:`48048`)
268269
- :meth:`.DataFrameGroupBy.quantile`, :meth:`.SeriesGroupBy.quantile` and :meth:`.DataFrameGroupBy.std` now preserve nullable dtypes instead of casting to numpy dtypes (:issue:`37493`)
269270
- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`)

pandas/core/accessor.py

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
"""
77
from __future__ import annotations
88

9-
from typing import final
9+
from typing import (
10+
Callable,
11+
final,
12+
)
1013
import warnings
1114

1215
from pandas.util._decorators import doc
@@ -59,7 +62,13 @@ def _delegate_method(self, name, *args, **kwargs):
5962

6063
@classmethod
6164
def _add_delegate_accessors(
62-
cls, delegate, accessors, typ: str, overwrite: bool = False
65+
cls,
66+
delegate,
67+
accessors: list[str],
68+
typ: str,
69+
overwrite: bool = False,
70+
accessor_mapping: Callable[[str], str] = lambda x: x,
71+
raise_on_missing: bool = True,
6372
) -> None:
6473
"""
6574
Add accessors to cls from the delegate class.
@@ -75,6 +84,11 @@ def _add_delegate_accessors(
7584
typ : {'property', 'method'}
7685
overwrite : bool, default False
7786
Overwrite the method/property in the target class if it exists.
87+
accessor_mapping : Callable, default lambda x: x
88+
Callable mapping an accessor name on cls to the attribute name on the delegate.
89+
raise_on_missing : bool, default True
90+
Raise if an accessor does not exist on delegate.
91+
False skips the missing accessor.
7892
"""
7993

8094
def _create_delegator_property(name):
@@ -88,20 +102,28 @@ def _setter(self, new_values):
88102
_setter.__name__ = name
89103

90104
return property(
91-
fget=_getter, fset=_setter, doc=getattr(delegate, name).__doc__
105+
fget=_getter,
106+
fset=_setter,
107+
doc=getattr(delegate, accessor_mapping(name)).__doc__,
92108
)
93109

94110
def _create_delegator_method(name):
95111
def f(self, *args, **kwargs):
96112
return self._delegate_method(name, *args, **kwargs)
97113

98114
f.__name__ = name
99-
f.__doc__ = getattr(delegate, name).__doc__
115+
f.__doc__ = getattr(delegate, accessor_mapping(name)).__doc__
100116

101117
return f
102118

103119
for name in accessors:
104120

121+
if (
122+
not raise_on_missing
123+
and getattr(delegate, accessor_mapping(name), None) is None
124+
):
125+
continue
126+
105127
if typ == "property":
106128
f = _create_delegator_property(name)
107129
else:
@@ -112,7 +134,14 @@ def f(self, *args, **kwargs):
112134
setattr(cls, name, f)
113135

114136

115-
def delegate_names(delegate, accessors, typ: str, overwrite: bool = False):
137+
def delegate_names(
138+
delegate,
139+
accessors: list[str],
140+
typ: str,
141+
overwrite: bool = False,
142+
accessor_mapping: Callable[[str], str] = lambda x: x,
143+
raise_on_missing: bool = True,
144+
):
116145
"""
117146
Add delegated names to a class using a class decorator. This provides
118147
an alternative usage to directly calling `_add_delegate_accessors`
@@ -127,6 +156,11 @@ def delegate_names(delegate, accessors, typ: str, overwrite: bool = False):
127156
typ : {'property', 'method'}
128157
overwrite : bool, default False
129158
Overwrite the method/property in the target class if it exists.
159+
accessor_mapping : Callable, default lambda x: x
160+
Callable mapping an accessor name on cls to the attribute name on the delegate.
161+
raise_on_missing : bool, default True
162+
Raise if an accessor does not exist on delegate.
163+
False skips the missing accessor.
130164
131165
Returns
132166
-------
@@ -141,7 +175,14 @@ class CategoricalAccessor(PandasDelegate):
141175
"""
142176

143177
def add_delegate_accessors(cls):
144-
cls._add_delegate_accessors(delegate, accessors, typ, overwrite=overwrite)
178+
cls._add_delegate_accessors(
179+
delegate,
180+
accessors,
181+
typ,
182+
overwrite=overwrite,
183+
accessor_mapping=accessor_mapping,
184+
raise_on_missing=raise_on_missing,
185+
)
145186
return cls
146187

147188
return add_delegate_accessors

pandas/core/arrays/arrow/array.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
Scalar,
2424
SortKind,
2525
TakeIndexer,
26+
TimeAmbiguous,
27+
TimeNonexistent,
2628
npt,
2729
)
2830
from pandas.compat import (
@@ -53,6 +55,8 @@
5355
validate_indices,
5456
)
5557

58+
from pandas.tseries.frequencies import to_offset
59+
5660
if not pa_version_under7p0:
5761
import pyarrow as pa
5862
import pyarrow.compute as pc
@@ -1413,3 +1417,154 @@ def _replace_with_mask(
14131417
result = np.array(values, dtype=object)
14141418
result[mask] = replacements
14151419
return pa.array(result, type=values.type, from_pandas=True)
1420+
1421+
@property
1422+
def _dt_day(self):
1423+
return type(self)(pc.day(self._data))
1424+
1425+
@property
1426+
def _dt_day_of_week(self):
1427+
return type(self)(pc.day_of_week(self._data))
1428+
1429+
_dt_dayofweek = _dt_day_of_week
1430+
_dt_weekday = _dt_day_of_week
1431+
1432+
@property
1433+
def _dt_day_of_year(self):
1434+
return type(self)(pc.day_of_year(self._data))
1435+
1436+
_dt_dayofyear = _dt_day_of_year
1437+
1438+
@property
1439+
def _dt_hour(self):
1440+
return type(self)(pc.hour(self._data))
1441+
1442+
def _dt_isocalendar(self):
1443+
return type(self)(pc.iso_calendar(self._data))
1444+
1445+
@property
1446+
def _dt_is_leap_year(self):
1447+
return type(self)(pc.is_leap_year(self._data))
1448+
1449+
@property
1450+
def _dt_microsecond(self):
1451+
return type(self)(pc.microsecond(self._data))
1452+
1453+
@property
1454+
def _dt_minute(self):
1455+
return type(self)(pc.minute(self._data))
1456+
1457+
@property
1458+
def _dt_month(self):
1459+
return type(self)(pc.month(self._data))
1460+
1461+
@property
1462+
def _dt_nanosecond(self):
1463+
return type(self)(pc.nanosecond(self._data))
1464+
1465+
@property
1466+
def _dt_quarter(self):
1467+
return type(self)(pc.quarter(self._data))
1468+
1469+
@property
1470+
def _dt_second(self):
1471+
return type(self)(pc.second(self._data))
1472+
1473+
@property
1474+
def _dt_date(self):
1475+
return type(self)(self._data.cast(pa.date64()))
1476+
1477+
@property
1478+
def _dt_time(self):
1479+
unit = (
1480+
self.dtype.pyarrow_dtype.unit
1481+
if self.dtype.pyarrow_dtype.unit in {"us", "ns"}
1482+
else "ns"
1483+
)
1484+
return type(self)(self._data.cast(pa.time64(unit)))
1485+
1486+
@property
1487+
def _dt_tz(self):
1488+
return self.dtype.pyarrow_dtype.tz
1489+
1490+
def _dt_strftime(self, format: str):
1491+
return type(self)(pc.strftime(self._data, format=format))
1492+
1493+
def _round_temporally(
1494+
self,
1495+
method: Literal["ceil", "floor", "round"],
1496+
freq,
1497+
ambiguous: TimeAmbiguous = "raise",
1498+
nonexistent: TimeNonexistent = "raise",
1499+
):
1500+
if ambiguous != "raise":
1501+
raise NotImplementedError("ambiguous is not supported.")
1502+
if nonexistent != "raise":
1503+
raise NotImplementedError("nonexistent is not supported.")
1504+
offset = to_offset(freq)
1505+
if offset is None:
1506+
raise ValueError(f"Must specify a valid frequency: {freq}")
1507+
pa_supported_unit = {
1508+
"A": "year",
1509+
"AS": "year",
1510+
"Q": "quarter",
1511+
"QS": "quarter",
1512+
"M": "month",
1513+
"MS": "month",
1514+
"W": "week",
1515+
"D": "day",
1516+
"H": "hour",
1517+
"T": "minute",
1518+
"S": "second",
1519+
"L": "millisecond",
1520+
"U": "microsecond",
1521+
"N": "nanosecond",
1522+
}
1523+
unit = pa_supported_unit.get(offset._prefix, None)
1524+
if unit is None:
1525+
raise ValueError(f"{freq=} is not supported")
1526+
multiple = offset.n
1527+
rounding_method = getattr(pc, f"{method}_temporal")
1528+
return type(self)(rounding_method(self._data, multiple=multiple, unit=unit))
1529+
1530+
def _dt_ceil(
1531+
self,
1532+
freq,
1533+
ambiguous: TimeAmbiguous = "raise",
1534+
nonexistent: TimeNonexistent = "raise",
1535+
):
1536+
return self._round_temporally("ceil", freq, ambiguous, nonexistent)
1537+
1538+
def _dt_floor(
1539+
self,
1540+
freq,
1541+
ambiguous: TimeAmbiguous = "raise",
1542+
nonexistent: TimeNonexistent = "raise",
1543+
):
1544+
return self._round_temporally("floor", freq, ambiguous, nonexistent)
1545+
1546+
def _dt_round(
1547+
self,
1548+
freq,
1549+
ambiguous: TimeAmbiguous = "raise",
1550+
nonexistent: TimeNonexistent = "raise",
1551+
):
1552+
return self._round_temporally("round", freq, ambiguous, nonexistent)
1553+
1554+
def _dt_tz_localize(
1555+
self,
1556+
tz,
1557+
ambiguous: TimeAmbiguous = "raise",
1558+
nonexistent: TimeNonexistent = "raise",
1559+
):
1560+
if ambiguous != "raise":
1561+
raise NotImplementedError(f"{ambiguous=} is not supported")
1562+
if nonexistent != "raise":
1563+
raise NotImplementedError(f"{nonexistent=} is not supported")
1564+
if tz is None:
1565+
new_type = pa.timestamp(self.dtype.pyarrow_dtype.unit)
1566+
return type(self)(self._data.cast(new_type))
1567+
pa_tz = str(tz)
1568+
return type(self)(
1569+
self._data.cast(pa.timestamp(self.dtype.pyarrow_dtype.unit, pa_tz))
1570+
)

pandas/core/arrays/arrow/dtype.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ def numpy_dtype(self) -> np.dtype:
112112

113113
@cache_readonly
114114
def kind(self) -> str:
115+
if pa.types.is_timestamp(self.pyarrow_dtype):
116+
# To mirror DatetimeTZDtype
117+
return "M"
115118
return self.numpy_dtype.kind
116119

117120
@cache_readonly

0 commit comments

Comments
 (0)