Skip to content

Commit 0c1872b

Browse files
authored
Merge pull request #65 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 33b2081 + e6bd49f commit 0c1872b

File tree

88 files changed

+2147
-1600
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+2147
-1600
lines changed

asv_bench/benchmarks/categoricals.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -258,9 +258,6 @@ def setup(self):
258258
def time_get_loc(self):
259259
self.index.get_loc(self.category)
260260

261-
def time_shape(self):
262-
self.index.shape
263-
264261
def time_shallow_copy(self):
265262
self.index._shallow_copy()
266263

asv_bench/benchmarks/index_cached_properties.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ class IndexCache:
77

88
params = [
99
[
10+
"CategoricalIndex",
1011
"DatetimeIndex",
1112
"Float64Index",
1213
"IntervalIndex",
@@ -42,6 +43,8 @@ def setup(self, index_type):
4243
self.idx = pd.Float64Index(range(N))
4344
elif index_type == "UInt64Index":
4445
self.idx = pd.UInt64Index(range(N))
46+
elif index_type == "CategoricalIndex":
47+
self.idx = pd.CategoricalIndex(range(N), range(N))
4548
else:
4649
raise ValueError
4750
assert len(self.idx) == N

asv_bench/benchmarks/index_object.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,6 @@ def time_datetime_difference_disjoint(self):
5555
self.datetime_left.difference(self.datetime_right)
5656

5757

58-
class Datetime:
59-
def setup(self):
60-
self.dr = date_range("20000101", freq="D", periods=10000)
61-
62-
def time_is_dates_only(self):
63-
self.dr._is_dates_only
64-
65-
6658
class Range:
6759
def setup(self):
6860
self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3)

asv_bench/benchmarks/indexing.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
"""
2+
These benchmarks are for Series and DataFrame indexing methods. For the
3+
lower-level methods directly on Index and subclasses, see index_object.py,
4+
indexing_engine.py, and index_cached_properties.py
5+
"""
16
import warnings
27

38
import numpy as np

asv_bench/benchmarks/period.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,6 @@ def setup(self):
8585
def time_get_loc(self):
8686
self.index.get_loc(self.period)
8787

88-
def time_shape(self):
89-
self.index.shape
90-
9188
def time_shallow_copy(self):
9289
self.index._shallow_copy()
9390

asv_bench/benchmarks/timedelta.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,6 @@ def setup(self):
7373
def time_get_loc(self):
7474
self.index.get_loc(self.timedelta)
7575

76-
def time_shape(self):
77-
self.index.shape
78-
7976
def time_shallow_copy(self):
8077
self.index._shallow_copy()
8178

asv_bench/benchmarks/timeseries.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ def time_to_date(self, index_type):
5757
def time_to_pydatetime(self, index_type):
5858
self.index.to_pydatetime()
5959

60+
def time_is_dates_only(self, index_type):
61+
self.index._is_dates_only
62+
6063

6164
class TzLocalize:
6265

doc/source/ecosystem.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ joining paths, replacing file extensions, and checking if files exist are also a
5656
Statistics and machine learning
5757
-------------------------------
5858

59+
`pandas-tfrecords <https://pypi.org/project/pandas-tfrecords/>`__
60+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
61+
62+
Easily save pandas DataFrames to the TensorFlow TFRecords format and read TFRecords back into pandas.
63+
5964
`Statsmodels <https://www.statsmodels.org/>`__
6065
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6166

doc/source/getting_started/basics.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,17 @@ of a 1D array of values. It can also be used as a function on regular arrays:
689689
s.value_counts()
690690
pd.value_counts(data)
691691
692+
.. versionadded:: 1.1.0
693+
694+
The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns.
695+
By default all columns are used but a subset can be selected using the ``subset`` argument.
696+
697+
.. ipython:: python
698+
699+
data = {"a": [1, 2, 3, 4], "b": ["x", "x", "y", "y"]}
700+
frame = pd.DataFrame(data)
701+
frame.value_counts()
702+
692703
Similarly, you can get the most frequently occurring value(s) (the mode) of the values in a Series or DataFrame:
693704

694705
.. ipython:: python

doc/source/reference/frame.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ Computations / descriptive stats
170170
DataFrame.std
171171
DataFrame.var
172172
DataFrame.nunique
173+
DataFrame.value_counts
173174

174175
Reindexing / selection / label manipulation
175176
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/user_guide/timeseries.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2297,6 +2297,35 @@ To remove time zone information, use ``tz_localize(None)`` or ``tz_convert(None)
22972297
# tz_convert(None) is identical to tz_convert('UTC').tz_localize(None)
22982298
didx.tz_convert('UTC').tz_localize(None)
22992299
2300+
.. _timeseries.fold:
2301+
2302+
Fold
2303+
~~~~
2304+
2305+
.. versionadded:: 1.1.0
2306+
2307+
For ambiguous times, pandas supports explicitly specifying the keyword-only fold argument.
2308+
Due to daylight saving time, one wall clock time can occur twice when shifting
2309+
from summer to winter time; fold describes whether the datetime-like corresponds
2310+
to the first (0) or the second time (1) the wall clock hits the ambiguous time.
2311+
Fold is supported only for constructing from naive ``datetime.datetime``
2312+
(see `datetime documentation <https://docs.python.org/3/library/datetime.html>`__ for details) or from :class:`Timestamp`
2313+
or for constructing from components (see below). Only ``dateutil`` timezones are supported
2314+
(see `dateutil documentation <https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.enfold>`__
2315+
for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz``
2316+
timezones do not support fold (see `pytz documentation <http://pytz.sourceforge.net/index.html>`__
2317+
for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime
2318+
with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend relying
2319+
on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct
2320+
control over how they are handled.
2321+
2322+
.. ipython:: python
2323+
2324+
pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0),
2325+
tz='dateutil/Europe/London', fold=0)
2326+
pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30,
2327+
tz='dateutil/Europe/London', fold=1)
2328+
23002329
.. _timeseries.timezone_ambiguous:
23012330

23022331
Ambiguous times when localizing

doc/source/whatsnew/v1.0.2.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Fixed regressions
2323
- Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`).
2424
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`)
2525
- Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`)
26+
- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`)
2627
-
2728

2829
.. ---------------------------------------------------------------------------
@@ -62,6 +63,7 @@ Bug fixes
6263
**Datetimelike**
6364

6465
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`)
66+
- Bug where :func:`to_datetime` would raise when passed ``pd.NA`` (:issue:`32213`)
6567

6668
**Categorical**
6769

doc/source/whatsnew/v1.1.0.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,28 @@ For example:
3636
ser["2014"]
3737
ser.loc["May 2015"]
3838
39+
.. _whatsnew_110.timestamp_fold_support:
40+
41+
Fold argument support in Timestamp constructor
42+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
43+
44+
:class:`Timestamp` now supports the keyword-only fold argument according to `PEP 495 <https://www.python.org/dev/peps/pep-0495/#the-fold-attribute>`_, similar to the parent ``datetime.datetime`` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to ``dateutil`` timezones as ``pytz`` doesn't support fold.
45+
46+
For example:
47+
48+
.. ipython:: python
49+
50+
ts = pd.Timestamp("2019-10-27 01:30:00+00:00")
51+
ts.fold
52+
53+
.. ipython:: python
54+
55+
ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30,
56+
tz="dateutil/Europe/London", fold=1)
57+
ts
58+
59+
For more on working with fold, see :ref:`Fold subsection <timeseries.fold>` in the user guide.
60+
3961
.. _whatsnew_110.enhancements.other:
4062

4163
Other enhancements
@@ -55,6 +77,7 @@ Other API changes
5577

5678
- :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes, statistics ``first`` and ``last``
5779
will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
80+
- Added :meth:`DataFrame.value_counts` (:issue:`5377`)
5881
- :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`)
5982
- ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`)
6083
-
@@ -114,6 +137,7 @@ Datetimelike
114137
- :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`)
115138
- Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`)
116139
- Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`)
140+
- Bug in :class:`Timestamp` raising a confusing error message when year, month or day is missing (:issue:`31200`)
117141

118142
Timedelta
119143
^^^^^^^^^

pandas/_libs/tslib.pyx

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,30 +49,31 @@ from pandas._libs.tslibs.tzconversion cimport (
4949

5050
cdef inline object create_datetime_from_ts(
5151
int64_t value, npy_datetimestruct dts,
52-
object tz, object freq):
52+
object tz, object freq, bint fold):
5353
""" convenience routine to construct a datetime.datetime from its parts """
5454
return datetime(dts.year, dts.month, dts.day, dts.hour,
55-
dts.min, dts.sec, dts.us, tz)
55+
dts.min, dts.sec, dts.us, tz, fold=fold)
5656

5757

5858
cdef inline object create_date_from_ts(
5959
int64_t value, npy_datetimestruct dts,
60-
object tz, object freq):
60+
object tz, object freq, bint fold):
6161
""" convenience routine to construct a datetime.date from its parts """
62+
# GH 25057 add fold argument to match other func_create signatures
6263
return date(dts.year, dts.month, dts.day)
6364

6465

6566
cdef inline object create_time_from_ts(
6667
int64_t value, npy_datetimestruct dts,
67-
object tz, object freq):
68+
object tz, object freq, bint fold):
6869
""" convenience routine to construct a datetime.time from its parts """
69-
return time(dts.hour, dts.min, dts.sec, dts.us, tz)
70+
return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold)
7071

7172

7273
@cython.wraparound(False)
7374
@cython.boundscheck(False)
7475
def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
75-
str box="datetime"):
76+
bint fold=0, str box="datetime"):
7677
"""
7778
Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp
7879
@@ -83,6 +84,13 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
8384
convert to this timezone
8485
freq : str/Offset, default None
8586
freq to convert
87+
fold : bint, default 0
88+
Due to daylight saving time, one wall clock time can occur twice
89+
when shifting from summer to winter time; fold describes whether the
90+
datetime-like corresponds to the first (0) or the second time (1)
91+
the wall clock hits the ambiguous time
92+
93+
.. versionadded:: 1.1.0
8694
box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
8795
If datetime, convert to datetime.datetime
8896
If date, convert to datetime.date
@@ -104,7 +112,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
104112
str typ
105113
int64_t value, delta, local_value
106114
ndarray[object] result = np.empty(n, dtype=object)
107-
object (*func_create)(int64_t, npy_datetimestruct, object, object)
115+
object (*func_create)(int64_t, npy_datetimestruct, object, object, bint)
108116

109117
if box == "date":
110118
assert (tz is None), "tz should be None when converting to date"
@@ -129,7 +137,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
129137
result[i] = <object>NaT
130138
else:
131139
dt64_to_dtstruct(value, &dts)
132-
result[i] = func_create(value, dts, tz, freq)
140+
result[i] = func_create(value, dts, tz, freq, fold)
133141
elif is_tzlocal(tz):
134142
for i in range(n):
135143
value = arr[i]
@@ -141,7 +149,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
141149
# using the i8 representation.
142150
local_value = tz_convert_utc_to_tzlocal(value, tz)
143151
dt64_to_dtstruct(local_value, &dts)
144-
result[i] = func_create(value, dts, tz, freq)
152+
result[i] = func_create(value, dts, tz, freq, fold)
145153
else:
146154
trans, deltas, typ = get_dst_info(tz)
147155

@@ -155,7 +163,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
155163
else:
156164
# Adjust datetime64 timestamp, recompute datetimestruct
157165
dt64_to_dtstruct(value + delta, &dts)
158-
result[i] = func_create(value, dts, tz, freq)
166+
result[i] = func_create(value, dts, tz, freq, fold)
159167

160168
elif typ == 'dateutil':
161169
# no zone-name change for dateutil tzs - dst etc
@@ -168,7 +176,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
168176
# Adjust datetime64 timestamp, recompute datetimestruct
169177
pos = trans.searchsorted(value, side='right') - 1
170178
dt64_to_dtstruct(value + deltas[pos], &dts)
171-
result[i] = func_create(value, dts, tz, freq)
179+
result[i] = func_create(value, dts, tz, freq, fold)
172180
else:
173181
# pytz
174182
for i in range(n):
@@ -182,7 +190,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
182190
new_tz = tz._tzinfos[tz._transition_info[pos]]
183191

184192
dt64_to_dtstruct(value + deltas[pos], &dts)
185-
result[i] = func_create(value, dts, new_tz, freq)
193+
result[i] = func_create(value, dts, new_tz, freq, fold)
186194

187195
return result
188196

pandas/_libs/tslibs/conversion.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ cdef class _TSObject:
1212
npy_datetimestruct dts # npy_datetimestruct
1313
int64_t value # numpy dt64
1414
object tzinfo
15+
bint fold
1516

1617

1718
cdef convert_to_tsobject(object ts, object tz, object unit,

0 commit comments

Comments
 (0)