Skip to content

Commit b043243

Browse files
committed
Merge remote-tracking branch 'upstream/master' into depr-sparse-depr
2 parents 30f3670 + ff4437e commit b043243

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

63 files changed

+343
-358
lines changed

asv_bench/benchmarks/io/csv.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import numpy as np
55
import pandas.util.testing as tm
6-
from pandas import DataFrame, Categorical, date_range, read_csv
6+
from pandas import DataFrame, Categorical, date_range, read_csv, to_datetime
77
from pandas.io.parsers import _parser_defaults
88
from io import StringIO
99

@@ -302,7 +302,7 @@ def mem_parser_chunks(self):
302302

303303
class ReadCSVParseSpecialDate(StringIORewind):
304304
params = (['mY', 'mdY', 'hm'],)
305-
params_name = ['value']
305+
param_names = ['value']
306306
objects = {
307307
'mY': '01-2019\n10-2019\n02/2000\n',
308308
'mdY': '12/02/2010\n',
@@ -319,4 +319,29 @@ def time_read_special_date(self, value):
319319
names=['Date'], parse_dates=['Date'])
320320

321321

322+
class ParseDateComparison(StringIORewind):
323+
params = ([False, True],)
324+
param_names = ['cache_dates']
325+
326+
def setup(self, cache_dates):
327+
count_elem = 10000
328+
data = '12-02-2010\n' * count_elem
329+
self.StringIO_input = StringIO(data)
330+
331+
def time_read_csv_dayfirst(self, cache_dates):
332+
read_csv(self.data(self.StringIO_input), sep=',', header=None,
333+
names=['Date'], parse_dates=['Date'], cache_dates=cache_dates,
334+
dayfirst=True)
335+
336+
def time_to_datetime_dayfirst(self, cache_dates):
337+
df = read_csv(self.data(self.StringIO_input),
338+
dtype={'date': str}, names=['date'])
339+
to_datetime(df['date'], cache=cache_dates, dayfirst=True)
340+
341+
def time_to_datetime_format_DD_MM_YYYY(self, cache_dates):
342+
df = read_csv(self.data(self.StringIO_input),
343+
dtype={'date': str}, names=['date'])
344+
to_datetime(df['date'], cache=cache_dates, format='%d-%m-%Y')
345+
346+
322347
from ..pandas_vb_common import setup # noqa: F401

ci/azure/windows.yml

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,10 @@ jobs:
1717
CONDA_PY: "37"
1818

1919
steps:
20-
- task: CondaEnvironment@1
21-
inputs:
22-
updateConda: no
23-
packageSpecs: ''
24-
25-
- script: |
26-
ci\\incremental\\setup_conda_environment.cmd
27-
displayName: 'Before Install'
20+
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
21+
displayName: Add conda to PATH
22+
- script: conda env create --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
23+
displayName: Create anaconda environment
2824
- script: |
2925
call activate pandas-dev
3026
ci\\incremental\\build.cmd

ci/code_checks.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
239239
pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
240240
RET=$(($RET + $?)) ; echo $MSG "DONE"
241241

242+
MSG='Doctests datetimes.py' ; echo $MSG
243+
pytest -q --doctest-modules pandas/core/tools/datetimes.py
244+
RET=$(($RET + $?)) ; echo $MSG "DONE"
245+
242246
MSG='Doctests top-level reshaping functions' ; echo $MSG
243247
pytest -q --doctest-modules \
244248
pandas/core/reshape/concat.py \

ci/incremental/setup_conda_environment.cmd

Lines changed: 0 additions & 23 deletions
This file was deleted.

doc/source/whatsnew/v0.25.0.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,37 @@ Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will
154154
cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before.
155155

156156

157+
``DataFrame`` groupby ffill/bfill no longer return group labels
158+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
159+
160+
The methods ``ffill``, ``bfill``, ``pad`` and ``backfill`` of
161+
:class:`DataFrameGroupBy <pandas.core.groupby.DataFrameGroupBy>`
162+
previously included the group labels in the return value, which was
163+
inconsistent with other groupby transforms. Now only the filled values
164+
are returned. (:issue:`21521`)
165+
166+
.. ipython:: python
167+
168+
df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]})
169+
df
170+
171+
*Previous Behaviour*:
172+
173+
.. code-block:: python
174+
175+
In [3]: df.groupby("a").ffill()
176+
Out[3]:
177+
a b
178+
0 x 1
179+
1 y 2
180+
181+
*New Behaviour*:
182+
183+
.. ipython:: python
184+
185+
df.groupby("a").ffill()
186+
187+
157188
.. _whatsnew_0250.api_breaking.deps:
158189

159190
Increased minimum versions for dependencies
@@ -299,6 +330,7 @@ Timezones
299330
- Bug in :func:`DataFrame.update` when updating with timezone aware data would return timezone naive data (:issue:`25807`)
300331
- Bug in :func:`to_datetime` where an uninformative ``RuntimeError`` was raised when passing a naive :class:`Timestamp` with datetime strings with mixed UTC offsets (:issue:`25978`)
301332
- Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`)
333+
- Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`)
302334

303335
Numeric
304336
^^^^^^^
@@ -409,6 +441,7 @@ Groupby/Resample/Rolling
409441
- Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`)
410442
- Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`)
411443
- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`)
444+
- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error (:issue:`26208`)
412445

413446

414447
Reshaping

pandas/_libs/parsers.pyx

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,14 @@ import pandas._libs.lib as lib
4141
from pandas._libs.khash cimport (
4242
khiter_t,
4343
kh_str_t, kh_init_str, kh_put_str, kh_exist_str,
44-
kh_get_str, kh_destroy_str, kh_resize_str,
44+
kh_get_str, kh_destroy_str,
4545
kh_float64_t, kh_get_float64, kh_destroy_float64,
4646
kh_put_float64, kh_init_float64, kh_resize_float64,
4747
kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox,
4848
kh_destroy_strbox,
4949
kh_str_starts_t, kh_put_str_starts_item, kh_init_str_starts,
5050
kh_get_str_starts_item, kh_destroy_str_starts, kh_resize_str_starts)
5151

52-
import pandas.compat as compat
5352
from pandas.core.dtypes.common import (
5453
is_categorical_dtype,
5554
is_integer_dtype, is_float_dtype,
@@ -477,14 +476,19 @@ cdef class TextReader:
477476

478477
self.verbose = verbose
479478
self.low_memory = low_memory
480-
self.parser.double_converter_nogil = xstrtod
481-
self.parser.double_converter_withgil = NULL
482-
if float_precision == 'high':
483-
self.parser.double_converter_nogil = precise_xstrtod
484-
self.parser.double_converter_withgil = NULL
485-
elif float_precision == 'round_trip': # avoid gh-15140
479+
480+
if float_precision == "round_trip":
481+
# see gh-15140
482+
#
483+
# Our current roundtrip implementation requires the GIL.
486484
self.parser.double_converter_nogil = NULL
487485
self.parser.double_converter_withgil = round_trip
486+
elif float_precision == "high":
487+
self.parser.double_converter_withgil = NULL
488+
self.parser.double_converter_nogil = precise_xstrtod
489+
else:
490+
self.parser.double_converter_withgil = NULL
491+
self.parser.double_converter_nogil = xstrtod
488492

489493
if isinstance(dtype, dict):
490494
dtype = {k: pandas_dtype(dtype[k])

pandas/compat/__init__.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
55
Cross-compatible functions for different versions of Python.
66
7-
Key items to import for compatible code:
8-
* lists: lrange()
9-
107
Other items:
118
* platform checker
129
"""
@@ -19,11 +16,6 @@
1916
PYPY = platform.python_implementation() == 'PyPy'
2017

2118

22-
# list-producing versions of the major Python iterating functions
23-
def lrange(*args, **kwargs):
24-
return list(range(*args, **kwargs))
25-
26-
2719
# ----------------------------------------------------------------------------
2820
# functions largely based / taken from the six module
2921

pandas/core/groupby/generic.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -916,8 +916,12 @@ def transform(self, func, *args, **kwargs):
916916
s = klass(res, indexer)
917917
results.append(s)
918918

919-
from pandas.core.reshape.concat import concat
920-
result = concat(results).sort_index()
919+
# check for empty "results" to avoid concat ValueError
920+
if results:
921+
from pandas.core.reshape.concat import concat
922+
result = concat(results).sort_index()
923+
else:
924+
result = Series()
921925

922926
# we will only try to coerce the result type if
923927
# we have a numeric dtype, as these are *always* udfs
@@ -1480,15 +1484,6 @@ def _apply_to_column_groupbys(self, func):
14801484
in self._iterate_column_groupbys()),
14811485
keys=self._selected_obj.columns, axis=1)
14821486

1483-
def _fill(self, direction, limit=None):
1484-
"""Overridden method to join grouped columns in output"""
1485-
res = super()._fill(direction, limit=limit)
1486-
output = OrderedDict(
1487-
(grp.name, grp.grouper) for grp in self.grouper.groupings)
1488-
1489-
from pandas import concat
1490-
return concat((self._wrap_transformed_output(output), res), axis=1)
1491-
14921487
def count(self):
14931488
"""
14941489
Compute count of group, excluding missing values.

pandas/core/groupby/groupby.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2235,7 +2235,6 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
22352235
limit=limit, freq=freq,
22362236
axis=axis))
22372237
filled = getattr(self, fill_method)(limit=limit)
2238-
filled = filled.drop(self.grouper.names, axis=1)
22392238
fill_grp = filled.groupby(self.grouper.labels)
22402239
shifted = fill_grp.shift(periods=periods, freq=freq)
22412240
return (filled / shifted) - 1

pandas/core/reshape/merge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1674,8 +1674,8 @@ def _right_outer_join(x, y, max_groups):
16741674
def _factorize_keys(lk, rk, sort=True):
16751675
# Some pre-processing for non-ndarray lk / rk
16761676
if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
1677-
lk = lk._data
1678-
rk = rk._data
1677+
lk = getattr(lk, '_values', lk)._data
1678+
rk = getattr(rk, '_values', rk)._data
16791679

16801680
elif (is_categorical_dtype(lk) and
16811681
is_categorical_dtype(rk) and

pandas/core/tools/datetimes.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
525525
'ms', 'us', 'ns']) or plurals of the same
526526
527527
>>> df = pd.DataFrame({'year': [2015, 2016],
528-
'month': [2, 3],
529-
'day': [4, 5]})
528+
... 'month': [2, 3],
529+
... 'day': [4, 5]})
530530
>>> pd.to_datetime(df)
531531
0 2015-02-04
532532
1 2016-03-05
@@ -548,8 +548,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
548548
Passing infer_datetime_format=True can often-times speedup a parsing
549549
if its not an ISO8601 format exactly, but in a regular format.
550550
551-
>>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000']*1000)
552-
551+
>>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
553552
>>> s.head()
554553
0 3/11/2000
555554
1 3/12/2000
@@ -558,10 +557,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
558557
4 3/12/2000
559558
dtype: object
560559
561-
>>> %timeit pd.to_datetime(s,infer_datetime_format=True)
560+
>>> %timeit pd.to_datetime(s,infer_datetime_format=True) # doctest: +SKIP
562561
100 loops, best of 3: 10.4 ms per loop
563562
564-
>>> %timeit pd.to_datetime(s,infer_datetime_format=False)
563+
>>> %timeit pd.to_datetime(s,infer_datetime_format=False) # doctest: +SKIP
565564
1 loop, best of 3: 471 ms per loop
566565
567566
Using a unix epoch time
@@ -577,10 +576,9 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
577576
Using a non-unix epoch origin
578577
579578
>>> pd.to_datetime([1, 2, 3], unit='D',
580-
origin=pd.Timestamp('1960-01-01'))
581-
0 1960-01-02
582-
1 1960-01-03
583-
2 1960-01-04
579+
... origin=pd.Timestamp('1960-01-01'))
580+
DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], \
581+
dtype='datetime64[ns]', freq=None)
584582
"""
585583
if arg is None:
586584
return None

pandas/plotting/_converter.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from pandas._libs import lib, tslibs
1313
from pandas._libs.tslibs import resolution
1414
from pandas._libs.tslibs.frequencies import FreqGroup, get_freq
15-
from pandas.compat import lrange
1615

1716
from pandas.core.dtypes.common import (
1817
is_datetime64_ns_dtype, is_float, is_float_dtype, is_integer,
@@ -1029,7 +1028,7 @@ def __call__(self):
10291028
base = self.base
10301029
(d, m) = divmod(vmin, base)
10311030
vmin = (d + 1) * base
1032-
locs = lrange(vmin, vmax + 1, base)
1031+
locs = list(range(vmin, vmax + 1, base))
10331032
return locs
10341033

10351034
def autoscale(self):

pandas/plotting/_core.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from pandas._config import get_option
1010

11-
from pandas.compat import lrange
1211
from pandas.errors import AbstractMethodError
1312
from pandas.util._decorators import Appender, cache_readonly
1413

@@ -583,9 +582,9 @@ def _get_xticks(self, convert_period=False):
583582
x = self.data.index._mpl_repr()
584583
else:
585584
self._need_to_set_index = True
586-
x = lrange(len(index))
585+
x = list(range(len(index)))
587586
else:
588-
x = lrange(len(index))
587+
x = list(range(len(index)))
589588

590589
return x
591590

pandas/plotting/_misc.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# being a bit too dynamic
22
import numpy as np
33

4-
from pandas.compat import lrange
54
from pandas.util._decorators import deprecate_kwarg
65

76
from pandas.core.dtypes.missing import notna
@@ -81,8 +80,8 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False,
8180
rdelta_ext = (rmax_ - rmin_) * range_padding / 2.
8281
boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext))
8382

84-
for i, a in zip(lrange(n), df.columns):
85-
for j, b in zip(lrange(n), df.columns):
83+
for i, a in enumerate(df.columns):
84+
for j, b in enumerate(df.columns):
8685
ax = axes[i, j]
8786

8887
if i == j:
@@ -420,7 +419,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
420419
for sampling in samplings])
421420
if fig is None:
422421
fig = plt.figure()
423-
x = lrange(samples)
422+
x = list(range(samples))
424423
axes = []
425424
ax1 = fig.add_subplot(2, 3, 1)
426425
ax1.set_xlabel("Sample")
@@ -532,7 +531,7 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
532531
raise ValueError('Length of xticks must match number of columns')
533532
x = xticks
534533
else:
535-
x = lrange(ncols)
534+
x = list(range(ncols))
536535

537536
if ax is None:
538537
ax = plt.gca()

0 commit comments

Comments
 (0)