Skip to content

Commit 9ff10fe

Browse files
committed
Merge remote-tracking branch 'upstream/master' into 26760-converter
2 parents fb694da + ea06f8d commit 9ff10fe

40 files changed

+773
-356
lines changed

.travis.yml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,5 @@ script:
103103
after_script:
104104
- echo "after_script start"
105105
- source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
106-
- if [ -e test-data-single.xml ]; then
107-
ci/print_skipped.py test-data-single.xml;
108-
fi
109-
- if [ -e test-data-multiple.xml ]; then
110-
ci/print_skipped.py test-data-multiple.xml;
111-
fi
106+
- ci/print_skipped.py
112107
- echo "after_script done"

asv_bench/asv.conf.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@
107107
// `asv` will cache wheels of the recent builds in each
108108
// environment, making them faster to install next time. This is
109109
// number of builds to keep, per environment.
110-
"wheel_cache_size": 8,
110+
"build_cache_size": 8,
111111

112112
// The commits after which the regression search in `asv publish`
113113
// should start looking for regressions. Dictionary whose keys are

asv_bench/benchmarks/index_object.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,9 @@ def setup(self, N):
196196
self.intv = IntervalIndex.from_arrays(left, right)
197197
self.intv._engine
198198

199+
self.intv2 = IntervalIndex.from_arrays(left + 1, right + 1)
200+
self.intv2._engine
201+
199202
self.left = IntervalIndex.from_breaks(np.arange(N))
200203
self.right = IntervalIndex.from_breaks(np.arange(N - 3, 2 * N - 3))
201204

@@ -208,8 +211,11 @@ def time_is_unique(self, N):
208211
def time_intersection(self, N):
209212
self.left.intersection(self.right)
210213

211-
def time_intersection_duplicate(self, N):
214+
def time_intersection_one_duplicate(self, N):
212215
self.intv.intersection(self.right)
213216

217+
def time_intersection_both_duplicate(self, N):
218+
self.intv.intersection(self.intv2)
219+
214220

215221
from .pandas_vb_common import setup # noqa: F401

ci/azure/posix.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,9 @@ jobs:
8989
# note that this will produce $LASTEXITCODE=1
9090
Write-Error "$($matches[1]) tests failed"
9191
}
92-
displayName: Check for test failures
92+
displayName: 'Check for test failures'
93+
- script: |
94+
export PATH=$HOME/miniconda3/bin:$PATH
95+
source activate pandas-dev
96+
python ci/print_skipped.py
97+
displayName: 'Print skipped tests'

ci/azure/windows.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ jobs:
1818

1919
steps:
2020
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
21-
displayName: Add conda to PATH
21+
displayName: 'Add conda to PATH'
2222
- script: conda update -q -n base conda
2323
displayName: Update conda
2424
- script: conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
25-
displayName: Create anaconda environment
25+
displayName: 'Create anaconda environment'
2626
- script: |
2727
call activate pandas-dev
2828
call conda list
@@ -48,4 +48,9 @@ jobs:
4848
# note that this will produce $LASTEXITCODE=1
4949
Write-Error "$($matches[1]) tests failed"
5050
}
51-
displayName: Check for test failures
51+
displayName: 'Check for test failures'
52+
- script: |
53+
export PATH=$HOME/miniconda3/bin:$PATH
54+
source activate pandas-dev
55+
python ci/print_skipped.py
56+
displayName: 'Print skipped tests'

ci/print_skipped.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python
22

3+
import os
34
import sys
45
import math
56
import xml.etree.ElementTree as et
@@ -36,19 +37,19 @@ def parse_results(filename):
3637
return '\n'.join(skipped)
3738

3839

39-
def main(args):
40+
def main():
41+
test_files = [
42+
'test-data-single.xml',
43+
'test-data-multiple.xml',
44+
'test-data.xml',
45+
]
46+
4047
print('SKIPPED TESTS:')
41-
for fn in args.filename:
42-
print(parse_results(fn))
48+
for fn in test_files:
49+
if os.path.isfile(fn):
50+
print(parse_results(fn))
4351
return 0
4452

4553

46-
def parse_args():
47-
import argparse
48-
parser = argparse.ArgumentParser()
49-
parser.add_argument('filename', nargs='+', help='XUnit file to parse')
50-
return parser.parse_args()
51-
52-
5354
if __name__ == '__main__':
54-
sys.exit(main(parse_args()))
55+
sys.exit(main())

doc/source/reference/indexing.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,13 @@ Conversion
403403
DatetimeIndex.to_series
404404
DatetimeIndex.to_frame
405405

406+
Methods
407+
~~~~~~~
408+
.. autosummary::
409+
:toctree: api/
410+
411+
DatetimeIndex.mean
412+
406413
TimedeltaIndex
407414
--------------
408415
.. autosummary::
@@ -435,6 +442,13 @@ Conversion
435442
TimedeltaIndex.ceil
436443
TimedeltaIndex.to_frame
437444

445+
Methods
446+
~~~~~~~
447+
.. autosummary::
448+
:toctree: api/
449+
450+
TimedeltaIndex.mean
451+
438452
.. currentmodule:: pandas
439453

440454
PeriodIndex

doc/source/user_guide/groupby.rst

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation",
595595
animals.groupby("kind").agg(
596596
min_height=pd.NamedAgg(column='height', aggfunc='min'),
597597
max_height=pd.NamedAgg(column='height', aggfunc='max'),
598-
average_weight=pd.NamedAgg(column='height', aggfunc=np.mean),
598+
average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean),
599599
)
600600
601601
@@ -606,7 +606,7 @@ accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation",
606606
animals.groupby("kind").agg(
607607
min_height=('height', 'min'),
608608
max_height=('height', 'max'),
609-
average_weight=('height', np.mean),
609+
average_weight=('weight', np.mean),
610610
)
611611
612612
@@ -630,6 +630,16 @@ requires additional arguments, partially apply them with :meth:`functools.partia
630630
consistent. To ensure consistent ordering, the keys (and so output columns)
631631
will always be sorted for Python 3.5.
632632

633+
Named aggregation is also valid for Series groupby aggregations. In this case there's
634+
no column selection, so the values are just the functions.
635+
636+
.. ipython:: python
637+
638+
animals.groupby("kind").height.agg(
639+
min_height='min',
640+
max_height='max',
641+
)
642+
633643
Applying different functions to DataFrame columns
634644
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
635645

doc/source/whatsnew/v0.25.0.rst

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Groupby Aggregation with Relabeling
2828
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2929

3030
Pandas has added special groupby behavior, known as "named aggregation", for naming the
31-
output columns when applying multiple aggregation functions to specific columns (:issue:`18366`).
31+
output columns when applying multiple aggregation functions to specific columns (:issue:`18366`, :issue:`26512`).
3232

3333
.. ipython:: python
3434
@@ -39,7 +39,7 @@ output columns when applying multiple aggregation functions to specific columns
3939
animals.groupby("kind").agg(
4040
min_height=pd.NamedAgg(column='height', aggfunc='min'),
4141
max_height=pd.NamedAgg(column='height', aggfunc='max'),
42-
average_weight=pd.NamedAgg(column='height', aggfunc=np.mean),
42+
average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean),
4343
)
4444
4545
Pass the desired columns names as the ``**kwargs`` to ``.agg``. The values of ``**kwargs``
@@ -52,12 +52,26 @@ what the arguments to the function are, but plain tuples are accepted as well.
5252
animals.groupby("kind").agg(
5353
min_height=('height', 'min'),
5454
max_height=('height', 'max'),
55-
average_weight=('height', np.mean),
55+
average_weight=('weight', np.mean),
5656
)
5757
5858
Named aggregation is the recommended replacement for the deprecated "dict-of-dicts"
5959
approach to naming the output of column-specific aggregations (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`).
6060

61+
A similar approach is now available for Series groupby objects as well. Because there's no need for
62+
column selection, the values can just be the functions to apply.
63+
64+
.. ipython:: python
65+
66+
animals.groupby("kind").height.agg(
67+
min_height="min",
68+
max_height="max",
69+
)
70+
71+
72+
This type of aggregation is the recommended alternative to the deprecated behavior when passing
73+
a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`).
74+
6175
See :ref:`groupby.aggregate.named` for more.
6276

6377
.. _whatsnew_0250.enhancements.other:
@@ -82,7 +96,9 @@ Other Enhancements
8296
- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now support quoting column names with backticks to refer to names with spaces (:issue:`6508`)
8397
- :func:`merge_asof` now gives a clearer error message when merge keys are categoricals that are not equal (:issue:`26136`)
8498
- :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`)
85-
-
99+
- Error message for missing required imports now includes the original import error's text (:issue:`23868`)
100+
- :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`)
101+
- :meth:`DataFrame.describe` now formats integer percentiles without decimal point (:issue:`26660`)
86102

87103
.. _whatsnew_0250.api_breaking:
88104

@@ -478,6 +494,8 @@ Other Deprecations
478494
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).
479495
- The internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` have been deprecated.
480496
Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`).
497+
- The :meth:`Series.ftype`, :meth:`Series.ftypes` and :meth:`DataFrame.ftypes` methods are deprecated and will be removed in a future version.
498+
Instead, use :meth:`Series.dtype` and :meth:`DataFrame.dtypes` (:issue:`26705`).
481499

482500

483501
.. _whatsnew_0250.prior_deprecations:
@@ -650,6 +668,8 @@ I/O
650668
- Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`)
651669
- Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows caching unique dates when they are parsed (:issue:`25990`)
652670
- :meth:`DataFrame.to_excel` now raises a ``ValueError`` when the caller's dimensions exceed the limitations of Excel (:issue:`26051`)
671+
- :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`)
672+
- Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`).
653673

654674
Plotting
655675
^^^^^^^^

pandas/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,10 @@
1010
try:
1111
__import__(dependency)
1212
except ImportError as e:
13-
missing_dependencies.append(dependency)
13+
missing_dependencies.append("{0}: {1}".format(dependency, str(e)))
1414

1515
if missing_dependencies:
16-
raise ImportError(
17-
"Missing required dependencies {0}".format(missing_dependencies))
16+
raise ImportError("Unable to import required dependencies:\n" + "\n".join(missing_dependencies))
1817
del hard_dependencies, dependency, missing_dependencies
1918

2019
# numpy compat

pandas/_libs/tslib.pyx

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -127,15 +127,15 @@ def ints_to_pydatetime(int64_t[:] arr, object tz=None, object freq=None,
127127
for i in range(n):
128128
value = arr[i]
129129
if value == NPY_NAT:
130-
result[i] = NaT
130+
result[i] = <object>NaT
131131
else:
132132
dt64_to_dtstruct(value, &dts)
133133
result[i] = func_create(value, dts, tz, freq)
134134
elif is_tzlocal(tz):
135135
for i in range(n):
136136
value = arr[i]
137137
if value == NPY_NAT:
138-
result[i] = NaT
138+
result[i] = <object>NaT
139139
else:
140140
# Python datetime objects do not support nanosecond
141141
# resolution (yet, PEP 564). Need to compute new value
@@ -152,7 +152,7 @@ def ints_to_pydatetime(int64_t[:] arr, object tz=None, object freq=None,
152152
for i in range(n):
153153
value = arr[i]
154154
if value == NPY_NAT:
155-
result[i] = NaT
155+
result[i] = <object>NaT
156156
else:
157157
# Adjust datetime64 timestamp, recompute datetimestruct
158158
dt64_to_dtstruct(value + delta, &dts)
@@ -164,7 +164,7 @@ def ints_to_pydatetime(int64_t[:] arr, object tz=None, object freq=None,
164164
for i in range(n):
165165
value = arr[i]
166166
if value == NPY_NAT:
167-
result[i] = NaT
167+
result[i] = <object>NaT
168168
else:
169169
# Adjust datetime64 timestamp, recompute datetimestruct
170170
pos = trans.searchsorted(value, side='right') - 1
@@ -175,7 +175,7 @@ def ints_to_pydatetime(int64_t[:] arr, object tz=None, object freq=None,
175175
for i in range(n):
176176
value = arr[i]
177177
if value == NPY_NAT:
178-
result[i] = NaT
178+
result[i] = <object>NaT
179179
else:
180180
# Adjust datetime64 timestamp, recompute datetimestruct
181181
pos = trans.searchsorted(value, side='right') - 1
@@ -439,11 +439,11 @@ def array_with_unit_to_datetime(ndarray values, object unit,
439439
val = values[i]
440440

441441
if checknull_with_nat(val):
442-
oresult[i] = NaT
442+
oresult[i] = <object>NaT
443443
elif is_integer_object(val) or is_float_object(val):
444444

445445
if val != val or val == NPY_NAT:
446-
oresult[i] = NaT
446+
oresult[i] = <object>NaT
447447
else:
448448
try:
449449
oresult[i] = Timestamp(cast_from_unit(val, unit))
@@ -452,7 +452,7 @@ def array_with_unit_to_datetime(ndarray values, object unit,
452452

453453
elif isinstance(val, str):
454454
if len(val) == 0 or val in nat_strings:
455-
oresult[i] = NaT
455+
oresult[i] = <object>NaT
456456

457457
else:
458458
oresult[i] = val
@@ -816,7 +816,7 @@ cdef array_to_datetime_object(ndarray[object] values, str errors,
816816
check_dts_bounds(&dts)
817817
except (ValueError, OverflowError):
818818
if is_coerce:
819-
oresult[i] = NaT
819+
oresult[i] = <object>NaT
820820
continue
821821
if is_raise:
822822
raise

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def ints_to_pytimedelta(int64_t[:] arr, box=False):
117117

118118
value = arr[i]
119119
if value == NPY_NAT:
120-
result[i] = NaT
120+
result[i] = <object>NaT
121121
else:
122122
if box:
123123
result[i] = Timedelta(value)

0 commit comments

Comments
 (0)