Skip to content

Commit 2837ebd

Browse files
committed
Merge branch 'master' into PR_TOOL_MERGE_PR_18852
2 parents 8e0076d + aa9e002 commit 2837ebd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+1905
-1022
lines changed

.travis.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ matrix:
4949
apt:
5050
packages:
5151
- python-gtk2
52+
# In allow_failures
5253
- dist: trusty
5354
env:
5455
- JOB="3.5_CONDA_BUILD_TEST" TEST_ARGS="--skip-slow --skip-network" CONDA_BUILD_TEST=true
@@ -76,6 +77,9 @@ matrix:
7677
env:
7778
- JOB="3.6_DOC" DOC=true
7879
allow_failures:
80+
- dist: trusty
81+
env:
82+
- JOB="3.5_CONDA_BUILD_TEST" TEST_ARGS="--skip-slow --skip-network" CONDA_BUILD_TEST=true
7983
- dist: trusty
8084
env:
8185
- JOB="2.7_SLOW" SLOW=true
@@ -95,6 +99,9 @@ matrix:
9599

96100
before_install:
97101
- echo "before_install"
102+
# set non-blocking IO on travis
103+
# https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024
104+
- python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);'
98105
- source ci/travis_process_gbq_encryption.sh
99106
- export PATH="$HOME/miniconda3/bin:$PATH"
100107
- df -h

asv_bench/benchmarks/frame_methods.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
55
isnull, NaT)
66

7-
from .pandas_vb_common import setup # noqa
7+
from .pandas_vb_common import setup # noqa
88

99

1010
class GetNumericData(object):
@@ -127,7 +127,7 @@ class ToHTML(object):
127127
def setup(self):
128128
nrows = 500
129129
self.df2 = DataFrame(np.random.randn(nrows, 10))
130-
self.df2[0] = period_range('2000', '2010', nrows)
130+
self.df2[0] = period_range('2000', periods=nrows)
131131
self.df2[1] = range(nrows)
132132

133133
def time_to_html_mixed(self):

asv_bench/benchmarks/groupby.py

Lines changed: 19 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from string import ascii_letters, digits
1+
from string import ascii_letters
22
from itertools import product
33
from functools import partial
44

@@ -275,18 +275,12 @@ class GroupStrings(object):
275275

276276
def setup(self):
277277
n = 2 * 10**5
278-
alpha = list(map(''.join, product((ascii_letters + digits), repeat=4)))
279-
self.df = DataFrame({'a': np.repeat(np.random.choice(alpha,
280-
(n // 11)), 11),
281-
'b': np.repeat(np.random.choice(alpha,
282-
(n // 7)), 7),
283-
'c': np.repeat(np.random.choice(alpha,
284-
(n // 5)), 5),
285-
'd': np.repeat(np.random.choice(alpha,
286-
(n // 1)), 1)})
278+
alpha = list(map(''.join, product(ascii_letters, repeat=4)))
279+
data = np.random.choice(alpha, (n // 5, 4), replace=False)
280+
data = np.repeat(data, 5, axis=0)
281+
self.df = DataFrame(data, columns=list('abcd'))
287282
self.df['joe'] = (np.random.randn(len(self.df)) * 10).round(3)
288-
i = np.random.permutation(len(self.df))
289-
self.df = self.df.iloc[i].reset_index(drop=True)
283+
self.df = self.df.sample(frac=1).reset_index(drop=True)
290284

291285
def time_multi_columns(self):
292286
self.df.groupby(list('abcd')).max()
@@ -356,10 +350,16 @@ class GroupByMethods(object):
356350

357351
goal_time = 0.2
358352

359-
param_names = ['dtype', 'ngroups']
360-
params = [['int', 'float'], [100, 10000]]
353+
param_names = ['dtype', 'method']
354+
params = [['int', 'float'],
355+
['all', 'any', 'count', 'cumcount', 'cummax', 'cummin',
356+
'cumprod', 'cumsum', 'describe', 'first', 'head', 'last', 'mad',
357+
'max', 'min', 'median', 'mean', 'nunique', 'pct_change', 'prod',
358+
'rank', 'sem', 'shift', 'size', 'skew', 'std', 'sum', 'tail',
359+
'unique', 'value_counts', 'var']]
361360

362-
def setup(self, dtype, ngroups):
361+
def setup(self, dtype, method):
362+
ngroups = 1000
363363
size = ngroups * 2
364364
rng = np.arange(ngroups)
365365
values = rng.take(np.random.randint(0, ngroups, size=size))
@@ -369,104 +369,11 @@ def setup(self, dtype, ngroups):
369369
key = np.concatenate([np.random.random(ngroups) * 0.1,
370370
np.random.random(ngroups) * 10.0])
371371

372-
self.df = DataFrame({'values': values,
373-
'key': key})
372+
df = DataFrame({'values': values, 'key': key})
373+
self.df_groupby_method = getattr(df.groupby('key')['values'], method)
374374

375-
def time_all(self, dtype, ngroups):
376-
self.df.groupby('key')['values'].all()
377-
378-
def time_any(self, dtype, ngroups):
379-
self.df.groupby('key')['values'].any()
380-
381-
def time_count(self, dtype, ngroups):
382-
self.df.groupby('key')['values'].count()
383-
384-
def time_cumcount(self, dtype, ngroups):
385-
self.df.groupby('key')['values'].cumcount()
386-
387-
def time_cummax(self, dtype, ngroups):
388-
self.df.groupby('key')['values'].cummax()
389-
390-
def time_cummin(self, dtype, ngroups):
391-
self.df.groupby('key')['values'].cummin()
392-
393-
def time_cumprod(self, dtype, ngroups):
394-
self.df.groupby('key')['values'].cumprod()
395-
396-
def time_cumsum(self, dtype, ngroups):
397-
self.df.groupby('key')['values'].cumsum()
398-
399-
def time_describe(self, dtype, ngroups):
400-
self.df.groupby('key')['values'].describe()
401-
402-
def time_diff(self, dtype, ngroups):
403-
self.df.groupby('key')['values'].diff()
404-
405-
def time_first(self, dtype, ngroups):
406-
self.df.groupby('key')['values'].first()
407-
408-
def time_head(self, dtype, ngroups):
409-
self.df.groupby('key')['values'].head()
410-
411-
def time_last(self, dtype, ngroups):
412-
self.df.groupby('key')['values'].last()
413-
414-
def time_mad(self, dtype, ngroups):
415-
self.df.groupby('key')['values'].mad()
416-
417-
def time_max(self, dtype, ngroups):
418-
self.df.groupby('key')['values'].max()
419-
420-
def time_mean(self, dtype, ngroups):
421-
self.df.groupby('key')['values'].mean()
422-
423-
def time_median(self, dtype, ngroups):
424-
self.df.groupby('key')['values'].median()
425-
426-
def time_min(self, dtype, ngroups):
427-
self.df.groupby('key')['values'].min()
428-
429-
def time_nunique(self, dtype, ngroups):
430-
self.df.groupby('key')['values'].nunique()
431-
432-
def time_pct_change(self, dtype, ngroups):
433-
self.df.groupby('key')['values'].pct_change()
434-
435-
def time_prod(self, dtype, ngroups):
436-
self.df.groupby('key')['values'].prod()
437-
438-
def time_rank(self, dtype, ngroups):
439-
self.df.groupby('key')['values'].rank()
440-
441-
def time_sem(self, dtype, ngroups):
442-
self.df.groupby('key')['values'].sem()
443-
444-
def time_shift(self, dtype, ngroups):
445-
self.df.groupby('key')['values'].shift()
446-
447-
def time_size(self, dtype, ngroups):
448-
self.df.groupby('key')['values'].size()
449-
450-
def time_skew(self, dtype, ngroups):
451-
self.df.groupby('key')['values'].skew()
452-
453-
def time_std(self, dtype, ngroups):
454-
self.df.groupby('key')['values'].std()
455-
456-
def time_sum(self, dtype, ngroups):
457-
self.df.groupby('key')['values'].sum()
458-
459-
def time_tail(self, dtype, ngroups):
460-
self.df.groupby('key')['values'].tail()
461-
462-
def time_unique(self, dtype, ngroups):
463-
self.df.groupby('key')['values'].unique()
464-
465-
def time_value_counts(self, dtype, ngroups):
466-
self.df.groupby('key')['values'].value_counts()
467-
468-
def time_var(self, dtype, ngroups):
469-
self.df.groupby('key')['values'].var()
375+
def time_method(self, dtype, method):
376+
self.df_groupby_method()
470377

471378

472379
class Float32(object):

asv_bench/benchmarks/timestamp.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1+
import datetime
2+
13
from pandas import Timestamp
24
import pytz
3-
import datetime
45

56

67
class TimestampConstruction(object):
7-
# TODO: classmethod constructors: fromordinal, fromtimestamp...
88

99
def time_parse_iso8601_no_tz(self):
1010
Timestamp('2017-08-25 08:16:14')
@@ -21,6 +21,12 @@ def time_parse_today(self):
2121
def time_parse_now(self):
2222
Timestamp('now')
2323

24+
def time_fromordinal(self):
25+
Timestamp.fromordinal(730120)
26+
27+
def time_fromtimestamp(self):
28+
Timestamp.fromtimestamp(1515448538)
29+
2430

2531
class TimestampProperties(object):
2632
goal_time = 0.2
@@ -36,9 +42,6 @@ def setup(self, tz, freq):
3642
def time_tz(self, tz, freq):
3743
self.ts.tz
3844

39-
def time_offset(self, tz, freq):
40-
self.ts.offset
41-
4245
def time_dayofweek(self, tz, freq):
4346
self.ts.dayofweek
4447

ci/install_travis.sh

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,6 @@ time conda create -n pandas --file=${REQ} || exit 1
101101

102102
source activate pandas
103103

104-
# https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024
105-
python -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)"
106-
107104
# may have addtl installation instructions for this build
108105
echo
109106
echo "[build addtl installs]"

ci/requirements-2.7.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ python=2.7*
22
python-dateutil=2.5.0
33
pytz=2013b
44
nomkl
5-
numpy
5+
numpy=1.13*
66
cython=0.24

ci/requirements-3.6_DOC.build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
python=3.6*
22
python-dateutil
33
pytz
4-
numpy
4+
numpy=1.13*
55
cython

conda.recipe/meta.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ requirements:
1414
build:
1515
- python
1616
- cython
17-
- {{ pin_compatible('numpy') }}
17+
- {{ pin_compatible('numpy', upper_bound='1.14') }}
1818
- setuptools >=3.3
1919
- python-dateutil >=2.5.0
2020
- pytz
2121

2222
run:
2323
- python
24-
- {{ pin_compatible('numpy') }}
24+
- {{ pin_compatible('numpy', upper_bound='1.14') }}
2525
- python-dateutil >=2.5.0
2626
- pytz
2727

doc/source/contributing.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -858,9 +858,9 @@ takes a regular expression. For example, this will only run tests from a
858858
If you want to only run a specific group of tests from a file, you can do it
859859
using ``.`` as a separator. For example::
860860
861-
asv continuous -f 1.1 upstream/master HEAD -b groupby.groupby_agg_builtins
861+
asv continuous -f 1.1 upstream/master HEAD -b groupby.GroupByMethods
862862
863-
will only run the ``groupby_agg_builtins`` benchmark defined in ``groupby.py``.
863+
will only run the ``GroupByMethods`` benchmark defined in ``groupby.py``.
864864
865865
You can also run the benchmark suite using the version of ``pandas``
866866
already installed in your current Python environment. This can be

doc/source/io.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ squeeze : boolean, default ``False``
149149
prefix : str, default ``None``
150150
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
151151
mangle_dupe_cols : boolean, default ``True``
152-
Duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X'.
152+
Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'.
153153
Passing in False will cause data to be overwritten if there are duplicate
154154
names in the columns.
155155

@@ -548,7 +548,7 @@ these names so as to prevent data overwrite:
548548
pd.read_csv(StringIO(data))
549549
550550
There is no more duplicate data because ``mangle_dupe_cols=True`` by default, which modifies
551-
a series of duplicate columns 'X'...'X' to become 'X.0'...'X.N'. If ``mangle_dupe_cols
551+
a series of duplicate columns 'X'...'X' to become 'X', 'X.1',...'X.N'. If ``mangle_dupe_cols
552552
=False``, duplicate data can arise:
553553

554554
.. code-block :: python

doc/source/whatsnew/v0.23.0.txt

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ Other Enhancements
241241
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
242242
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
243243
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
244+
- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`)
244245

245246
.. _whatsnew_0230.api_breaking:
246247

@@ -309,6 +310,8 @@ Other API Changes
309310
- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
310311
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
311312
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
313+
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)
314+
- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`)
312315

313316
.. _whatsnew_0230.deprecations:
314317

@@ -353,6 +356,9 @@ Removal of prior version deprecations/changes
353356
- The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`)
354357
- The ``Panel`` class has dropped the ``to_long`` and ``toLong`` methods (:issue:`19077`)
355358
- The options ``display.line_width`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`)
359+
- The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attr:`Categorical.codes` (:issue:`7768`)
360+
- The ``flavor`` parameter has been removed from the :func:`to_sql` method (:issue:`13611`)
361+
- The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`)
356362

357363
.. _whatsnew_0230.performance:
358364

@@ -416,7 +422,14 @@ Conversion
416422
- Fixed bug where comparing :class:`DatetimeIndex` failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`)
417423
- Bug in :class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`)
418424
- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`)
419-
-
425+
- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`)
426+
- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`17415`)
427+
- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`)
428+
429+
430+
431+
- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`)
432+
420433

421434
Indexing
422435
^^^^^^^^
@@ -437,8 +450,10 @@ Indexing
437450
- Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`)
438451
- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`)
439452
- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`)
453+
- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`)
440454
- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`)
441455
- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`)
456+
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
442457

443458
I/O
444459
^^^
@@ -483,6 +498,8 @@ Reshaping
483498
- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`)
484499
- Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`)
485500
- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`)
501+
- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`)
502+
-
486503

487504
Numeric
488505
^^^^^^^
@@ -507,4 +524,3 @@ Other
507524
^^^^^
508525

509526
- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
510-
- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`)

0 commit comments

Comments
 (0)