Skip to content

Commit d1a743b

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into tslibs-offsets-inits
2 parents 82b3109 + 96a5274 commit d1a743b

File tree

14 files changed

+529
-250
lines changed

14 files changed

+529
-250
lines changed

asv_bench/benchmarks/timeseries.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,17 +346,22 @@ class ToDatetime(object):
346346

347347
def setup(self):
348348
self.rng = date_range(start='1/1/2000', periods=10000, freq='D')
349-
self.stringsD = Series((((self.rng.year * 10000) + (self.rng.month * 100)) + self.rng.day), dtype=np.int64).apply(str)
349+
self.stringsD = Series(self.rng.strftime('%Y%m%d'))
350350

351351
self.rng = date_range(start='1/1/2000', periods=20000, freq='H')
352-
self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng]
353-
self.strings_nosep = [x.strftime('%Y%m%d %H:%M:%S') for x in self.rng]
352+
self.strings = self.rng.strftime('%Y-%m-%d %H:%M:%S').tolist()
353+
self.strings_nosep = self.rng.strftime('%Y%m%d %H:%M:%S').tolist()
354354
self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800'
355355
for x in self.rng]
356356

357357
self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000))
358358
self.s2 = self.s.str.replace(':\\S+$', '')
359359

360+
self.unique_numeric_seconds = range(10000)
361+
self.dup_numeric_seconds = [1000] * 10000
362+
self.dup_string_dates = ['2000-02-11'] * 10000
363+
self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * 10000
364+
360365
def time_format_YYYYMMDD(self):
361366
to_datetime(self.stringsD, format='%Y%m%d')
362367

@@ -381,6 +386,36 @@ def time_format_exact(self):
381386
def time_format_no_exact(self):
382387
to_datetime(self.s, format='%d%b%y', exact=False)
383388

389+
def time_cache_true_with_unique_seconds_and_unit(self):
390+
to_datetime(self.unique_numeric_seconds, unit='s', cache=True)
391+
392+
def time_cache_false_with_unique_seconds_and_unit(self):
393+
to_datetime(self.unique_numeric_seconds, unit='s', cache=False)
394+
395+
def time_cache_true_with_dup_seconds_and_unit(self):
396+
to_datetime(self.dup_numeric_seconds, unit='s', cache=True)
397+
398+
def time_cache_false_with_dup_seconds_and_unit(self):
399+
to_datetime(self.dup_numeric_seconds, unit='s', cache=False)
400+
401+
def time_cache_true_with_dup_string_dates(self):
402+
to_datetime(self.dup_string_dates, cache=True)
403+
404+
def time_cache_false_with_dup_string_dates(self):
405+
to_datetime(self.dup_string_dates, cache=False)
406+
407+
def time_cache_true_with_dup_string_dates_and_format(self):
408+
to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=True)
409+
410+
def time_cache_false_with_dup_string_dates_and_format(self):
411+
to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=False)
412+
413+
def time_cache_true_with_dup_string_tzoffset_dates(self):
414+
to_datetime(self.dup_string_with_tz, cache=True)
415+
416+
def time_cache_false_with_dup_string_tzoffset_dates(self):
417+
to_datetime(self.dup_string_with_tz, cache=False)
418+
384419

385420
class Offsets(object):
386421
goal_time = 0.2

doc/source/release.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ Highlights include:
5252
- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here <whatsnew_0210.enhancements.parquet>`.
5353
- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
5454
categoricals independent of the data, see :ref:`here <whatsnew_0210.enhancements.categorical_dtype>`.
55-
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
55+
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
5656
- Compatibility fixes for pypy, see :ref:`here <whatsnew_0210.pypy>`.
5757
- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here <whatsnew_0210.enhancements.drop_api>`.
5858
- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here <whatsnew_0210.enhancements.infer_objects>`) and ``GroupBy.pipe`` (see :ref:`here <whatsnew_0210.enhancements.GroupBy_pipe>`).

doc/source/whatsnew/v0.21.0.txt

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Highlights include:
1212
- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here <whatsnew_0210.enhancements.parquet>`.
1313
- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
1414
categoricals independent of the data, see :ref:`here <whatsnew_0210.enhancements.categorical_dtype>`.
15-
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
15+
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
1616
- Compatibility fixes for pypy, see :ref:`here <whatsnew_0210.pypy>`.
1717
- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here <whatsnew_0210.enhancements.drop_api>`.
1818
- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here <whatsnew_0210.enhancements.infer_objects>`) and ``GroupBy.pipe`` (see :ref:`here <whatsnew_0210.enhancements.GroupBy_pipe>`).
@@ -369,47 +369,47 @@ Additionally, support has been dropped for Python 3.4 (:issue:`15251`).
369369

370370
.. _whatsnew_0210.api_breaking.bottleneck:
371371

372-
Sum/Prod of all-NaN Series/DataFrames is now consistently NaN
373-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
372+
Sum/Prod of all-NaN or empty Series/DataFrames is now consistently NaN
373+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
374374

375375
The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames no longer depends on
376-
whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed. (:issue:`9422`, :issue:`15507`).
376+
whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, and return value of ``sum`` and ``prod`` on an empty Series has changed (:issue:`9422`, :issue:`15507`).
377377

378378
Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, will result in ``NaN``. See the :ref:`docs <missing_data.numeric_sum>`.
379379

380380
.. ipython:: python
381381

382382
s = Series([np.nan])
383383

384-
Previously NO ``bottleneck``
384+
Previously WITHOUT ``bottleneck`` installed:
385385

386386
.. code-block:: ipython
387387

388388
In [2]: s.sum()
389389
Out[2]: np.nan
390390

391-
Previously WITH ``bottleneck``
391+
Previously WITH ``bottleneck``:
392392

393393
.. code-block:: ipython
394394

395395
In [2]: s.sum()
396396
Out[2]: 0.0
397397

398-
New Behavior, without regard to the bottleneck installation.
398+
New Behavior, without regard to the bottleneck installation:
399399

400400
.. ipython:: python
401401

402402
s.sum()
403403

404-
Note that this also changes the sum of an empty ``Series``
405-
406-
Previously regardless of ``bottlenck``
404+
Note that this also changes the sum of an empty ``Series``. Previously this always returned 0 regardless of a ``bottleneck`` installation:
407405

408406
.. code-block:: ipython
409407

410408
In [1]: pd.Series([]).sum()
411409
Out[1]: 0
412410

411+
but for consistency with the all-NaN case, this was changed to return NaN as well:
412+
413413
.. ipython:: python
414414

415415
pd.Series([]).sum()
@@ -877,6 +877,28 @@ New Behavior:
877877

878878
pd.interval_range(start=0, end=4)
879879

880+
.. _whatsnew_0210.api.mpl_converters:
881+
882+
No Automatic Matplotlib Converters
883+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
884+
885+
Pandas no longer registers our ``date``, ``time``, ``datetime``,
886+
``datetime64``, and ``Period`` converters with matplotlib when pandas is
887+
imported. Matplotlib plot methods (``plt.plot``, ``ax.plot``, ...) will not
888+
nicely format the x-axis for ``DatetimeIndex`` or ``PeriodIndex`` values. You
889+
must explicitly register these methods:
890+
891+
.. ipython:: python
892+
893+
from pandas.tseries import converter
894+
converter.register()
895+
896+
fig, ax = plt.subplots()
897+
plt.plot(pd.date_range('2017', periods=6), range(6))
898+
899+
Pandas built-in ``Series.plot`` and ``DataFrame.plot`` *will* register these
900+
converters on first-use (:issue:`17710`).
901+
880902
.. _whatsnew_0210.api:
881903

882904
Other API Changes
@@ -900,8 +922,6 @@ Other API Changes
900922
- Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`)
901923
- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`)
902924
- Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`).
903-
- Pandas no longer registers matplotlib converters on import. The converters
904-
will be registered and used when the first plot is draw (:issue:`17710`)
905925

906926
.. _whatsnew_0210.deprecations:
907927

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ Performance Improvements
7171
~~~~~~~~~~~~~~~~~~~~~~~~
7272

7373
- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`)
74-
-
74+
- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`)
7575
-
7676

7777
.. _whatsnew_0220.docs:

pandas/core/dtypes/cast.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def trans(x): # noqa
136136
try:
137137
if np.allclose(new_result, result, rtol=0):
138138
return new_result
139-
except:
139+
except Exception:
140140

141141
# comparison of an object dtype with a number type could
142142
# hit here
@@ -151,14 +151,14 @@ def trans(x): # noqa
151151
elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i', 'f']:
152152
try:
153153
result = result.astype(dtype)
154-
except:
154+
except Exception:
155155
if dtype.tz:
156156
# convert to datetime and change timezone
157157
from pandas import to_datetime
158158
result = to_datetime(result).tz_localize('utc')
159159
result = result.tz_convert(dtype.tz)
160160

161-
except:
161+
except Exception:
162162
pass
163163

164164
return result
@@ -210,7 +210,7 @@ def changeit():
210210
new_result[mask] = om_at
211211
result[:] = new_result
212212
return result, False
213-
except:
213+
except Exception:
214214
pass
215215

216216
# we are forced to change the dtype of the result as the input
@@ -243,7 +243,7 @@ def changeit():
243243

244244
try:
245245
np.place(result, mask, other)
246-
except:
246+
except Exception:
247247
return changeit()
248248

249249
return result, False
@@ -274,14 +274,14 @@ def maybe_promote(dtype, fill_value=np.nan):
274274
if issubclass(dtype.type, np.datetime64):
275275
try:
276276
fill_value = tslib.Timestamp(fill_value).value
277-
except:
277+
except Exception:
278278
# the proper thing to do here would probably be to upcast
279279
# to object (but numpy 1.6.1 doesn't do this properly)
280280
fill_value = iNaT
281281
elif issubclass(dtype.type, np.timedelta64):
282282
try:
283283
fill_value = lib.Timedelta(fill_value).value
284-
except:
284+
except Exception:
285285
# as for datetimes, cannot upcast to object
286286
fill_value = iNaT
287287
else:
@@ -592,12 +592,12 @@ def maybe_convert_scalar(values):
592592

593593
def coerce_indexer_dtype(indexer, categories):
594594
""" coerce the indexer input array to the smallest dtype possible """
595-
l = len(categories)
596-
if l < _int8_max:
595+
length = len(categories)
596+
if length < _int8_max:
597597
return _ensure_int8(indexer)
598-
elif l < _int16_max:
598+
elif length < _int16_max:
599599
return _ensure_int16(indexer)
600-
elif l < _int32_max:
600+
elif length < _int32_max:
601601
return _ensure_int32(indexer)
602602
return _ensure_int64(indexer)
603603

@@ -629,7 +629,7 @@ def conv(r, dtype):
629629
r = float(r)
630630
elif dtype.kind == 'i':
631631
r = int(r)
632-
except:
632+
except Exception:
633633
pass
634634

635635
return r
@@ -756,7 +756,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True,
756756
if not isna(new_values).all():
757757
values = new_values
758758

759-
except:
759+
except Exception:
760760
pass
761761
else:
762762
# soft-conversion
@@ -817,7 +817,7 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
817817
# If all NaNs, then do not-alter
818818
values = converted if not isna(converted).all() else values
819819
values = values.copy() if copy else values
820-
except:
820+
except Exception:
821821
pass
822822

823823
return values
@@ -888,10 +888,10 @@ def try_datetime(v):
888888
try:
889889
from pandas import to_datetime
890890
return to_datetime(v)
891-
except:
891+
except Exception:
892892
pass
893893

894-
except:
894+
except Exception:
895895
pass
896896

897897
return v.reshape(shape)
@@ -903,7 +903,7 @@ def try_timedelta(v):
903903
from pandas import to_timedelta
904904
try:
905905
return to_timedelta(v)._values.reshape(shape)
906-
except:
906+
except Exception:
907907
return v.reshape(shape)
908908

909909
inferred_type = lib.infer_datetimelike_array(_ensure_object(v))

pandas/core/indexes/base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,7 +2032,7 @@ def equals(self, other):
20322032
try:
20332033
return array_equivalent(_values_from_object(self),
20342034
_values_from_object(other))
2035-
except:
2035+
except Exception:
20362036
return False
20372037

20382038
def identical(self, other):
@@ -2315,7 +2315,7 @@ def intersection(self, other):
23152315
try:
23162316
indexer = Index(other._values).get_indexer(self._values)
23172317
indexer = indexer.take((indexer != -1).nonzero()[0])
2318-
except:
2318+
except Exception:
23192319
# duplicates
23202320
indexer = algos.unique1d(
23212321
Index(other._values).get_indexer_non_unique(self._values)[0])
@@ -3022,13 +3022,13 @@ def _reindex_non_unique(self, target):
30223022
new_indexer = None
30233023

30243024
if len(missing):
3025-
l = np.arange(len(indexer))
3025+
length = np.arange(len(indexer))
30263026

30273027
missing = _ensure_platform_int(missing)
30283028
missing_labels = target.take(missing)
3029-
missing_indexer = _ensure_int64(l[~check])
3029+
missing_indexer = _ensure_int64(length[~check])
30303030
cur_labels = self.take(indexer[check]).values
3031-
cur_indexer = _ensure_int64(l[check])
3031+
cur_indexer = _ensure_int64(length[check])
30323032

30333033
new_labels = np.empty(tuple([len(indexer)]), dtype=object)
30343034
new_labels[cur_indexer] = cur_labels

pandas/core/indexes/datetimes.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ def _generate(cls, start, end, periods, name, offset,
449449

450450
try:
451451
inferred_tz = timezones.infer_tzinfo(start, end)
452-
except:
452+
except Exception:
453453
raise TypeError('Start and end cannot both be tz-aware with '
454454
'different timezones')
455455

@@ -1176,12 +1176,12 @@ def __iter__(self):
11761176

11771177
# convert in chunks of 10k for efficiency
11781178
data = self.asi8
1179-
l = len(self)
1179+
length = len(self)
11801180
chunksize = 10000
1181-
chunks = int(l / chunksize) + 1
1181+
chunks = int(length / chunksize) + 1
11821182
for i in range(chunks):
11831183
start_i = i * chunksize
1184-
end_i = min((i + 1) * chunksize, l)
1184+
end_i = min((i + 1) * chunksize, length)
11851185
converted = libts.ints_to_pydatetime(data[start_i:end_i],
11861186
tz=self.tz, freq=self.freq,
11871187
box=True)

pandas/core/indexes/timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ def insert(self, loc, item):
841841
if _is_convertible_to_td(item):
842842
try:
843843
item = Timedelta(item)
844-
except:
844+
except Exception:
845845
pass
846846

847847
freq = None

0 commit comments

Comments
 (0)