Skip to content

Commit d1a743b

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into tslibs-offsets-inits
2 parents 82b3109 + 96a5274 commit d1a743b

File tree

14 files changed

+529
-250
lines changed

14 files changed

+529
-250
lines changed

asv_bench/benchmarks/timeseries.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,17 +346,22 @@ class ToDatetime(object):
346346

347347
def setup(self):
348348
self.rng = date_range(start='1/1/2000', periods=10000, freq='D')
349-
self.stringsD = Series((((self.rng.year * 10000) + (self.rng.month * 100)) + self.rng.day), dtype=np.int64).apply(str)
349+
self.stringsD = Series(self.rng.strftime('%Y%m%d'))
350350

351351
self.rng = date_range(start='1/1/2000', periods=20000, freq='H')
352-
self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng]
353-
self.strings_nosep = [x.strftime('%Y%m%d %H:%M:%S') for x in self.rng]
352+
self.strings = self.rng.strftime('%Y-%m-%d %H:%M:%S').tolist()
353+
self.strings_nosep = self.rng.strftime('%Y%m%d %H:%M:%S').tolist()
354354
self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800'
355355
for x in self.rng]
356356

357357
self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000))
358358
self.s2 = self.s.str.replace(':\\S+$', '')
359359

360+
self.unique_numeric_seconds = range(10000)
361+
self.dup_numeric_seconds = [1000] * 10000
362+
self.dup_string_dates = ['2000-02-11'] * 10000
363+
self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * 10000
364+
360365
def time_format_YYYYMMDD(self):
361366
to_datetime(self.stringsD, format='%Y%m%d')
362367

@@ -381,6 +386,36 @@ def time_format_exact(self):
381386
def time_format_no_exact(self):
382387
to_datetime(self.s, format='%d%b%y', exact=False)
383388

389+
def time_cache_true_with_unique_seconds_and_unit(self):
390+
to_datetime(self.unique_numeric_seconds, unit='s', cache=True)
391+
392+
def time_cache_false_with_unique_seconds_and_unit(self):
393+
to_datetime(self.unique_numeric_seconds, unit='s', cache=False)
394+
395+
def time_cache_true_with_dup_seconds_and_unit(self):
396+
to_datetime(self.dup_numeric_seconds, unit='s', cache=True)
397+
398+
def time_cache_false_with_dup_seconds_and_unit(self):
399+
to_datetime(self.dup_numeric_seconds, unit='s', cache=False)
400+
401+
def time_cache_true_with_dup_string_dates(self):
402+
to_datetime(self.dup_string_dates, cache=True)
403+
404+
def time_cache_false_with_dup_string_dates(self):
405+
to_datetime(self.dup_string_dates, cache=False)
406+
407+
def time_cache_true_with_dup_string_dates_and_format(self):
408+
to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=True)
409+
410+
def time_cache_false_with_dup_string_dates_and_format(self):
411+
to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=False)
412+
413+
def time_cache_true_with_dup_string_tzoffset_dates(self):
414+
to_datetime(self.dup_string_with_tz, cache=True)
415+
416+
def time_cache_false_with_dup_string_tzoffset_dates(self):
417+
to_datetime(self.dup_string_with_tz, cache=False)
418+
384419

385420
class Offsets(object):
386421
goal_time = 0.2

doc/source/release.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ Highlights include:
5252
- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here <whatsnew_0210.enhancements.parquet>`.
5353
- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
5454
categoricals independent of the data, see :ref:`here <whatsnew_0210.enhancements.categorical_dtype>`.
55-
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
55+
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
5656
- Compatibility fixes for pypy, see :ref:`here <whatsnew_0210.pypy>`.
5757
- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here <whatsnew_0210.enhancements.drop_api>`.
5858
- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here <whatsnew_0210.enhancements.infer_objects>`) and ``GroupBy.pipe`` (see :ref:`here <whatsnew_0210.enhancements.GroupBy_pipe>`).

doc/source/whatsnew/v0.21.0.txt

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Highlights include:
1212
- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here <whatsnew_0210.enhancements.parquet>`.
1313
- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
1414
categoricals independent of the data, see :ref:`here <whatsnew_0210.enhancements.categorical_dtype>`.
15-
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
15+
- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`.
1616
- Compatibility fixes for pypy, see :ref:`here <whatsnew_0210.pypy>`.
1717
- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here <whatsnew_0210.enhancements.drop_api>`.
1818
- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here <whatsnew_0210.enhancements.infer_objects>`) and ``GroupBy.pipe`` (see :ref:`here <whatsnew_0210.enhancements.GroupBy_pipe>`).
@@ -369,47 +369,47 @@ Additionally, support has been dropped for Python 3.4 (:issue:`15251`).
369369

370370
.. _whatsnew_0210.api_breaking.bottleneck:
371371

372-
Sum/Prod of all-NaN Series/DataFrames is now consistently NaN
373-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
372+
Sum/Prod of all-NaN or empty Series/DataFrames is now consistently NaN
373+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
374374

375375
The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames no longer depends on
376-
whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed. (:issue:`9422`, :issue:`15507`).
376+
whether `bottleneck <http://berkeleyanalytics.com/bottleneck>`__ is installed, and return value of ``sum`` and ``prod`` on an empty Series has changed (:issue:`9422`, :issue:`15507`).
377377

378378
Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, will result in ``NaN``. See the :ref:`docs <missing_data.numeric_sum>`.
379379

380380
.. ipython:: python
381381

382382
s = Series([np.nan])
383383

384-
Previously NO ``bottleneck``
384+
Previously WITHOUT ``bottleneck`` installed:
385385

386386
.. code-block:: ipython
387387

388388
In [2]: s.sum()
389389
Out[2]: np.nan
390390

391-
Previously WITH ``bottleneck``
391+
Previously WITH ``bottleneck``:
392392

393393
.. code-block:: ipython
394394

395395
In [2]: s.sum()
396396
Out[2]: 0.0
397397

398-
New Behavior, without regard to the bottleneck installation.
398+
New Behavior, without regard to the bottleneck installation:
399399

400400
.. ipython:: python
401401

402402
s.sum()
403403

404-
Note that this also changes the sum of an empty ``Series``
405-
406-
Previously regardless of ``bottlenck``
404+
Note that this also changes the sum of an empty ``Series``. Previously this always returned 0 regardless of a ``bottleneck`` installation:
407405

408406
.. code-block:: ipython
409407

410408
In [1]: pd.Series([]).sum()
411409
Out[1]: 0
412410

411+
but for consistency with the all-NaN case, this was changed to return NaN as well:
412+
413413
.. ipython:: python
414414

415415
pd.Series([]).sum()
@@ -877,6 +877,28 @@ New Behavior:
877877

878878
pd.interval_range(start=0, end=4)
879879

880+
.. _whatsnew_0210.api.mpl_converters:
881+
882+
No Automatic Matplotlib Converters
883+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
884+
885+
Pandas no longer registers our ``date``, ``time``, ``datetime``,
886+
``datetime64``, and ``Period`` converters with matplotlib when pandas is
887+
imported. Matplotlib plot methods (``plt.plot``, ``ax.plot``, ...) will not
888+
nicely format the x-axis for ``DatetimeIndex`` or ``PeriodIndex`` values. You
889+
must explicitly register these methods:
890+
891+
.. ipython:: python
892+
893+
from pandas.tseries import converter
894+
converter.register()
895+
896+
fig, ax = plt.subplots()
897+
plt.plot(pd.date_range('2017', periods=6), range(6))
898+
899+
Pandas built-in ``Series.plot`` and ``DataFrame.plot`` *will* register these
900+
converters on first-use (:issue:`17710`).
901+
880902
.. _whatsnew_0210.api:
881903

882904
Other API Changes
@@ -900,8 +922,6 @@ Other API Changes
900922
- Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`)
901923
- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`)
902924
- Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`).
903-
- Pandas no longer registers matplotlib converters on import. The converters
904-
will be registered and used when the first plot is draw (:issue:`17710`)
905925

906926
.. _whatsnew_0210.deprecations:
907927

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ Performance Improvements
7171
~~~~~~~~~~~~~~~~~~~~~~~~
7272

7373
- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`)
74-
-
74+
- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`)
7575
-
7676

7777
.. _whatsnew_0220.docs:

pandas/core/dtypes/cast.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def trans(x): # noqa
136136
try:
137137
if np.allclose(new_result, result, rtol=0):
138138
return new_result
139-
except:
139+
except Exception:
140140

141141
# comparison of an object dtype with a number type could
142142
# hit here
@@ -151,14 +151,14 @@ def trans(x): # noqa
151151
elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i', 'f']:
152152
try:
153153
result = result.astype(dtype)
154-
except:
154+
except Exception:
155155
if dtype.tz:
156156
# convert to datetime and change timezone
157157
from pandas import to_datetime
158158
result = to_datetime(result).tz_localize('utc')
159159
result = result.tz_convert(dtype.tz)
160160

161-
except:
161+
except Exception:
162162
pass
163163

164164
return result
@@ -210,7 +210,7 @@ def changeit():
210210
new_result[mask] = om_at
211211
result[:] = new_result
212212
return result, False
213-
except:
213+
except Exception:
214214
pass
215215

216216
# we are forced to change the dtype of the result as the input
@@ -243,7 +243,7 @@ def changeit():
243243

244244
try:
245245
np.place(result, mask, other)
246-
except:
246+
except Exception:
247247
return changeit()
248248

249249
return result, False
@@ -274,14 +274,14 @@ def maybe_promote(dtype, fill_value=np.nan):
274274
if issubclass(dtype.type, np.datetime64):
275275
try:
276276
fill_value = tslib.Timestamp(fill_value).value
277-
except:
277+
except Exception:
278278
# the proper thing to do here would probably be to upcast
279279
# to object (but numpy 1.6.1 doesn't do this properly)
280280
fill_value = iNaT
281281
elif issubclass(dtype.type, np.timedelta64):
282282
try:
283283
fill_value = lib.Timedelta(fill_value).value
284-
except:
284+
except Exception:
285285
# as for datetimes, cannot upcast to object
286286
fill_value = iNaT
287287
else:
@@ -592,12 +592,12 @@ def maybe_convert_scalar(values):
592592

593593
def coerce_indexer_dtype(indexer, categories):
594594
""" coerce the indexer input array to the smallest dtype possible """
595-
l = len(categories)
596-
if l < _int8_max:
595+
length = len(categories)
596+
if length < _int8_max:
597597
return _ensure_int8(indexer)
598-
elif l < _int16_max:
598+
elif length < _int16_max:
599599
return _ensure_int16(indexer)
600-
elif l < _int32_max:
600+
elif length < _int32_max:
601601
return _ensure_int32(indexer)
602602
return _ensure_int64(indexer)
603603

@@ -629,7 +629,7 @@ def conv(r, dtype):
629629
r = float(r)
630630
elif dtype.kind == 'i':
631631
r = int(r)
632-
except:
632+
except Exception:
633633
pass
634634

635635
return r
@@ -756,7 +756,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True,
756756
if not isna(new_values).all():
757757
values = new_values
758758

759-
except:
759+
except Exception:
760760
pass
761761
else:
762762
# soft-conversion
@@ -817,7 +817,7 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
817817
# If all NaNs, then do not-alter
818818
values = converted if not isna(converted).all() else values
819819
values = values.copy() if copy else values
820-
except:
820+
except Exception:
821821
pass
822822

823823
return values
@@ -888,10 +888,10 @@ def try_datetime(v):
888888
try:
889889
from pandas import to_datetime
890890
return to_datetime(v)
891-
except:
891+
except Exception:
892892
pass
893893

894-
except:
894+
except Exception:
895895
pass
896896

897897
return v.reshape(shape)
@@ -903,7 +903,7 @@ def try_timedelta(v):
903903
from pandas import to_timedelta
904904
try:
905905
return to_timedelta(v)._values.reshape(shape)
906-
except:
906+
except Exception:
907907
return v.reshape(shape)
908908

909909
inferred_type = lib.infer_datetimelike_array(_ensure_object(v))

pandas/core/indexes/base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,7 +2032,7 @@ def equals(self, other):
20322032
try:
20332033
return array_equivalent(_values_from_object(self),
20342034
_values_from_object(other))
2035-
except:
2035+
except Exception:
20362036
return False
20372037

20382038
def identical(self, other):
@@ -2315,7 +2315,7 @@ def intersection(self, other):
23152315
try:
23162316
indexer = Index(other._values).get_indexer(self._values)
23172317
indexer = indexer.take((indexer != -1).nonzero()[0])
2318-
except:
2318+
except Exception:
23192319
# duplicates
23202320
indexer = algos.unique1d(
23212321
Index(other._values).get_indexer_non_unique(self._values)[0])
@@ -3022,13 +3022,13 @@ def _reindex_non_unique(self, target):
30223022
new_indexer = None
30233023

30243024
if len(missing):
3025-
l = np.arange(len(indexer))
3025+
length = np.arange(len(indexer))
30263026

30273027
missing = _ensure_platform_int(missing)
30283028
missing_labels = target.take(missing)
3029-
missing_indexer = _ensure_int64(l[~check])
3029+
missing_indexer = _ensure_int64(length[~check])
30303030
cur_labels = self.take(indexer[check]).values
3031-
cur_indexer = _ensure_int64(l[check])
3031+
cur_indexer = _ensure_int64(length[check])
30323032

30333033
new_labels = np.empty(tuple([len(indexer)]), dtype=object)
30343034
new_labels[cur_indexer] = cur_labels

pandas/core/indexes/datetimes.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,7 @@ def _generate(cls, start, end, periods, name, offset,
449449

450450
try:
451451
inferred_tz = timezones.infer_tzinfo(start, end)
452-
except:
452+
except Exception:
453453
raise TypeError('Start and end cannot both be tz-aware with '
454454
'different timezones')
455455

@@ -1176,12 +1176,12 @@ def __iter__(self):
11761176

11771177
# convert in chunks of 10k for efficiency
11781178
data = self.asi8
1179-
l = len(self)
1179+
length = len(self)
11801180
chunksize = 10000
1181-
chunks = int(l / chunksize) + 1
1181+
chunks = int(length / chunksize) + 1
11821182
for i in range(chunks):
11831183
start_i = i * chunksize
1184-
end_i = min((i + 1) * chunksize, l)
1184+
end_i = min((i + 1) * chunksize, length)
11851185
converted = libts.ints_to_pydatetime(data[start_i:end_i],
11861186
tz=self.tz, freq=self.freq,
11871187
box=True)

pandas/core/indexes/timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ def insert(self, loc, item):
841841
if _is_convertible_to_td(item):
842842
try:
843843
item = Timedelta(item)
844-
except:
844+
except Exception:
845845
pass
846846

847847
freq = None

0 commit comments

Comments
 (0)