Merge remote-tracking branch 'upstream/master' into depr-sparse-depr

TomAugspurger · TomAugspurger · commit b2aef9515c4f · 2019-05-16T11:27:26.000-05:00
diff --git a/.travis.yml b/.travis.yml
@@ -86,6 +86,14 @@ install:
   - ci/submit_cython_cache.sh
   - echo "install done"
 
+before_script:
+  # display server (for clipboard functionality) needs to be started here,
+  # does not work if done in install:setup_env.sh (GH-26103)
+  - export DISPLAY=":99.0"
+  - echo "sh -e /etc/init.d/xvfb start"
+  - sh -e /etc/init.d/xvfb start
+  - sleep 3
+
 script:
   - echo "script start"
   - source activate pandas-dev
diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml
@@ -19,10 +19,13 @@ jobs:
   steps:
     - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
       displayName: Add conda to PATH
-    - script: conda env create --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
+    - script: conda update -q -n base conda
+      displayName: Update conda
+    - script: conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
       displayName: Create anaconda environment
     - script: |
         call activate pandas-dev
+        call conda list
         ci\\incremental\\build.cmd
       displayName: 'Build'
     - script: |
diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml
@@ -1,9 +1,11 @@
 name: pandas-dev
 channels:
   - defaults
+  - conda-forge
 dependencies:
   - beautifulsoup4
   - bottleneck
+  - gcsfs
   - html5lib
   - jinja2
   - lxml
diff --git a/ci/setup_env.sh b/ci/setup_env.sh
@@ -118,16 +118,10 @@ echo "conda list"
 conda list
 
 # Install DB for Linux
-export DISPLAY=":99."
 if [ ${TRAVIS_OS_NAME} == "linux" ]; then
   echo "installing dbs"
   mysql -e 'create database pandas_nosetest;'
   psql -c 'create database pandas_nosetest;' -U postgres
-
-  echo
-  echo "sh -e /etc/init.d/xvfb start"
-  sh -e /etc/init.d/xvfb start
-  sleep 3
 else
    echo "not using dbs on non-linux"
 fi
diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst
@@ -15,8 +15,7 @@ Sparse data structures
 Pandas provides data structures for efficiently storing sparse data.
 These are not necessarily sparse in the typical "mostly 0". Rather, you can view these
 objects as being "compressed" where any data matching a specific value (``NaN`` / missing value, though any value
-can be chosen, including 0) is omitted. A special ``SparseIndex`` object tracks where data has been
-"sparsified". For example,
+can be chosen, including 0) is omitted. The compressed values are not actually stored in the array.
 
 .. ipython:: python
 
@@ -121,21 +120,13 @@ class itself for creating a Series with sparse data from a scipy COO matrix with
 A ``.sparse`` accessor has been added for :class:`DataFrame` as well.
 See :ref:`api.dataframe.sparse` for more.
 
-SparseIndex objects
--------------------
-
-Two kinds of ``SparseIndex`` are implemented, ``block`` and ``integer``. We
-recommend using ``block`` as it's more memory efficient. The ``integer`` format
-keeps an arrays of all of the locations where the data are not equal to the
-fill value. The ``block`` format tracks only the locations and sizes of blocks
-of data.
-
 .. _sparse.calculation:
 
 Sparse Calculation
 ------------------
 
-You can apply NumPy *ufuncs* to ``SparseArray`` and get a ``SparseArray`` as a result.
+You can apply NumPy `ufuncs <https://docs.scipy.org/doc/numpy/reference/ufuncs.html>`_
+to ``SparseArray`` and get a ``SparseArray`` as a result.
 
 .. ipython:: python
 
@@ -165,21 +156,14 @@ sparse values instead.
 **There's no performance or memory penalty to using a Series or DataFrame with sparse values,
 rather than a SparseSeries or SparseDataFrame**.
 
-This section provides some guidance on migrating your code to the new style. As a reminder, you can
-use the python warnings module to control warnings. If you wish to ignore the warnings,
-
-.. code-block:: python
-
-   >>> import warnings
+This section provides some guidance on migrating your code to the new style. As a reminder,
+you can use the Python ``warnings`` module to control warnings. But we recommend modifying
+your code, rather than ignoring the warning.
 
-   >>> warnings.filterwarnings('ignore', 'Sparse', FutureWarning)
-   >>> pd.SparseSeries()  # No warning message
-   Series([], dtype: Sparse[float64, nan])
-   BlockIndex
-   Block locations: array([], dtype=int32)
-   Block lengths: array([], dtype=int32)
+**General Differences**
 
-But we recommend modifying your code, rather than ignoring the warning.
+In a SparseDataFrame, *all* columns were sparse. A :class:`DataFrame` can have a mixture of
+sparse and dense columns.
 
 **Construction**
 
@@ -188,7 +172,7 @@ From an array-like, use the regular :class:`Series` or
 
 .. code-block:: python
 
-   # Old way
+   # Previous way
    >>> pd.SparseDataFrame({"A": [0, 1]})
 
 .. ipython:: python
@@ -200,7 +184,7 @@ From a SciPy sparse matrix, use :meth:`DataFrame.sparse.from_spmatrix`,
 
 .. code-block:: python
 
-   # Old way
+   # Previous way
    >>> from scipy import sparse
    >>> mat = sparse.eye(3)
    >>> df = pd.SparseDataFrame(mat, columns=['A', 'B', 'C'])
diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst
@@ -238,14 +238,9 @@ Enhancements
 
     - support ``read_hdf/to_hdf`` API similar to ``read_csv/to_csv``
 
-      .. ipython:: python
-          :suppress:
-
-          from pandas.compat import lrange
-
       .. ipython:: python
 
-          df = pd.DataFrame({'A': lrange(5), 'B': lrange(5)})
+          df = pd.DataFrame({'A': range(5), 'B': range(5)})
           df.to_hdf('store.h5', 'table', append=True)
           pd.read_hdf('store.h5', 'table', where=['index > 2'])
 
diff --git a/doc/source/whatsnew/v0.12.0.rst b/doc/source/whatsnew/v0.12.0.rst
@@ -83,13 +83,8 @@ API changes
     ``iloc`` API to be *purely* positional based.
 
     .. ipython:: python
-       :suppress:
 
-       from pandas.compat import lrange
-
-    .. ipython:: python
-
-       df = pd.DataFrame(lrange(5), list('ABCDE'), columns=['a'])
+       df = pd.DataFrame(range(5), index=list('ABCDE'), columns=['a'])
        mask = (df.a % 2 == 0)
        mask
 
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -255,9 +255,35 @@ Other API Changes
 Deprecations
 ~~~~~~~~~~~~
 
+Sparse Subclasses
+^^^^^^^^^^^^^^^^^
+
+The ``SparseSeries`` and ``SparseDataFrame`` subclasses are deprecated. Their functionality is better provided
+by a ``Series`` or ``DataFrame`` with sparse values.
+
+**Previous Way**
+
+.. ipython:: python
+   :okwarning:
+
+   df = pd.SparseDataFrame({"A": [0, 0, 1, 2]})
+   df.dtypes
+
+**New Way**
+
+.. ipython:: python
+
+   df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])})
+   df.dtypes
+
+The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`).
+
+Other Deprecations
+^^^^^^^^^^^^^^^^^^
+
 - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
 - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
-- The ``SparseSeries`` and ``SparseDataFrame`` subclasses are deprecated. Use a ``DataFrame`` or ``Series`` with sparse values instead. See :ref:`sparse.migration` for more (:issue:`19239`).
+- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version.
 
 .. _whatsnew_0250.prior_deprecations:
 
@@ -374,7 +400,7 @@ Indexing
 - Improved exception message when calling :meth:`DataFrame.iloc` with a list of non-numeric objects (:issue:`25753`).
 - Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` where ``KeyError`` was not raised for a ``MultiIndex`` when the key was less than or equal to the number of levels in the :class:`MultiIndex` (:issue:`14885`).
 - Bug in which :meth:`DataFrame.append` produced an erroneous warning indicating that a ``KeyError`` will be thrown in the future when the data to be appended contains new columns (:issue:`22252`).
--
+- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
 
 
 Missing
diff --git a/mypy.ini b/mypy.ini
@@ -23,15 +23,9 @@ ignore_errors=True
 [mypy-pandas.core.internals.blocks]
 ignore_errors=True
 
-[mypy-pandas.core.ops]
-ignore_errors=True
-
 [mypy-pandas.core.panel]
 ignore_errors=True
 
-[mypy-pandas.core.resample]
-ignore_errors=True
-
 [mypy-pandas.core.reshape.merge]
 ignore_errors=True
 
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
@@ -2208,10 +2208,6 @@ cdef class _Period:
     def now(cls, freq=None):
         return Period(datetime.now(), freq=freq)
 
-    # HACK IT UP AND YOU BETTER FIX IT SOON
-    def __str__(self):
-        return self.__unicode__()
-
     @property
     def freqstr(self):
         return self.freq.freqstr
@@ -2221,9 +2217,9 @@ cdef class _Period:
         formatted = period_format(self.ordinal, base)
         return "Period('%s', '%s')" % (formatted, self.freqstr)
 
-    def __unicode__(self):
+    def __str__(self):
         """
-        Return a unicode string representation for a particular DataFrame
+        Return a string representation for a particular Period
         """
         base, mult = get_freq_code(self.freq)
         formatted = period_format(self.ordinal, base)
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -126,14 +126,11 @@ class PandasExtensionDtype(ExtensionDtype):
     isnative = 0
     _cache = {}  # type: Dict[str_type, 'PandasExtensionDtype']
 
-    def __unicode__(self):
-        return self.name
-
     def __str__(self):
         """
         Return a string representation for a particular Object
         """
-        return self.__unicode__()
+        return self.name
 
     def __bytes__(self):
         """
@@ -142,7 +139,7 @@ def __bytes__(self):
         from pandas._config import get_option
 
         encoding = get_option("display.encoding")
-        return self.__unicode__().encode(encoding, 'replace')
+        return str(self).encode(encoding, 'replace')
 
     def __repr__(self):
         """
@@ -707,7 +704,7 @@ def construct_from_string(cls, string):
 
         raise TypeError("Could not construct DatetimeTZDtype")
 
-    def __unicode__(self):
+    def __str__(self):
         return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz)
 
     @property
@@ -837,12 +834,12 @@ def construct_from_string(cls, string):
                 pass
         raise TypeError("could not construct PeriodDtype")
 
-    def __unicode__(self):
-        return str(self.name)
+    def __str__(self):
+        return self.name
 
     @property
     def name(self):
-        return str("period[{freq}]".format(freq=self.freq.freqstr))
+        return "period[{freq}]".format(freq=self.freq.freqstr)
 
     @property
     def na_value(self):
@@ -1007,7 +1004,7 @@ def construct_from_string(cls, string):
     def type(self):
         return Interval
 
-    def __unicode__(self):
+    def __str__(self):
         if self.subtype is None:
             return "interval"
         return "interval[{subtype}]".format(subtype=self.subtype)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -10177,11 +10177,14 @@ def mad(self, axis=None, skipna=None, level=None):
             nanops.nanstd)
 
         @Substitution(desc="Return the compound percentage of the values for "
-                      "the requested axis.", name1=name, name2=name2,
-                      axis_descr=axis_descr,
+                      "the requested axis.\n\n.. deprecated:: 0.25.0",
+                      name1=name, name2=name2, axis_descr=axis_descr,
                       min_count='', see_also='', examples='')
         @Appender(_num_doc)
         def compound(self, axis=None, skipna=None, level=None):
+            msg = ("The 'compound' method is deprecated and will be "
+                   "removed in a future version.")
+            warnings.warn(msg, FutureWarning, stacklevel=2)
             if skipna is None:
                 skipna = True
             return (1 + self).prod(axis=axis, skipna=skipna, level=level) - 1
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -3,6 +3,7 @@
 split-apply-combine paradigm.
 """
 
+from typing import Tuple
 import warnings
 
 import numpy as np
@@ -84,7 +85,8 @@ class Grouper:
 
     >>> df.groupby(Grouper(level='date', freq='60s', axis=1))
     """
-    _attributes = ('key', 'level', 'freq', 'axis', 'sort')
+    _attributes = ('key', 'level', 'freq', 'axis',
+                   'sort')  # type: Tuple[str, ...]
 
     def __new__(cls, *args, **kwargs):
         if kwargs.get('freq') is not None:
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -946,7 +946,9 @@ def _format_native_types(self, na_rep='nan', **kwargs):
             new_codes.append(level_codes)
 
         if len(new_levels) == 1:
-            return Index(new_levels[0])._format_native_types()
+            # a single-level multi-index
+            return Index(new_levels[0].take(
+                new_codes[0]))._format_native_types()
         else:
             # reconstruct the multi-index
             mi = MultiIndex(levels=new_levels, codes=new_codes,
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -6,6 +6,7 @@
 import datetime
 import operator
 import textwrap
+from typing import Dict, Optional
 import warnings
 
 import numpy as np
@@ -625,15 +626,13 @@ def _get_op_name(op, special):
            'desc': 'Greater than or equal to',
            'reverse': None,
            'series_examples': None}
-}
+}  # type: Dict[str, Dict[str, Optional[str]]]
 
 _op_names = list(_op_descriptions.keys())
 for key in _op_names:
-    _op_descriptions[key]['reversed'] = False
     reverse_op = _op_descriptions[key]['reverse']
     if reverse_op is not None:
         _op_descriptions[reverse_op] = _op_descriptions[key].copy()
-        _op_descriptions[reverse_op]['reversed'] = True
         _op_descriptions[reverse_op]['reverse'] = key
 
 _flex_doc_SERIES = """
@@ -1010,7 +1009,7 @@ def _make_flex_doc(op_name, typ):
     op_name = op_name.replace('__', '')
     op_desc = _op_descriptions[op_name]
 
-    if op_desc['reversed']:
+    if op_name.startswith('r'):
         equiv = 'other ' + op_desc['op'] + ' ' + typ
     else:
         equiv = typ + ' ' + op_desc['op'] + ' other'
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py
diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py