sthagen · sthagen · Feb 12, 2020 · Feb 11, 2020 · Feb 11, 2020 · Feb 11, 2020
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,6 +10,20 @@ repos:
     -   id: flake8
         language: python_venv
         additional_dependencies: [flake8-comprehensions>=3.1.0]
+    -   id: flake8
+        name: flake8-pyx
+        language: python_venv
+        files: \.(pyx|pxd)$
+        types:
+          - file
+        args: [--append-config=flake8/cython.cfg]
+    -   id: flake8
+        name: flake8-pxd
+        language: python_venv
+        files: \.pxi\.in$
+        types:
+          - file
+        args: [--append-config=flake8/cython-template.cfg]
 -   repo: https://github.com/pre-commit/mirrors-isort
     rev: v4.3.21
     hooks:

diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py
@@ -160,4 +160,43 @@ def time_equals_non_object_index(self):
         self.mi_large_slow.equals(self.idx_non_object)
 
 
+class SetOperations:
+    """Time MultiIndex set operations for each index structure and level dtype."""
+    params = [
+        ("monotonic", "non_monotonic"),
+        ("datetime", "int", "string"),
+        ("intersection", "union", "symmetric_difference"),
+    ]
+    param_names = ["index_structure", "dtype", "method"]
+
+    def setup(self, index_structure, dtype, method):
+        N = 10 ** 5
+        level1 = range(1000)
+        # One two-level MultiIndex per dtype: level1 crossed with N // 1000 values.
+        level2 = date_range(start="1/1/2000", periods=N // 1000)
+        dates_left = MultiIndex.from_product([level1, level2])
+
+        level2 = range(N // 1000)
+        int_left = MultiIndex.from_product([level1, level2])
+
+        level2 = tm.makeStringIndex(N // 1000).values
+        str_left = MultiIndex.from_product([level1, level2])
+
+        data = {
+            "datetime": dates_left,
+            "int": int_left,
+            "string": str_left,
+        }
+        # The "non_monotonic" case simply reverses each index.
+        if index_structure == "non_monotonic":
+            data = {k: mi[::-1] for k, mi in data.items()}
+        # The right operand is the left index without its last entry.
+        data = {k: {"left": mi, "right": mi[:-1]} for k, mi in data.items()}
+        self.left = data[dtype]["left"]
+        self.right = data[dtype]["right"]
+
+    def time_operation(self, index_structure, dtype, method):
+        getattr(self.left, method)(self.right)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -65,12 +65,12 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     flake8 --format="$FLAKE8_FORMAT" .
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Linting .pyx code' ; echo $MSG
-    flake8 --format="$FLAKE8_FORMAT" pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
+    MSG='Linting .pyx and .pxd code' ; echo $MSG
+    flake8 --format="$FLAKE8_FORMAT" pandas --append-config=flake8/cython.cfg
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Linting .pxd and .pxi.in' ; echo $MSG
-    flake8 --format="$FLAKE8_FORMAT" pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
+    MSG='Linting .pxi.in' ; echo $MSG
+    flake8 --format="$FLAKE8_FORMAT" pandas/_libs --append-config=flake8/cython-template.cfg
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     echo "flake8-rst --version"

diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst
@@ -937,33 +937,31 @@ classes. This helps us keep docstrings consistent, while keeping things clear
 for the user reading. It comes at the cost of some complexity when writing.
 
 Each shared docstring will have a base template with variables, like
-``%(klass)s``. The variables filled in later on using the ``Substitution``
-decorator. Finally, docstrings can be appended to with the ``Appender``
-decorator.
+``{klass}``. The variables are filled in later on using the ``doc`` decorator.
+Finally, docstrings can also be appended to with the ``doc`` decorator.
 
 In this example, we'll create a parent docstring normally (this is like
 ``pandas.core.generic.NDFrame``). Then we'll have two children (like
 ``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll
-substitute the children's class names in this docstring.
+substitute the class names in this docstring.
 
 .. code-block:: python
 
    class Parent:
+       @doc(klass="Parent")
        def my_function(self):
-           """Apply my function to %(klass)s."""
+           """Apply my function to {klass}."""
            ...
 
 
    class ChildA(Parent):
-       @Substitution(klass="ChildA")
-       @Appender(Parent.my_function.__doc__)
+       @doc(Parent.my_function, klass="ChildA")
        def my_function(self):
            ...
 
 
    class ChildB(Parent):
-       @Substitution(klass="ChildB")
-       @Appender(Parent.my_function.__doc__)
+       @doc(Parent.my_function, klass="ChildB")
        def my_function(self):
            ...
 
@@ -972,18 +970,16 @@ The resulting docstrings are
 .. code-block:: python
 
    >>> print(Parent.my_function.__doc__)
-   Apply my function to %(klass)s.
+   Apply my function to Parent.
    >>> print(ChildA.my_function.__doc__)
    Apply my function to ChildA.
    >>> print(ChildB.my_function.__doc__)
    Apply my function to ChildB.
 
-Notice two things:
+Notice:
 
 1. We "append" the parent docstring to the children docstrings, which are
    initially empty.
-2. Python decorators are applied inside out. So the order is Append then
-   Substitution, even though Substitution comes first in the file.
 
 Our files will often contain a module-level ``_shared_doc_kwargs`` with some
 common substitution values (things like ``klass``, ``axes``, etc).
@@ -992,14 +988,13 @@ You can substitute and append in one shot with something like
 
 .. code-block:: python
 
-   @Appender(template % _shared_doc_kwargs)
+   @doc(template, **_shared_doc_kwargs)
    def my_function(self):
        ...
 
 where ``template`` may come from a module-level ``_shared_docs`` dictionary
 mapping function names to docstrings. Wherever possible, we prefer using
-``Appender`` and ``Substitution``, since the docstring-writing processes is
-slightly closer to normal.
+``doc``, since the docstring-writing process is slightly closer to normal.
 
 See ``pandas.core.generic.NDFrame.fillna`` for an example template, and
 ``pandas.core.series.Series.fillna`` and ``pandas.core.frame.DataFrame.fillna``

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
@@ -163,6 +163,23 @@ The commands in this table will install pandas for Python 3 from your distributi
 to get the newest version of pandas, it's recommended to install using the ``pip`` or ``conda``
 methods described above.
 
+Handling ImportErrors
+~~~~~~~~~~~~~~~~~~~~~~
+
+If you encounter an ImportError, it usually means that Python couldn't find pandas in the list of available
+libraries. Python internally has a list of directories it searches through, to find packages. You can
+obtain these directories with::
+
+            import sys
+            sys.path
+
+One way you could be encountering this error is if you have multiple Python installations on your system
+and you don't have pandas installed in the Python installation you're currently using.
+In Linux/Mac you can run ``which python`` on your terminal and it will tell you which Python installation you're
+using. If it's something like "/usr/bin/python", you're using the Python from the system, which is not recommended.
+
+It is highly recommended to use ``conda``, for quick installation and for package and dependency updates.
+You can find simple installation instructions for pandas in this document: `installation instructions </getting_started.html>`__.
 
 Installing from source
 ~~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
@@ -28,9 +28,21 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 
+**Categorical**
+
+- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`)
+
 **I/O**
 
 - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)
+- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`).
+
+
+
+**Experimental dtypes**
+
+- Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`).
+- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -42,6 +42,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 
 - :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`)
+- When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
 -
 -
 
@@ -73,6 +74,7 @@ Backwards incompatible API changes
 Deprecations
 ~~~~~~~~~~~~
 - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version.  Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
+- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
 -
 -
 
@@ -175,6 +177,16 @@ MultiIndex
                           index=[["a", "a", "b", "b"], [1, 2, 1, 2]])
         # Rows are now ordered as the requested keys
         df.loc[(['b', 'a'], [2, 1]), :]
+
+- Bug in :meth:`MultiIndex.intersection` where order was not guaranteed to be preserved when ``sort=False`` (:issue:`31325`)
+
+.. ipython:: python
+
+        left = pd.MultiIndex.from_arrays([["b", "a"], [2, 1]])
+        right = pd.MultiIndex.from_arrays([["a", "b", "c"], [1, 2, 3]])
+        # Common elements are now guaranteed to be ordered by the left side
+        left.intersection(right, sort=False)
+
 -
 
 I/O
@@ -192,6 +204,8 @@ Plotting
 
 - :func:`.plot` for line/bar now accepts color by dictionary (:issue:`8193`).
 -
+- Bug in :meth:`DataFrame.boxplot` and :meth:`DataFrame.plot.boxplot` where color attributes of ``boxprops``, ``whiskerprops``, ``capprops`` and ``medianprops`` were lost (:issue:`30346`)
+
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/flake8/cython-template.cfg b/flake8/cython-template.cfg
@@ -0,0 +1,4 @@
+[flake8]
+filename = *.pxi.in
+select = E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
+
diff --git a/flake8/cython.cfg b/flake8/cython.cfg
@@ -0,0 +1,3 @@
+[flake8]
+filename = *.pyx,*.pxd
+select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
diff --git a/pandas/_config/config.py b/pandas/_config/config.py
@@ -550,7 +550,6 @@ def _select_options(pat: str) -> List[str]:
 
     if pat=="all", returns all registered options
     """
-
     # short-circuit for exact key
     if pat in _registered_options:
         return [pat]
@@ -573,7 +572,6 @@ def _get_root(key: str) -> Tuple[Dict[str, Any], str]:
 
 def _is_deprecated(key: str) -> bool:
     """ Returns True if the given option has been deprecated """
-
     key = key.lower()
     return key in _deprecated_options
 
@@ -586,7 +584,6 @@ def _get_deprecated_option(key: str):
     -------
     DeprecatedOption (namedtuple) if key is deprecated, None otherwise
     """
-
     try:
         d = _deprecated_options[key]
     except KeyError:
@@ -611,7 +608,6 @@ def _translate_key(key: str) -> str:
     if key is deprecated and a replacement key defined, will return the
     replacement key, otherwise returns `key` as - is
     """
-
     d = _get_deprecated_option(key)
     if d:
         return d.rkey or key
@@ -627,7 +623,6 @@ def _warn_if_deprecated(key: str) -> bool:
     -------
     bool - True if `key` is deprecated, False otherwise.
     """
-
     d = _get_deprecated_option(key)
     if d:
         if d.msg:
@@ -649,7 +644,6 @@ def _warn_if_deprecated(key: str) -> bool:
 
 def _build_option_description(k: str) -> str:
     """ Builds a formatted description of a registered option and prints it """
-
     o = _get_registered_option(k)
     d = _get_deprecated_option(k)
 
@@ -674,7 +668,6 @@ def _build_option_description(k: str) -> str:
 
 def pp_options_list(keys: Iterable[str], width=80, _print: bool = False):
     """ Builds a concise listing of available options, grouped by prefix """
-
     from textwrap import wrap
     from itertools import groupby
 
@@ -738,7 +731,6 @@ def config_prefix(prefix):
     will register options "display.font.color", "display.font.size", set the
     value of "display.font.size"... and so on.
     """
-
     # Note: reset_option relies on set_option, and on key directly
     # it does not fit in to this monkey-patching scheme
 
@@ -801,7 +793,6 @@ def is_instance_factory(_type) -> Callable[[Any], None]:
                 ValueError if x is not an instance of `_type`
 
     """
-
     if isinstance(_type, (tuple, list)):
         _type = tuple(_type)
         type_repr = "|".join(map(str, _type))
@@ -848,7 +839,6 @@ def is_nonnegative_int(value: Optional[int]) -> None:
     ValueError
         When the value is not None or is a negative integer
     """
-
     if value is None:
         return
 

diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py
@@ -61,7 +61,6 @@ def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
     bool
         Whether the passed locale can be set
     """
-
     try:
         with set_locale(lc, lc_var=lc_var):
             pass

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -1005,7 +1005,7 @@ _TYPE_MAP = {
     'complex64': 'complex',
     'complex128': 'complex',
     'c': 'complex',
-    'string': 'bytes',
+    'string': 'string',
     'S': 'bytes',
     'U': 'string',
     'bool': 'boolean',

diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in
@@ -235,7 +235,7 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_,
                                               {{dtype}}_t yfill):
     cdef:
         IntIndex out_index
-        Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
+        Py_ssize_t xi = 0, yi = 0, out_i = 0  # fp buf indices
         int32_t xloc, yloc
         int32_t[:] xindices, yindices, out_indices
         {{dtype}}_t[:] x, y

diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
@@ -42,7 +42,7 @@ cdef extern from "numpy/ndarrayobject.h":
     bint PyArray_IsIntegerScalar(obj) nogil
     bint PyArray_Check(obj) nogil
 
-cdef extern from  "numpy/npy_common.h":
+cdef extern from "numpy/npy_common.h":
     int64_t NPY_MIN_INT64
-Original file line number
+Diff line change
@@ Expand Up @@
         bool
             Whether the passed locale can be set
         """
         try:
             with set_locale(lc, lc_var=lc_var):
                 pass
@@ Expand Down @@