pandas-dev
diff --git a/‎ci/code_checks.sh
Lines changed: 8 additions & 0 deletions b/‎ci/code_checks.sh
Lines changed: 8 additions & 0 deletions
diff --git a/‎doc/source/conf.py
Lines changed: 8 additions & 4 deletions b/‎doc/source/conf.py
Lines changed: 8 additions & 4 deletions
diff --git a/‎doc/source/contributing.rst
Lines changed: 52 additions & 0 deletions b/‎doc/source/contributing.rst
Lines changed: 52 additions & 0 deletions
diff --git a/‎doc/source/cookbook.rst
Lines changed: 11 additions & 0 deletions b/‎doc/source/cookbook.rst
Lines changed: 11 additions & 0 deletions
diff --git a/‎doc/source/groupby.rst
Lines changed: 10 additions & 0 deletions b/‎doc/source/groupby.rst
Lines changed: 10 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v0.24.0.txt
Lines changed: 9 additions & 4 deletions b/‎doc/source/whatsnew/v0.24.0.txt
Lines changed: 9 additions & 4 deletions
diff --git a/‎pandas/_libs/algos.pyx
Lines changed: 8 additions & 6 deletions b/‎pandas/_libs/algos.pyx
Lines changed: 8 additions & 6 deletions
diff --git a/‎pandas/_libs/algos_common_helper.pxi.in
Lines changed: 5 additions & 15 deletions b/‎pandas/_libs/algos_common_helper.pxi.in
Lines changed: 5 additions & 15 deletions
diff --git a/‎pandas/_libs/algos_rank_helper.pxi.in
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/algos_rank_helper.pxi.in
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_libs/algos_take_helper.pxi.in
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/algos_take_helper.pxi.in
Lines changed: 1 addition & 1 deletion
@@ -44,6 +44,14 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    # Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
+    # it doesn't make a difference, but we want to be internally consistent.
+    # Note: this grep pattern is (intended to be) equivalent to the python
+    # regex r'(?<![ ->])> '
+    MSG='Linting .pyx code for spacing conventions in casting' ; echo $MSG
+    ! grep -r -E --include '*.pyx' --include '*.pxi.in' '> ' pandas/_libs | grep -v '[ ->]> '
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     # readability/casting: Warnings about C casting instead of C++ casting
     # runtime/int: Warnings about using C number types instead of C++ ones
     # build/include_subdir: Warnings about prefacing included header files with directory
 
@@ -99,7 +99,7 @@
 # JP: added from sphinxdocs
 autosummary_generate = False
 
-if any(re.match("\s*api\s*", l) for l in index_rst_lines):
+if any(re.match(r"\s*api\s*", l) for l in index_rst_lines):
     autosummary_generate = True
 
 # numpydoc
@@ -341,8 +341,8 @@
 # file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
     ('index', 'pandas.tex',
-     u'pandas: powerful Python data analysis toolkit',
-     u'Wes McKinney\n\& PyData Development Team', 'manual'),
+     'pandas: powerful Python data analysis toolkit',
+     r'Wes McKinney\n\& PyData Development Team', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -569,7 +569,11 @@ def linkcode_resolve(domain, info):
             return None
 
     try:
-        fn = inspect.getsourcefile(obj)
+        # inspect.unwrap() was added in Python 3.4; it is only used here on 3.5+
+        if sys.version_info >= (3, 5):
+            fn = inspect.getsourcefile(inspect.unwrap(obj))
+        else:
+            fn = inspect.getsourcefile(obj)
     except:
         fn = None
     if not fn:
 
@@ -612,6 +612,54 @@ Alternatively, you can install the ``grep`` and ``xargs`` commands via the
 `MinGW <http://www.mingw.org/>`__ toolchain, and it will allow you to run the
 commands above.
 
+.. _contributing.import-formatting:
+
+Import Formatting
+~~~~~~~~~~~~~~~~~
+*pandas* uses `isort <https://pypi.org/project/isort/>`__ to standardise import
+formatting across the codebase.
+
+A guide to import layout as per PEP 8 can be found `here <https://www.python.org/dev/peps/pep-0008/#imports>`__.
+
+A summary of our current import sections (in order):
+
+* Future
+* Python Standard Library
+* Third Party
+* ``pandas._libs``, ``pandas.compat``, ``pandas.util._*``, ``pandas.errors`` (largely not dependent on ``pandas.core``)
+* ``pandas.core.dtypes`` (largely not dependent on the rest of ``pandas.core``)
+* Rest of ``pandas.core.*``
+* Non-core ``pandas.io``, ``pandas.plotting``, ``pandas.tseries``
+* Local application/library specific imports
+
+Imports are alphabetically sorted within these sections.
+
+
+As part of :ref:`Continuous Integration <contributing.ci>` checks we run::
+
+    isort --recursive --check-only pandas
+
+to check that imports are correctly formatted as per the ``setup.cfg``.
+
+If you see output like the below in :ref:`Continuous Integration <contributing.ci>` checks:
+
+.. code-block:: shell
+
+   Check import format using isort
+   ERROR: /home/travis/build/pandas-dev/pandas/pandas/io/pytables.py Imports are incorrectly sorted
+   Check import format using isort DONE
+   The command "ci/code_checks.sh" exited with 1
+
+You should run::
+
+    isort pandas/io/pytables.py
+
+to automatically format imports correctly. This will modify your local copy of the files.
+
+The ``--recursive`` flag can be passed to sort all files in a directory.
+
+You can then verify that the changes look OK, then git :ref:`commit <contributing.commit-code>` and :ref:`push <contributing.push-code>` them.
+
 Backwards Compatibility
 ~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -1078,6 +1126,8 @@ or a new keyword argument (`example <https://github.com/pandas-dev/pandas/blob/v
 Contributing your changes to *pandas*
 =====================================
 
+.. _contributing.commit-code:
+
 Committing your code
 --------------------
 
@@ -1122,6 +1172,8 @@ Now you can commit your changes in your local repository::
 
     git commit -m
 
+.. _contributing.push-code:
+
 Pushing your changes
 --------------------
 
 
@@ -1226,6 +1226,17 @@ Computation
 Correlation
 ***********
 
+Often it's useful to obtain the lower (or upper) triangular form of a correlation matrix calculated from :func:`DataFrame.corr`.  This can be achieved by passing a boolean mask to ``where`` as follows:
+
+.. ipython:: python
+
+    df = pd.DataFrame(np.random.random(size=(100, 5)))
+
+    corr_mat = df.corr()
+    mask = np.tril(np.ones_like(corr_mat, dtype=np.bool), k=-1)
+
+    corr_mat.where(mask)
+
 The `method` argument within `DataFrame.corr` can accept a callable in addition to the named correlation types.  Here we compute the `distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>`__ matrix for a `DataFrame` object.
 
 .. code-block:: python
 
@@ -125,6 +125,16 @@ We could naturally group by either the ``A`` or ``B`` columns, or both:
    grouped = df.groupby('A')
    grouped = df.groupby(['A', 'B'])
 
+.. versionadded:: 0.24
+
+If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all
+but the specified columns:
+
+.. ipython:: python
+
+   df2 = df.set_index(['A', 'B'])
+   grouped = df2.groupby(level=df2.index.names.difference(['B']))
+
 These will split the DataFrame on its index (rows). We could also split by the
 columns:
 
 
@@ -13,10 +13,9 @@ v0.24.0 (Month XX, 2018)
 New features
 ~~~~~~~~~~~~
 - :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`)
-
-
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
-
+- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby's that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups
+  <groupby.split>` for more information (:issue:`15475`, :issue:`15506`)
 - :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing
 the user to override the engine's default behavior to include or omit the
 dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
@@ -219,7 +218,8 @@ For earlier versions this can be done using the following.
 .. ipython:: python
 
    pd.merge(left.reset_index(), right.reset_index(),
-            on=['key'], how='inner').set_index(['key','X','Y'])
+            on=['key'], how='inner').set_index(['key', 'X', 'Y'])
+
 .. _whatsnew_0240.enhancements.rename_axis:
 
 Renaming names in a MultiIndex
@@ -267,6 +267,7 @@ Other Enhancements
 - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
 - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
 - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`)
+- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`22647`)
 - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
 - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
 - :meth:`pandas.core.dtypes.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``,
@@ -1100,6 +1101,7 @@ Performance Improvements
 - Improved the performance of :func:`pandas.get_dummies` with ``sparse=True`` (:issue:`21997`)
 - Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`)
 - Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`)
+- Improved performance of :func:`pd.concat` for ``Series`` objects (:issue:`23404`)
 
 
 .. _whatsnew_0240.docs:
@@ -1189,6 +1191,7 @@ Timezones
 - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`)
 - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`)
 - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would raise if a timezone aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`)
+- Bug in :meth:`DataFrame.drop` and :meth:`Series.drop` when specifying a tz-aware Timestamp key to drop from a :class:`DatetimeIndex` with a DST transition (:issue:`21761`)
 
 Offsets
 ^^^^^^^
@@ -1236,6 +1239,7 @@ Indexing
 - The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`)
 - When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError``, consistently with the case of a flat :class:`Int64Index`, rather than falling back to positional indexing (:issue:`21593`)
 - Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`)
+- Bug in :meth:`Series.reindex` when reindexing an empty series with a ``datetime64[ns, tz]`` dtype (:issue:`20869`)
 - Bug in :class:`DataFrame` when setting values with ``.loc`` and a timezone aware :class:`DatetimeIndex` (:issue:`11365`)
 - ``DataFrame.__getitem__`` now accepts dictionaries and dictionary keys as list-likes of labels, consistently with ``Series.__getitem__`` (:issue:`21294`)
 - Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`)
@@ -1312,6 +1316,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
 - Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
 - Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`).
+- Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`)
 
 Plotting
 ^^^^^^^^
 
@@ -32,7 +32,7 @@ import missing
 
 cdef float64_t FP_ERR = 1e-13
 
-cdef double NaN = <double> np.NaN
+cdef double NaN = <double>np.NaN
 cdef double nan = NaN
 
 cdef int64_t iNaT = get_nat()
@@ -77,6 +77,8 @@ class NegInfinity(object):
     __ge__ = lambda self, other: isinstance(other, NegInfinity)
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
 cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
     """
     Efficiently find the unique first-differences of the given array.
@@ -240,7 +242,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
         int64_t nobs = 0
         float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor
 
-    N, K = (<object> mat).shape
+    N, K = (<object>mat).shape
 
     if minp is None:
         minpv = 1
@@ -305,7 +307,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
         int64_t nobs = 0
         float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor
 
-    N, K = (<object> mat).shape
+    N, K = (<object>mat).shape
 
     result = np.empty((K, K), dtype=np.float64)
     mask = np.isfinite(mat).view(np.uint8)
@@ -529,7 +531,7 @@ def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
         algos_t val
         int lim, fill_count = 0
 
-    K, N = (<object> values).shape
+    K, N = (<object>values).shape
 
     # GH#2778
     if N == 0:
@@ -728,7 +730,7 @@ def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
         algos_t val
         int lim, fill_count = 0
 
-    K, N = (<object> values).shape
+    K, N = (<object>values).shape
 
     # GH#2778
     if N == 0:
@@ -793,7 +795,7 @@ arrmap_bool = arrmap["uint8_t"]
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def is_monotonic(ndarray[algos_t] arr, bint timelike):
+def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
     """
     Returns
     -------
 
@@ -1,16 +1,6 @@
 """
 Template for each `dtype` helper function using 1-d template
 
-# 1-d template
-- pad
-- pad_1d
-- pad_2d
-- backfill
-- backfill_1d
-- backfill_2d
-- is_monotonic
-- arrmap
-
 WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 """
 
@@ -44,7 +34,7 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
     cdef:
         Py_ssize_t i, j, sx, sy
 
-    sx, sy = (<object> arr).shape
+    sx, sy = (<object>arr).shape
     if arr.flags.f_contiguous:
         if axis == 0:
             if periods >= 0:
@@ -98,14 +88,14 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
 # ensure_dtype
 #----------------------------------------------------------------------
 
-cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num
+cdef int PLATFORM_INT = (<ndarray>np.arange(0, dtype=np.intp)).descr.type_num
 
 
 def ensure_platform_int(object arr):
     # GH3033, GH1392
     # platform int is the size of the int pointer, e.g. np.intp
     if util.is_array(arr):
-        if (<ndarray> arr).descr.type_num == PLATFORM_INT:
+        if (<ndarray>arr).descr.type_num == PLATFORM_INT:
             return arr
         else:
             return arr.astype(np.intp)
@@ -115,7 +105,7 @@ def ensure_platform_int(object arr):
 
 def ensure_object(object arr):
     if util.is_array(arr):
-        if (<ndarray> arr).descr.type_num == NPY_OBJECT:
+        if (<ndarray>arr).descr.type_num == NPY_OBJECT:
             return arr
         else:
             return arr.astype(np.object_)
@@ -152,7 +142,7 @@ def get_dispatch(dtypes):
 
 def ensure_{{name}}(object arr, copy=True):
     if util.is_array(arr):
-        if (<ndarray> arr).descr.type_num == NPY_{{c_type}}:
+        if (<ndarray>arr).descr.type_num == NPY_{{c_type}}:
             return arr
         else:
             return arr.astype(np.{{dtype}}, copy=copy)
 
@@ -263,7 +263,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
     np.putmask(values, mask, nan_value)
     {{endif}}
 
-    n, k = (<object> values).shape
+    n, k = (<object>values).shape
     ranks = np.empty((n, k), dtype='f8')
 
     {{if dtype == 'object'}}
 
@@ -278,7 +278,7 @@ cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
         ndarray[take_t, ndim=2] result
         object val
 
-    N, K = (<object> values).shape
+    N, K = (<object>values).shape
 
     if take_t is object:
         # evaluated at compile-time