pandas-dev
diff --git a/.github/workflows/asv-bot.yml b/.github/workflows/asv-bot.yml
Lines changed: 81 additions & 0 deletions
diff --git a/.github/workflows/autoupdate-pre-commit-config.yml b/.github/workflows/autoupdate-pre-commit-config.yml
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/conf.py b/doc/source/conf.py
Lines changed: 2 additions & 3 deletions
diff --git a/doc/source/getting_started/comparison/includes/nth_word.rst b/doc/source/getting_started/comparison/includes/nth_word.rst
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst
Lines changed: 1 addition & 0 deletions
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/_libs/algos.pyi b/pandas/_libs/algos.pyi
Lines changed: 17 additions & 13 deletions
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
Lines changed: 3 additions & 1 deletion
diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
Lines changed: 2 additions & 0 deletions
diff --git a/pandas/_libs/hashing.pyi b/pandas/_libs/hashing.pyi
Lines changed: 4 additions & 2 deletions
diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi
Lines changed: 8 additions & 9 deletions
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
Lines changed: 0 additions & 3 deletions
diff --git a/pandas/_libs/tslibs/conversion.pyi b/pandas/_libs/tslibs/conversion.pyi
Lines changed: 3 additions & 1 deletion
diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi
Lines changed: 14 additions & 12 deletions
@@ -0,0 +1,81 @@
+name: "ASV Bot"
+
+on:
+  issue_comment: # Pull requests are issues
+    types:
+      - created
+
+env:
+  ENV_FILE: environment.yml
+  COMMENT: ${{github.event.comment.body}}
+
+jobs:
+  autotune:
+    name: "Run benchmarks"
+    # TODO: Support more benchmarking options later, against different branches, against self, etc
+    if: startsWith(github.event.comment.body, '@github-actions benchmark')
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    concurrency:
+      # Limit concurrency to prevent abuse (full runs take ~5.5 hours!):
+      # each user can only have one benchmark run in progress at a time.
+      # In-progress jobs are not cancelled, so to benchmark multiple PRs you have
+      # to wait for the previous run to finish.
+      group: ${{ github.actor }}-asv
+      cancel-in-progress: false
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Cache conda
+        uses: actions/cache@v2
+        with:
+          path: ~/conda_pkgs_dir # package cache directory restored/saved between runs
+          key: ${{ runner.os }}-conda-${{ hashFiles(env.ENV_FILE) }} # pass the env context directly: ${{ }} cannot be nested inside an expression string
+
+        # Although asv sets up its own env, deps are still needed
+        # during discovery process
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          activate-environment: pandas-dev
+          channel-priority: strict
+          environment-file: ${{ env.ENV_FILE }}
+          use-only-tar-bz2: true
+
+      - name: Run benchmarks
+        id: bench
+        continue-on-error: true # This is a fake failure, asv will exit code 1 for regressions
+        run: |
+          # Extract the regex that follows "-b" in the comment (see https://stackoverflow.com/a/36798723); always quote it below so an empty or glob-like value cannot shift or expand arguments
+          REGEX=$(echo "$COMMENT" | sed -n "s/^.*-b\s*\(\S*\).*$/\1/p")
+          cd asv_bench
+          asv check -E existing
+          git remote add upstream https://github.com/pandas-dev/pandas.git
+          git fetch upstream
+          asv machine --yes
+          asv continuous -f 1.1 -b "$REGEX" upstream/master HEAD
+          echo 'BENCH_OUTPUT<<EOF' >> "$GITHUB_ENV"
+          asv compare -f 1.1 upstream/master HEAD >> "$GITHUB_ENV"
+          echo 'EOF' >> "$GITHUB_ENV"
+          echo "REGEX=$REGEX" >> "$GITHUB_ENV"
+
+      - uses: actions/github-script@v4
+        env:
+          BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
+          REGEX: ${{env.REGEX}}
+        with:
+          script: |
+            const ENV_VARS = process.env
+            const run_url = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`
+            await github.issues.createComment({ // awaited so an API failure fails this step instead of becoming an unhandled rejection
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: '\nBenchmarks completed. View runner logs here: ' + run_url + '\nRegex used: ' + ENV_VARS["REGEX"] + '\n' + ENV_VARS["BENCH_OUTPUT"]
+            })
@@ -2,7 +2,7 @@ name: "Update pre-commit config"
 
 on:
   schedule:
-    - cron: "0 7 * * 1" # At 07:00 on each Monday.
+    - cron: "0 7 1 * *" # At 07:00 on 1st of every month.
   workflow_dispatch:
 
 jobs:
 
@@ -461,7 +461,6 @@
 # eg pandas.Series.str and pandas.Series.dt (see GH9322)
 
 import sphinx  # isort:skip
-from sphinx.util import rpartition  # isort:skip
 from sphinx.ext.autodoc import (  # isort:skip
     AttributeDocumenter,
     Documenter,
@@ -521,8 +520,8 @@ def resolve_name(self, modname, parents, path, base):
             # HACK: this is added in comparison to ClassLevelDocumenter
             # mod_cls still exists of class.accessor, so an extra
             # rpartition is needed
-            modname, accessor = rpartition(mod_cls, ".")
-            modname, cls = rpartition(modname, ".")
+            modname, _, accessor = mod_cls.rpartition(".")
+            modname, _, cls = modname.rpartition(".")
             parents = [cls, accessor]
             # if the module name is still missing, get it like above
             if not modname:
 
@@ -5,5 +5,5 @@ word by index. Note there are more powerful approaches should you need them.
 
    firstlast = pd.DataFrame({"String": ["John Smith", "Jane Cook"]})
    firstlast["First_Name"] = firstlast["String"].str.split(" ", expand=True)[0]
-   firstlast["Last_Name"] = firstlast["String"].str.rsplit(" ", expand=True)[0]
+   firstlast["Last_Name"] = firstlast["String"].str.rsplit(" ", expand=True)[1]
    firstlast
@@ -17,6 +17,7 @@ Fixed regressions
 - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`)
 - Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`)
 - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`)
+- Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`)
 - Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`)
 - Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`)
 
 
@@ -101,6 +101,7 @@ Other enhancements
 - :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
 - :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
 - :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`)
+- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` no longer ignore the imaginary component of complex numbers (:issue:`17927`)
 
 .. ---------------------------------------------------------------------------
 
@@ -375,6 +376,7 @@ I/O
 - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
 - Bug in :func:`read_fwf`, where difference in lengths of ``colspecs`` and ``names`` was not raising ``ValueError`` (:issue:`40830`)
 - Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
+- Bug where column headers were dropped when constructing a :class:`DataFrame` from a sqlalchemy ``Row`` object (:issue:`40682`)
 -
 
 Period
@@ -423,6 +425,7 @@ Styler
 - Bug in :meth:`.Styler.copy` where ``uuid`` was not previously copied (:issue:`40675`)
 - Bug in :meth:`Styler.apply` where functions which returned Series objects were not correctly handled in terms of aligning their index labels (:issue:`13657`, :issue:`42014`)
 - Bug when rendering an empty DataFrame with a named index (:issue:`43305`).
+- Bug when rendering a single level MultiIndex (:issue:`43383`).
 
 Other
 ^^^^^
 
@@ -3,6 +3,8 @@ from typing import Any
 
 import numpy as np
 
+from pandas._typing import npt
+
 class Infinity:
     """
     Provide a positive Infinity comparison method for ranking.
@@ -30,7 +32,7 @@ class NegInfinity:
 def unique_deltas(
     arr: np.ndarray,  # const int64_t[:]
 ) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=1]
-def is_lexsorted(list_of_arrays: list[np.ndarray]) -> bool: ...
+def is_lexsorted(list_of_arrays: list[npt.NDArray[np.int64]]) -> bool: ...
 def groupsort_indexer(
     index: np.ndarray,  # const int64_t[:]
     ngroups: int,
@@ -146,18 +148,20 @@ def diff_2d(
     axis: int,
     datetimelike: bool = ...,
 ) -> None: ...
-def ensure_platform_int(arr: object) -> np.ndarray: ...
-def ensure_object(arr: object) -> np.ndarray: ...
-def ensure_float64(arr: object, copy=True) -> np.ndarray: ...
-def ensure_float32(arr: object, copy=True) -> np.ndarray: ...
-def ensure_int8(arr: object, copy=True) -> np.ndarray: ...
-def ensure_int16(arr: object, copy=True) -> np.ndarray: ...
-def ensure_int32(arr: object, copy=True) -> np.ndarray: ...
-def ensure_int64(arr: object, copy=True) -> np.ndarray: ...
-def ensure_uint8(arr: object, copy=True) -> np.ndarray: ...
-def ensure_uint16(arr: object, copy=True) -> np.ndarray: ...
-def ensure_uint32(arr: object, copy=True) -> np.ndarray: ...
-def ensure_uint64(arr: object, copy=True) -> np.ndarray: ...
+def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ...
+def ensure_object(arr: object) -> npt.NDArray[np.object_]: ...
+def ensure_complex64(arr: object, copy=True) -> npt.NDArray[np.complex64]: ...
+def ensure_complex128(arr: object, copy=True) -> npt.NDArray[np.complex128]: ...
+def ensure_float64(arr: object, copy=True) -> npt.NDArray[np.float64]: ...
+def ensure_float32(arr: object, copy=True) -> npt.NDArray[np.float32]: ...
+def ensure_int8(arr: object, copy=True) -> npt.NDArray[np.int8]: ...
+def ensure_int16(arr: object, copy=True) -> npt.NDArray[np.int16]: ...
+def ensure_int32(arr: object, copy=True) -> npt.NDArray[np.int32]: ...
+def ensure_int64(arr: object, copy=True) -> npt.NDArray[np.int64]: ...
+def ensure_uint8(arr: object, copy=True) -> npt.NDArray[np.uint8]: ...
+def ensure_uint16(arr: object, copy=True) -> npt.NDArray[np.uint16]: ...
+def ensure_uint32(arr: object, copy=True) -> npt.NDArray[np.uint32]: ...
+def ensure_uint64(arr: object, copy=True) -> npt.NDArray[np.uint64]: ...
 def take_1d_int8_int8(
     values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
 ) -> None: ...
 
@@ -15,6 +15,8 @@ import numpy as np
 
 cimport numpy as cnp
 from numpy cimport (
+    NPY_COMPLEX64,
+    NPY_COMPLEX128,
     NPY_FLOAT32,
     NPY_FLOAT64,
     NPY_INT8,
@@ -122,7 +124,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr):
 
     Parameters
     ----------
-    arr : ndarray[in64_t]
+    arr : ndarray[int64_t]
 
     Returns
     -------
 
@@ -47,6 +47,8 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
           ('uint16', 'UINT16', 'uint16'),
           ('uint32', 'UINT32', 'uint32'),
           ('uint64', 'UINT64', 'uint64'),
+          ('complex64', 'COMPLEX64', 'complex64'),
+          ('complex128', 'COMPLEX128', 'complex128')
           # ('platform_int', 'INT', 'int_'),
           # ('object', 'OBJECT', 'object_'),
 ]
 
@@ -1,7 +1,9 @@
 import numpy as np
 
+from pandas._typing import npt
+
 def hash_object_array(
-    arr: np.ndarray,  # np.ndarray[object]
+    arr: npt.NDArray[np.object_],
     key: str,
     encoding: str = ...,
-) -> np.ndarray: ...  # np.ndarray[np.uint64]
+) -> npt.NDArray[np.uint64]: ...
@@ -1,5 +1,7 @@
 import numpy as np
 
+from pandas._typing import npt
+
 class IndexEngine:
     over_size_threshold: bool
     def __init__(self, vgetter, n: int): ...
@@ -16,21 +18,18 @@ class IndexEngine:
     def is_monotonic_decreasing(self) -> bool: ...
     def get_backfill_indexer(
         self, other: np.ndarray, limit: int | None = ...
-    ) -> np.ndarray: ...
+    ) -> npt.NDArray[np.intp]: ...
     def get_pad_indexer(
         self, other: np.ndarray, limit: int | None = ...
-    ) -> np.ndarray: ...
+    ) -> npt.NDArray[np.intp]: ...
     @property
     def is_mapping_populated(self) -> bool: ...
     def clear_mapping(self): ...
-    def get_indexer(self, values: np.ndarray) -> np.ndarray: ...  # np.ndarray[np.intp]
+    def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ...
     def get_indexer_non_unique(
         self,
         targets: np.ndarray,
-    ) -> tuple[
-        np.ndarray,  # np.ndarray[np.intp]
-        np.ndarray,  # np.ndarray[np.intp]
-    ]: ...
+    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
 
 class Float64Engine(IndexEngine): ...
 class Float32Engine(IndexEngine): ...
@@ -58,8 +57,8 @@ class BaseMultiIndexCodesEngine:
     ): ...
     def get_indexer(
         self,
-        target: np.ndarray,  # np.ndarray[object]
-    ) -> np.ndarray: ...  # np.ndarray[np.intp]
+        target: npt.NDArray[np.object_],
+    ) -> npt.NDArray[np.intp]: ...
     def _extract_level_codes(self, target: object): ...
     def get_indexer_with_fill(
         self,
 
@@ -64,9 +64,6 @@ cdef class _BaseGrouper:
 
     cdef inline _update_cached_objs(self, object cached_series, object cached_index,
                                     Slider islider, Slider vslider):
-        # See the comment in indexes/base.py about _index_data.
-        # We need this for EA-backed indexes that have a reference
-        # to a 1-d ndarray like datetime / timedelta / period.
         cached_index._engine.clear_mapping()
         cached_index._cache.clear()  # e.g. inferred_freq must go
         cached_series._mgr.set_values(vslider.buf)
 
@@ -5,6 +5,8 @@ from datetime import (
 
 import numpy as np
 
+from pandas._typing import npt
+
 DT64NS_DTYPE: np.dtype
 TD64NS_DTYPE: np.dtype
 
@@ -22,6 +24,6 @@ def ensure_timedelta64ns(
     copy: bool = ...,
 ) -> np.ndarray: ...  # np.ndarray[timedelta64ns]
 def datetime_to_datetime64(
-    values: np.ndarray,  # np.ndarray[object]
+    values: npt.NDArray[np.object_],
 ) -> tuple[np.ndarray, tzinfo | None,]: ...  # (np.ndarray[dt64ns], _)
 def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...
@@ -1,33 +1,35 @@
 import numpy as np
 
+from pandas._typing import npt
+
 def build_field_sarray(
-    dtindex: np.ndarray,  # const int64_t[:]
+    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
 ) -> np.ndarray: ...
 def month_position_check(fields, weekdays) -> str | None: ...
 def get_date_name_field(
-    dtindex: np.ndarray,  # const int64_t[:]
+    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
     field: str,
     locale=...,
-) -> np.ndarray: ...  # np.ndarray[object]
+) -> npt.NDArray[np.object_]: ...
 def get_start_end_field(
-    dtindex: np.ndarray,  # const int64_t[:]
+    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
     field: str,
     freqstr: str | None = ...,
     month_kw: int = ...,
-) -> np.ndarray: ...  # np.ndarray[bool]
+) -> npt.NDArray[np.bool_]: ...
 def get_date_field(
-    dtindex: np.ndarray,  # const int64_t[:]
+    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
     field: str,
-) -> np.ndarray: ...  # np.ndarray[in32]
+) -> npt.NDArray[np.int32]: ...
 def get_timedelta_field(
     tdindex: np.ndarray,  # const int64_t[:]
     field: str,
-) -> np.ndarray: ...  # np.ndarray[int32]
+) -> npt.NDArray[np.int32]: ...
 def isleapyear_arr(
     years: np.ndarray,
-) -> np.ndarray: ...  # np.ndarray[bool]
+) -> npt.NDArray[np.bool_]: ...
 def build_isocalendar_sarray(
-    dtindex: np.ndarray,  # const int64_t[:]
+    dtindex: npt.NDArray[np.int64],  # const int64_t[:]
 ) -> np.ndarray: ...
 def get_locale_names(name_type: str, locale: object = None): ...
 
@@ -44,7 +46,7 @@ class RoundTo:
     def NEAREST_HALF_MINUS_INFTY(self) -> int: ...
 
 def round_nsint64(
-    values: np.ndarray,  # np.ndarray[np.int64]
+    values: npt.NDArray[np.int64],
     mode: RoundTo,
     nanos: int,
-) -> np.ndarray: ...  # np.ndarray[np.int64]
+) -> npt.NDArray[np.int64]: ...
Original file line number	Diff line number	Diff line change
`@@ -47,6 +47,8 @@ dtypes = [('float64', 'FLOAT64', 'float64'),`
`47`	`47`	`('uint16', 'UINT16', 'uint16'),`
`48`	`48`	`('uint32', 'UINT32', 'uint32'),`
`49`	`49`	`('uint64', 'UINT64', 'uint64'),`
	`50`	`+ ('complex64', 'COMPLEX64', 'complex64'),`
	`51`	`+ ('complex128', 'COMPLEX128', 'complex128')`
`50`	`52`	`# ('platform_int', 'INT', 'int_'),`
`51`	`53`	`# ('object', 'OBJECT', 'object_'),`
`52`	`54`	`]`