Skip to content

Commit b87ef09

Browse files
committed
Merge remote-tracking branch 'upstream/master' into styler_format_index
2 parents e36f198 + e64784f commit b87ef09

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+738
-256
lines changed

.github/workflows/asv-bot.yml

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
name: "ASV Bot"
2+
3+
on:
4+
issue_comment: # Pull requests are issues
5+
types:
6+
- created
7+
8+
env:
9+
ENV_FILE: environment.yml
10+
COMMENT: ${{github.event.comment.body}}
11+
12+
jobs:
13+
autotune:
14+
name: "Run benchmarks"
15+
# TODO: Support more benchmarking options later, against different branches, against self, etc
16+
if: startsWith(github.event.comment.body, '@github-actions benchmark')
17+
runs-on: ubuntu-latest
18+
defaults:
19+
run:
20+
shell: bash -l {0}
21+
22+
concurrency:
23+
# Set concurrency to prevent abuse (full runs take ~5.5 hours!)
24+
# each user can only run one concurrent benchmark bot at a time
25+
# We don't cancel in-progress jobs, but if you want to benchmark multiple PRs, you will have
26+
# to wait
27+
group: ${{ github.actor }}-asv
28+
cancel-in-progress: false
29+
30+
steps:
31+
- name: Checkout
32+
uses: actions/checkout@v2
33+
with:
34+
fetch-depth: 0
35+
36+
- name: Cache conda
37+
uses: actions/cache@v2
38+
with:
39+
path: ~/conda_pkgs_dir
40+
key: ${{ runner.os }}-conda-${{ hashFiles('${{ env.ENV_FILE }}') }}
41+
42+
# Although asv sets up its own env, deps are still needed
43+
# during discovery process
44+
- uses: conda-incubator/setup-miniconda@v2
45+
with:
46+
activate-environment: pandas-dev
47+
channel-priority: strict
48+
environment-file: ${{ env.ENV_FILE }}
49+
use-only-tar-bz2: true
50+
51+
- name: Run benchmarks
52+
id: bench
53+
continue-on-error: true # Not a real failure: asv exits with code 1 when it detects regressions
54+
run: |
55+
# extracting the regex, see https://stackoverflow.com/a/36798723
56+
REGEX=$(echo "$COMMENT" | sed -n "s/^.*-b\s*\(\S*\).*$/\1/p")
57+
cd asv_bench
58+
asv check -E existing
59+
git remote add upstream https://github.com/pandas-dev/pandas.git
60+
git fetch upstream
61+
asv machine --yes
62+
asv continuous -f 1.1 -b $REGEX upstream/master HEAD
63+
echo 'BENCH_OUTPUT<<EOF' >> $GITHUB_ENV
64+
asv compare -f 1.1 upstream/master HEAD >> $GITHUB_ENV
65+
echo 'EOF' >> $GITHUB_ENV
66+
echo "REGEX=$REGEX" >> $GITHUB_ENV
67+
68+
- uses: actions/github-script@v4
69+
env:
70+
BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
71+
REGEX: ${{env.REGEX}}
72+
with:
73+
script: |
74+
const ENV_VARS = process.env
75+
const run_url = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`
76+
github.issues.createComment({
77+
issue_number: context.issue.number,
78+
owner: context.repo.owner,
79+
repo: context.repo.repo,
80+
body: '\nBenchmarks completed. View runner logs here.' + run_url + '\nRegex used: '+ 'regex ' + ENV_VARS["REGEX"] + '\n' + ENV_VARS["BENCH_OUTPUT"]
81+
})

.github/workflows/autoupdate-pre-commit-config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: "Update pre-commit config"
22

33
on:
44
schedule:
5-
- cron: "0 7 * * 1" # At 07:00 on each Monday.
5+
- cron: "0 7 1 * *" # At 07:00 on 1st of every month.
66
workflow_dispatch:
77

88
jobs:

doc/source/conf.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,6 @@
461461
# eg pandas.Series.str and pandas.Series.dt (see GH9322)
462462

463463
import sphinx # isort:skip
464-
from sphinx.util import rpartition # isort:skip
465464
from sphinx.ext.autodoc import ( # isort:skip
466465
AttributeDocumenter,
467466
Documenter,
@@ -521,8 +520,8 @@ def resolve_name(self, modname, parents, path, base):
521520
# HACK: this is added in comparison to ClassLevelDocumenter
522521
# mod_cls still consists of class.accessor, so an extra
523522
# rpartition is needed
524-
modname, accessor = rpartition(mod_cls, ".")
525-
modname, cls = rpartition(modname, ".")
523+
modname, _, accessor = mod_cls.rpartition(".")
524+
modname, _, cls = modname.rpartition(".")
526525
parents = [cls, accessor]
527526
# if the module name is still missing, get it like above
528527
if not modname:

doc/source/getting_started/comparison/includes/nth_word.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ word by index. Note there are more powerful approaches should you need them.
55
66
firstlast = pd.DataFrame({"String": ["John Smith", "Jane Cook"]})
77
firstlast["First_Name"] = firstlast["String"].str.split(" ", expand=True)[0]
8-
firstlast["Last_Name"] = firstlast["String"].str.rsplit(" ", expand=True)[0]
8+
firstlast["Last_Name"] = firstlast["String"].str.rsplit(" ", expand=True)[1]
99
firstlast

doc/source/whatsnew/v1.3.3.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Fixed regressions
1717
- Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`)
1818
- Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`)
1919
- Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`)
20+
- Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`)
2021
- Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`)
2122
- Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`)
2223

doc/source/whatsnew/v1.4.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ Other enhancements
101101
- :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
102102
- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
103103
- :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`)
104+
- Methods that relied on hashmap-based algorithms, such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize`, ignored the imaginary component of complex numbers (:issue:`17927`)
104105

105106
.. ---------------------------------------------------------------------------
106107
@@ -375,6 +376,7 @@ I/O
375376
- Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
376377
- Bug in :func:`read_fwf`, where difference in lengths of ``colspecs`` and ``names`` was not raising ``ValueError`` (:issue:`40830`)
377378
- Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
379+
- Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy ``Row`` object (:issue:`40682`)
378380
-
379381

380382
Period
@@ -423,6 +425,7 @@ Styler
423425
- Bug in :meth:`.Styler.copy` where ``uuid`` was not previously copied (:issue:`40675`)
424426
- Bug in :meth:`Styler.apply` where functions which returned Series objects were not correctly handled in terms of aligning their index labels (:issue:`13657`, :issue:`42014`)
425427
- Bug when rendering an empty DataFrame with a named index (:issue:`43305`).
428+
- Bug when rendering a single-level MultiIndex (:issue:`43383`).
426429

427430
Other
428431
^^^^^

pandas/_libs/algos.pyi

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ from typing import Any
33

44
import numpy as np
55

6+
from pandas._typing import npt
7+
68
class Infinity:
79
"""
810
Provide a positive Infinity comparison method for ranking.
@@ -30,7 +32,7 @@ class NegInfinity:
3032
def unique_deltas(
3133
arr: np.ndarray, # const int64_t[:]
3234
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1]
33-
def is_lexsorted(list_of_arrays: list[np.ndarray]) -> bool: ...
35+
def is_lexsorted(list_of_arrays: list[npt.NDArray[np.int64]]) -> bool: ...
3436
def groupsort_indexer(
3537
index: np.ndarray, # const int64_t[:]
3638
ngroups: int,
@@ -146,18 +148,20 @@ def diff_2d(
146148
axis: int,
147149
datetimelike: bool = ...,
148150
) -> None: ...
149-
def ensure_platform_int(arr: object) -> np.ndarray: ...
150-
def ensure_object(arr: object) -> np.ndarray: ...
151-
def ensure_float64(arr: object, copy=True) -> np.ndarray: ...
152-
def ensure_float32(arr: object, copy=True) -> np.ndarray: ...
153-
def ensure_int8(arr: object, copy=True) -> np.ndarray: ...
154-
def ensure_int16(arr: object, copy=True) -> np.ndarray: ...
155-
def ensure_int32(arr: object, copy=True) -> np.ndarray: ...
156-
def ensure_int64(arr: object, copy=True) -> np.ndarray: ...
157-
def ensure_uint8(arr: object, copy=True) -> np.ndarray: ...
158-
def ensure_uint16(arr: object, copy=True) -> np.ndarray: ...
159-
def ensure_uint32(arr: object, copy=True) -> np.ndarray: ...
160-
def ensure_uint64(arr: object, copy=True) -> np.ndarray: ...
151+
def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ...
152+
def ensure_object(arr: object) -> npt.NDArray[np.object_]: ...
153+
def ensure_complex64(arr: object, copy=True) -> npt.NDArray[np.complex64]: ...
154+
def ensure_complex128(arr: object, copy=True) -> npt.NDArray[np.complex128]: ...
155+
def ensure_float64(arr: object, copy=True) -> npt.NDArray[np.float64]: ...
156+
def ensure_float32(arr: object, copy=True) -> npt.NDArray[np.float32]: ...
157+
def ensure_int8(arr: object, copy=True) -> npt.NDArray[np.int8]: ...
158+
def ensure_int16(arr: object, copy=True) -> npt.NDArray[np.int16]: ...
159+
def ensure_int32(arr: object, copy=True) -> npt.NDArray[np.int32]: ...
160+
def ensure_int64(arr: object, copy=True) -> npt.NDArray[np.int64]: ...
161+
def ensure_uint8(arr: object, copy=True) -> npt.NDArray[np.uint8]: ...
162+
def ensure_uint16(arr: object, copy=True) -> npt.NDArray[np.uint16]: ...
163+
def ensure_uint32(arr: object, copy=True) -> npt.NDArray[np.uint32]: ...
164+
def ensure_uint64(arr: object, copy=True) -> npt.NDArray[np.uint64]: ...
161165
def take_1d_int8_int8(
162166
values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
163167
) -> None: ...

pandas/_libs/algos.pyx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ import numpy as np
1515

1616
cimport numpy as cnp
1717
from numpy cimport (
18+
NPY_COMPLEX64,
19+
NPY_COMPLEX128,
1820
NPY_FLOAT32,
1921
NPY_FLOAT64,
2022
NPY_INT8,
@@ -122,7 +124,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr):
122124
123125
Parameters
124126
----------
125-
arr : ndarray[in64_t]
127+
arr : ndarray[int64_t]
126128
127129
Returns
128130
-------

pandas/_libs/algos_common_helper.pxi.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ dtypes = [('float64', 'FLOAT64', 'float64'),
4747
('uint16', 'UINT16', 'uint16'),
4848
('uint32', 'UINT32', 'uint32'),
4949
('uint64', 'UINT64', 'uint64'),
50+
('complex64', 'COMPLEX64', 'complex64'),
51+
('complex128', 'COMPLEX128', 'complex128')
5052
# ('platform_int', 'INT', 'int_'),
5153
# ('object', 'OBJECT', 'object_'),
5254
]

pandas/_libs/hashing.pyi

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import numpy as np
22

3+
from pandas._typing import npt
4+
35
def hash_object_array(
4-
arr: np.ndarray, # np.ndarray[object]
6+
arr: npt.NDArray[np.object_],
57
key: str,
68
encoding: str = ...,
7-
) -> np.ndarray: ... # np.ndarray[np.uint64]
9+
) -> npt.NDArray[np.uint64]: ...

pandas/_libs/index.pyi

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import numpy as np
22

3+
from pandas._typing import npt
4+
35
class IndexEngine:
46
over_size_threshold: bool
57
def __init__(self, vgetter, n: int): ...
@@ -16,21 +18,18 @@ class IndexEngine:
1618
def is_monotonic_decreasing(self) -> bool: ...
1719
def get_backfill_indexer(
1820
self, other: np.ndarray, limit: int | None = ...
19-
) -> np.ndarray: ...
21+
) -> npt.NDArray[np.intp]: ...
2022
def get_pad_indexer(
2123
self, other: np.ndarray, limit: int | None = ...
22-
) -> np.ndarray: ...
24+
) -> npt.NDArray[np.intp]: ...
2325
@property
2426
def is_mapping_populated(self) -> bool: ...
2527
def clear_mapping(self): ...
26-
def get_indexer(self, values: np.ndarray) -> np.ndarray: ... # np.ndarray[np.intp]
28+
def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ...
2729
def get_indexer_non_unique(
2830
self,
2931
targets: np.ndarray,
30-
) -> tuple[
31-
np.ndarray, # np.ndarray[np.intp]
32-
np.ndarray, # np.ndarray[np.intp]
33-
]: ...
32+
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
3433

3534
class Float64Engine(IndexEngine): ...
3635
class Float32Engine(IndexEngine): ...
@@ -58,8 +57,8 @@ class BaseMultiIndexCodesEngine:
5857
): ...
5958
def get_indexer(
6059
self,
61-
target: np.ndarray, # np.ndarray[object]
62-
) -> np.ndarray: ... # np.ndarray[np.intp]
60+
target: npt.NDArray[np.object_],
61+
) -> npt.NDArray[np.intp]: ...
6362
def _extract_level_codes(self, target: object): ...
6463
def get_indexer_with_fill(
6564
self,

pandas/_libs/reduction.pyx

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,6 @@ cdef class _BaseGrouper:
6464

6565
cdef inline _update_cached_objs(self, object cached_series, object cached_index,
6666
Slider islider, Slider vslider):
67-
# See the comment in indexes/base.py about _index_data.
68-
# We need this for EA-backed indexes that have a reference
69-
# to a 1-d ndarray like datetime / timedelta / period.
7067
cached_index._engine.clear_mapping()
7168
cached_index._cache.clear() # e.g. inferred_freq must go
7269
cached_series._mgr.set_values(vslider.buf)

pandas/_libs/tslibs/conversion.pyi

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ from datetime import (
55

66
import numpy as np
77

8+
from pandas._typing import npt
9+
810
DT64NS_DTYPE: np.dtype
911
TD64NS_DTYPE: np.dtype
1012

@@ -22,6 +24,6 @@ def ensure_timedelta64ns(
2224
copy: bool = ...,
2325
) -> np.ndarray: ... # np.ndarray[timedelta64ns]
2426
def datetime_to_datetime64(
25-
values: np.ndarray, # np.ndarray[object]
27+
values: npt.NDArray[np.object_],
2628
) -> tuple[np.ndarray, tzinfo | None,]: ... # (np.ndarray[dt64ns], _)
2729
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...

pandas/_libs/tslibs/fields.pyi

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,35 @@
11
import numpy as np
22

3+
from pandas._typing import npt
4+
35
def build_field_sarray(
4-
dtindex: np.ndarray, # const int64_t[:]
6+
dtindex: npt.NDArray[np.int64], # const int64_t[:]
57
) -> np.ndarray: ...
68
def month_position_check(fields, weekdays) -> str | None: ...
79
def get_date_name_field(
8-
dtindex: np.ndarray, # const int64_t[:]
10+
dtindex: npt.NDArray[np.int64], # const int64_t[:]
911
field: str,
1012
locale=...,
11-
) -> np.ndarray: ... # np.ndarray[object]
13+
) -> npt.NDArray[np.object_]: ...
1214
def get_start_end_field(
13-
dtindex: np.ndarray, # const int64_t[:]
15+
dtindex: npt.NDArray[np.int64], # const int64_t[:]
1416
field: str,
1517
freqstr: str | None = ...,
1618
month_kw: int = ...,
17-
) -> np.ndarray: ... # np.ndarray[bool]
19+
) -> npt.NDArray[np.bool_]: ...
1820
def get_date_field(
19-
dtindex: np.ndarray, # const int64_t[:]
21+
dtindex: npt.NDArray[np.int64], # const int64_t[:]
2022
field: str,
21-
) -> np.ndarray: ... # np.ndarray[in32]
23+
) -> npt.NDArray[np.int32]: ...
2224
def get_timedelta_field(
2325
tdindex: np.ndarray, # const int64_t[:]
2426
field: str,
25-
) -> np.ndarray: ... # np.ndarray[int32]
27+
) -> npt.NDArray[np.int32]: ...
2628
def isleapyear_arr(
2729
years: np.ndarray,
28-
) -> np.ndarray: ... # np.ndarray[bool]
30+
) -> npt.NDArray[np.bool_]: ...
2931
def build_isocalendar_sarray(
30-
dtindex: np.ndarray, # const int64_t[:]
32+
dtindex: npt.NDArray[np.int64], # const int64_t[:]
3133
) -> np.ndarray: ...
3234
def get_locale_names(name_type: str, locale: object = None): ...
3335

@@ -44,7 +46,7 @@ class RoundTo:
4446
def NEAREST_HALF_MINUS_INFTY(self) -> int: ...
4547

4648
def round_nsint64(
47-
values: np.ndarray, # np.ndarray[np.int64]
49+
values: npt.NDArray[np.int64],
4850
mode: RoundTo,
4951
nanos: int,
50-
) -> np.ndarray: ... # np.ndarray[np.int64]
52+
) -> npt.NDArray[np.int64]: ...

0 commit comments

Comments
 (0)