Skip to content

Commit a348337

Browse files
committed
2 parents 6946c6d + 3770dda commit a348337

File tree

158 files changed

+2431
-1394
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

158 files changed

+2431
-1394
lines changed

.github/workflows/wheels.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ jobs:
8686
activate-environment: test
8787
channels: conda-forge, anaconda
8888
channel-priority: true
89-
mamba-version: "*"
89+
# mamba fails to solve, also we really don't need this since we're just installing python
90+
# mamba-version: "*"
9091

9192
- name: Test wheels (Windows 64-bit only)
9293
if: ${{ matrix.buildplat[1] == 'win_amd64' }}
@@ -154,7 +155,8 @@ jobs:
154155
python-version: '3.8'
155156
channels: conda-forge
156157
channel-priority: true
157-
mamba-version: "*"
158+
# mamba fails to solve, also we really don't need this since we're just installing python
159+
# mamba-version: "*"
158160

159161
- name: Build sdist
160162
run: |

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ repos:
9292
args: [--disable=all, --enable=redefined-outer-name]
9393
stages: [manual]
9494
- repo: https://github.com/PyCQA/isort
95-
rev: 5.11.4
95+
rev: 5.12.0
9696
hooks:
9797
- id: isort
9898
- repo: https://github.com/asottile/pyupgrade

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ BSD 3-Clause License
33
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
44
All rights reserved.
55

6-
Copyright (c) 2011-2022, Open source contributors.
6+
Copyright (c) 2011-2023, Open source contributors.
77

88
Redistribution and use in source and binary forms, with or without
99
modification, are permitted provided that the following conditions are met:

asv_bench/benchmarks/indexing.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99

1010
from pandas import (
11+
NA,
1112
CategoricalIndex,
1213
DataFrame,
1314
Index,
@@ -83,6 +84,37 @@ def time_loc_slice(self, index, index_structure):
8384
self.data.loc[:800000]
8485

8586

87+
class NumericMaskedIndexing:
    """
    Benchmarks for ``get_indexer`` / ``get_indexer_for`` on an ``Index`` built
    with the "Int64", "UInt64" and "Float64" dtypes.

    Parameters (asv)
    ----------------
    dtype : str
        Dtype passed to the ``Index`` constructor.
    monotonic : bool
        Whether the index is built from ``monotonic_list`` or from
        ``non_monotonic_list`` (which additionally gets a trailing ``NA``).
    """

    monotonic_list = list(range(10**6))
    non_monotonic_list = (
        list(range(50)) + [54, 53, 52, 51] + list(range(55, 10**6 - 1))
    )

    params = [
        ("Int64", "UInt64", "Float64"),
        (True, False),
    ]
    param_names = ["dtype", "monotonic"]

    def setup(self, dtype, monotonic):
        # Build only the index that this parameter combination needs; the
        # other variant would be thrown away immediately.
        if monotonic:
            self.data = Index(self.monotonic_list, dtype=dtype)
        else:
            # The non-monotonic variant also carries one missing value at
            # the end.
            self.data = Index(self.non_monotonic_list, dtype=dtype).append(
                Index([NA], dtype=dtype)
            )
        self.indexer = np.arange(300, 1_000)
        # Every label appears twice in this index.
        self.data_dups = self.data.append(self.data)

    def time_get_indexer(self, dtype, monotonic):
        self.data.get_indexer(self.indexer)

    def time_get_indexer_dups(self, dtype, monotonic):
        # Query the index with duplicated labels; using ``self.data`` here
        # would just repeat ``time_get_indexer`` and leave ``data_dups`` unused.
        self.data_dups.get_indexer_for(self.indexer)
116+
117+
86118
class NonNumericSeriesIndexing:
87119

88120
params = [

asv_bench/benchmarks/indexing_engines.py

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
"""
2-
Benchmarks in this file depend exclusively on code in _libs/
2+
Benchmarks in this file depend mostly on code in _libs/
3+
4+
We have to create masked arrays to test the masked engine though. The
5+
array is unpacked on the Cython level.
36
47
If a PR does not edit anything in _libs, it is very unlikely that benchmarks
58
in this file will be affected.
@@ -9,6 +12,8 @@
912

1013
from pandas._libs import index as libindex
1114

15+
from pandas.core.arrays import BaseMaskedArray
16+
1217

1318
def _get_numeric_engines():
1419
engine_names = [
@@ -30,6 +35,26 @@ def _get_numeric_engines():
3035
]
3136

3237

38+
def _get_masked_engines():
39+
engine_names = [
40+
("MaskedInt64Engine", "Int64"),
41+
("MaskedInt32Engine", "Int32"),
42+
("MaskedInt16Engine", "Int16"),
43+
("MaskedInt8Engine", "Int8"),
44+
("MaskedUInt64Engine", "UInt64"),
45+
("MaskedUInt32Engine", "UInt32"),
46+
("MaskedUInt16engine", "UInt16"),
47+
("MaskedUInt8Engine", "UInt8"),
48+
("MaskedFloat64Engine", "Float64"),
49+
("MaskedFloat32Engine", "Float32"),
50+
]
51+
return [
52+
(getattr(libindex, engine_name), dtype)
53+
for engine_name, dtype in engine_names
54+
if hasattr(libindex, engine_name)
55+
]
56+
57+
3358
class NumericEngineIndexing:
3459

3560
params = [
@@ -80,6 +105,61 @@ def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
80105
self.data.get_loc(self.key_middle)
81106

82107

108+
class MaskedNumericEngineIndexing:
    """
    Benchmarks for ``get_loc`` on the masked engines returned by
    ``_get_masked_engines``.

    Parameters (asv)
    ----------------
    engine_and_dtype : tuple
        ``(engine_class, dtype_name)`` pair.
    index_type : {"monotonic_incr", "monotonic_decr", "non_monotonic"}
        Ordering of the values handed to the engine.
    unique : bool
        Whether the values are generated as a range or as repeated 1/2/3.
    N : int
        One third of the array length.
    """

    params = [
        _get_masked_engines(),
        ["monotonic_incr", "monotonic_decr", "non_monotonic"],
        [True, False],
        [10**5, 2 * 10**6],  # 2e6 is above SIZE_CUTOFF
    ]
    param_names = ["engine_and_dtype", "index_type", "unique", "N"]

    def setup(self, engine_and_dtype, index_type, unique, N):
        engine, dtype = engine_and_dtype
        # The numpy dtype name is the lower-cased masked dtype name.
        np_dtype = dtype.lower()
        size = N * 3
        mask = np.zeros(size, dtype=np.bool_)

        if index_type in ("monotonic_incr", "monotonic_decr"):
            if unique:
                arr = np.arange(size, dtype=np_dtype)
            else:
                arr = np.array([1] * N + [2] * N + [3] * N, dtype=np_dtype)
            if index_type == "monotonic_decr":
                # Decreasing data is the increasing data viewed backwards.
                arr = arr[::-1]
        else:
            assert index_type == "non_monotonic"
            if unique:
                arr = np.zeros(size, dtype=np_dtype)
                arr[:N] = np.arange(N * 2, size, dtype=np_dtype)
                arr[N:] = np.arange(N * 2, dtype=np_dtype)
            else:
                arr = np.array([1, 2, 3] * N, dtype=np_dtype)
            # Only the non-monotonic case marks an entry (the last) as masked.
            mask[-1] = True

        self.data = engine(BaseMaskedArray(arr, mask))
        # Do one lookup up front so that populating the mapping etc. does
        # not happen while timing.
        self.data.get_loc(2)

        self.key_middle = arr[len(arr) // 2]
        self.key_early = arr[2]

    def time_get_loc(self, engine_and_dtype, index_type, unique, N):
        self.data.get_loc(self.key_early)

    def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
        # searchsorted performance may be different near the middle of a range
        # vs near an endpoint
        self.data.get_loc(self.key_middle)
161+
162+
83163
class ObjectEngineIndexing:
84164

85165
params = [("monotonic_incr", "monotonic_decr", "non_monotonic")]

ci/code_checks.sh

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8383
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
8484
RET=$(($RET + $?)) ; echo $MSG "DONE"
8585

86-
MSG='Partially validate docstrings (EX01)' ; echo $MSG
86+
MSG='Partially validate docstrings (EX01)' ; echo $MSG
8787
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
8888
pandas.Series.index \
8989
pandas.Series.dtype \
@@ -187,7 +187,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
187187
pandas.show_versions \
188188
pandas.test \
189189
pandas.NaT \
190-
pandas.Timestamp.unit \
191190
pandas.Timestamp.as_unit \
192191
pandas.Timestamp.ctime \
193192
pandas.Timestamp.date \
@@ -574,7 +573,73 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
574573
pandas.DataFrame.sparse.to_coo \
575574
pandas.DataFrame.to_gbq \
576575
pandas.DataFrame.style \
577-
pandas.DataFrame.__dataframe__ \
576+
pandas.DataFrame.__dataframe__
577+
RET=$(($RET + $?)) ; echo $MSG "DONE"
578+
579+
MSG='Partially validate docstrings (EX02)' ; echo $MSG
580+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX02 --ignore_functions \
581+
pandas.DataFrame.copy \
582+
pandas.DataFrame.plot.line \
583+
pandas.DataFrame.std \
584+
pandas.DataFrame.var \
585+
pandas.Index.factorize \
586+
pandas.Period.strftime \
587+
pandas.Series.copy \
588+
pandas.Series.factorize \
589+
pandas.Series.floordiv \
590+
pandas.Series.plot.line \
591+
pandas.Series.rfloordiv \
592+
pandas.Series.sparse.density \
593+
pandas.Series.sparse.npoints \
594+
pandas.Series.sparse.sp_values \
595+
pandas.Series.std \
596+
pandas.Series.var \
597+
pandas.Timestamp.fromtimestamp \
598+
pandas.api.types.infer_dtype \
599+
pandas.api.types.is_bool_dtype \
600+
pandas.api.types.is_categorical_dtype \
601+
pandas.api.types.is_complex_dtype \
602+
pandas.api.types.is_datetime64_any_dtype \
603+
pandas.api.types.is_datetime64_dtype \
604+
pandas.api.types.is_datetime64_ns_dtype \
605+
pandas.api.types.is_datetime64tz_dtype \
606+
pandas.api.types.is_dict_like \
607+
pandas.api.types.is_file_like \
608+
pandas.api.types.is_float_dtype \
609+
pandas.api.types.is_hashable \
610+
pandas.api.types.is_int64_dtype \
611+
pandas.api.types.is_integer_dtype \
612+
pandas.api.types.is_interval_dtype \
613+
pandas.api.types.is_iterator \
614+
pandas.api.types.is_list_like \
615+
pandas.api.types.is_named_tuple \
616+
pandas.api.types.is_numeric_dtype \
617+
pandas.api.types.is_object_dtype \
618+
pandas.api.types.is_period_dtype \
619+
pandas.api.types.is_re \
620+
pandas.api.types.is_re_compilable \
621+
pandas.api.types.is_signed_integer_dtype \
622+
pandas.api.types.is_sparse \
623+
pandas.api.types.is_string_dtype \
624+
pandas.api.types.is_timedelta64_dtype \
625+
pandas.api.types.is_timedelta64_ns_dtype \
626+
pandas.api.types.is_unsigned_integer_dtype \
627+
pandas.core.groupby.DataFrameGroupBy.take \
628+
pandas.core.groupby.SeriesGroupBy.take \
629+
pandas.factorize \
630+
pandas.io.formats.style.Styler.concat \
631+
pandas.io.formats.style.Styler.export \
632+
pandas.io.formats.style.Styler.set_td_classes \
633+
pandas.io.formats.style.Styler.use \
634+
pandas.io.json.build_table_schema \
635+
pandas.merge_ordered \
636+
pandas.option_context \
637+
pandas.plotting.andrews_curves \
638+
pandas.plotting.autocorrelation_plot \
639+
pandas.plotting.lag_plot \
640+
pandas.plotting.parallel_coordinates \
641+
pandas.plotting.radviz \
642+
pandas.tseries.frequencies.to_offset
578643
RET=$(($RET + $?)) ; echo $MSG "DONE"
579644

580645
fi

doc/source/development/contributing_docstring.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@ case of pandas, the NumPy docstring convention is followed. These conventions ar
6767
explained in this document:
6868

6969
* `numpydoc docstring guide <https://numpydoc.readthedocs.io/en/latest/format.html>`_
70-
(which is based in the original `Guide to NumPy/SciPy documentation
71-
<https://github.com/numpy/numpy/blob/main/doc/HOWTO_DOCUMENT.rst.txt>`_)
7270

7371
numpydoc is a Sphinx extension to support the NumPy docstring convention.
7472

doc/source/development/maintaining.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,8 @@ which will be triggered when the tag is pushed.
465465

466466
7. Download all wheels from the Anaconda repository where MacPython uploads them:
467467
https://anaconda.org/multibuild-wheels-staging/pandas/files?version=<version>
468-
to the ``dist/`` directory in the local pandas copy.
468+
to the ``dist/`` directory in the local pandas copy. You can use the script
469+
``scripts/download_wheels.sh`` to download all wheels at once.
469470

470471
8. Upload wheels to PyPI:
471472

doc/source/reference/frame.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ Binary operator functions
8383
.. autosummary::
8484
:toctree: api/
8585

86+
DataFrame.__add__
8687
DataFrame.add
8788
DataFrame.sub
8889
DataFrame.mul

doc/source/whatsnew/v1.1.4.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Fixed regressions
3131
- Fixed regression in setitem with :meth:`DataFrame.iloc` which raised error when trying to set a value while filtering with a boolean list (:issue:`36741`)
3232
- Fixed regression in setitem with a Series getting aligned before setting the values (:issue:`37427`)
3333
- Fixed regression in :attr:`MultiIndex.is_monotonic_increasing` returning wrong results with ``NaN`` in at least one of the levels (:issue:`37220`)
34-
- Fixed regression in inplace arithmetic operation on a Series not updating the parent DataFrame (:issue:`36373`)
34+
- Fixed regression in inplace arithmetic operation (``+=``) on a Series not updating the parent DataFrame/Series (:issue:`36373`)
3535

3636
.. ---------------------------------------------------------------------------
3737

0 commit comments

Comments
 (0)