Skip to content

Commit 00bff1d

Browse files
authored
Merge pull request #240 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 9e845dc + 9731fd0 commit 00bff1d

File tree

13 files changed

+78
-67
lines changed

13 files changed

+78
-67
lines changed

asv_bench/benchmarks/reshape.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def setup(self, dtype):
111111
values = np.take(list(string.ascii_letters), indices)
112112
values = [pd.Categorical(v) for v in values.T]
113113

114-
self.df = DataFrame(values, index, columns)
114+
self.df = DataFrame({i: cat for i, cat in enumerate(values)}, index, columns)
115115
self.df2 = self.df.iloc[:-1]
116116

117117
def time_full_product(self, dtype):

doc/source/whatsnew/v1.3.2.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17+
- Performance regression in :meth:`DataFrame.isin` and :meth:`Series.isin` for nullable data types (:issue:`42714`)
1718
-
1819
-
1920

doc/source/whatsnew/v1.4.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ Indexing
225225
- Bug in :meth:`Series.loc` with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`)
226226
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
227227
- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
228-
-
228+
- Bug in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)
229229

230230
Missing
231231
^^^^^^^

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ dependencies:
108108
- fsspec>=0.7.4, <2021.6.0 # for generic remote file operations
109109
- gcsfs>=0.6.0 # file IO when using 'gcs://...' path
110110
- sqlalchemy # pandas.read_sql, DataFrame.to_sql
111-
- xarray # DataFrame.to_xarray
111+
- xarray<0.19 # DataFrame.to_xarray
112112
- cftime # Needed for downstream xarray.CFTimeIndex test
113113
- pyreadstat # pandas.read_spss
114114
- tabulate>=0.8.3 # DataFrame.to_markdown

pandas/core/arrays/masked.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ def isin(self, values) -> BooleanArray: # type: ignore[override]
417417
# see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
418418
result[self._mask] = values_have_NA
419419

420-
mask = np.zeros_like(self, dtype=bool)
420+
mask = np.zeros(self._data.shape, dtype=bool)
421421
return BooleanArray(result, mask, copy=False)
422422

423423
def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3745,7 +3745,7 @@ def _set_item_mgr(self, key, value: ArrayLike) -> None:
37453745
# try to set first as we want an invalid
37463746
# value exception to occur first
37473747
if len(self):
3748-
self._check_setitem_copy(stacklevel=5)
3748+
self._check_setitem_copy()
37493749

37503750
def _iset_item(self, loc: int, value) -> None:
37513751
arraylike = self._sanitize_column(value)

pandas/core/generic.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
doc,
6868
rewrite_axis_style_signature,
6969
)
70+
from pandas.util._exceptions import find_stack_level
7071
from pandas.util._validators import (
7172
validate_ascending,
7273
validate_bool_kwarg,
@@ -3506,7 +3507,7 @@ def _maybe_update_cacher(
35063507
"""
35073508

35083509
if verify_is_copy:
3509-
self._check_setitem_copy(stacklevel=5, t="referent")
3510+
self._check_setitem_copy(t="referent")
35103511

35113512
if clear:
35123513
self._clear_item_cache()
@@ -3853,26 +3854,21 @@ def _check_is_chained_assignment_possible(self) -> bool_t:
38533854
setting.
38543855
"""
38553856
if self._is_copy:
3856-
self._check_setitem_copy(stacklevel=4, t="referent")
3857+
self._check_setitem_copy(t="referent")
38573858
return False
38583859

38593860
@final
3860-
def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
3861+
def _check_setitem_copy(self, t="setting", force=False):
38613862
"""
38623863
38633864
Parameters
38643865
----------
3865-
stacklevel : int, default 4
3866-
the level to show of the stack when the error is output
38673866
t : str, the type of setting error
38683867
force : bool, default False
38693868
If True, then force showing an error.
38703869
38713870
validate if we are doing a setitem on a chained copy.
38723871
3873-
If you call this function, be sure to set the stacklevel such that the
3874-
user will see the error *at the level of setting*
3875-
38763872
It is technically possible to figure out that we are setting on
38773873
a copy even WITH a multi-dtyped pandas object. In other words, some
38783874
blocks may be views while other are not. Currently _is_view will ALWAYS
@@ -3931,7 +3927,7 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
39313927
if value == "raise":
39323928
raise com.SettingWithCopyError(t)
39333929
elif value == "warn":
3934-
warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel)
3930+
warnings.warn(t, com.SettingWithCopyWarning, stacklevel=find_stack_level())
39353931

39363932
def __delitem__(self, key) -> None:
39373933
"""

pandas/core/internals/blocks.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,7 +1255,7 @@ def where(self, other, cond, errors="raise") -> list[Block]:
12551255

12561256
return result_blocks
12571257

1258-
def _unstack(self, unstacker, fill_value, new_placement):
1258+
def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
12591259
"""
12601260
Return a list of unstacked blocks of self
12611261
@@ -1264,6 +1264,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
12641264
unstacker : reshape._Unstacker
12651265
fill_value : int
12661266
Only used in ExtensionBlock._unstack
1267+
allow_fill : bool
12671268
12681269
Returns
12691270
-------
@@ -1638,7 +1639,7 @@ def where(self, other, cond, errors="raise") -> list[Block]:
16381639

16391640
return [self.make_block_same_class(result)]
16401641

1641-
def _unstack(self, unstacker, fill_value, new_placement):
1642+
def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
16421643
# ExtensionArray-safe unstack.
16431644
# We override ObjectBlock._unstack, which unstacks directly on the
16441645
# values of the array. For EA-backed blocks, this would require
@@ -1655,7 +1656,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
16551656
blocks = [
16561657
# TODO: could cast to object depending on fill_value?
16571658
self.make_block_same_class(
1658-
self.values.take(indices, allow_fill=True, fill_value=fill_value),
1659+
self.values.take(indices, allow_fill=allow_fill, fill_value=fill_value),
16591660
BlockPlacement(place),
16601661
)
16611662
for indices, place in zip(new_values.T, new_placement)

pandas/core/internals/managers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1368,6 +1368,8 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
13681368
new_columns = unstacker.get_new_columns(self.items)
13691369
new_index = unstacker.new_index
13701370

1371+
allow_fill = not unstacker.mask.all()
1372+
13711373
new_blocks: list[Block] = []
13721374
columns_mask: list[np.ndarray] = []
13731375

@@ -1377,7 +1379,10 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
13771379
new_placement = new_columns.get_indexer(new_items)
13781380

13791381
blocks, mask = blk._unstack(
1380-
unstacker, fill_value, new_placement=new_placement
1382+
unstacker,
1383+
fill_value,
1384+
new_placement=new_placement,
1385+
allow_fill=allow_fill,
13811386
)
13821387

13831388
new_blocks.extend(blocks)

pandas/core/series.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,7 +1214,7 @@ def _check_is_chained_assignment_possible(self) -> bool:
12141214
if self._is_view and self._is_cached:
12151215
ref = self._get_cacher()
12161216
if ref is not None and ref._is_mixed_type:
1217-
self._check_setitem_copy(stacklevel=4, t="referent", force=True)
1217+
self._check_setitem_copy(t="referent", force=True)
12181218
return True
12191219
return super()._check_is_chained_assignment_possible()
12201220

@@ -1233,14 +1233,15 @@ def _maybe_update_cacher(
12331233
# a copy
12341234
if ref is None:
12351235
del self._cacher
1236+
elif len(self) == len(ref) and self.name in ref.columns:
1237+
# GH#42530 self.name must be in ref.columns
1238+
# to ensure column still in dataframe
1239+
# otherwise, either self or ref has swapped in new arrays
1240+
ref._maybe_cache_changed(cacher[0], self)
12361241
else:
1237-
if len(self) == len(ref):
1238-
# otherwise, either self or ref has swapped in new arrays
1239-
ref._maybe_cache_changed(cacher[0], self)
1240-
else:
1241-
# GH#33675 we have swapped in a new array, so parent
1242-
# reference to self is now invalid
1243-
ref._item_cache.pop(cacher[0], None)
1242+
# GH#33675 we have swapped in a new array, so parent
1243+
# reference to self is now invalid
1244+
ref._item_cache.pop(cacher[0], None)
12441245

12451246
super()._maybe_update_cacher(clear=clear, verify_is_copy=verify_is_copy)
12461247

pandas/tests/series/indexing/test_setitem.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from pandas import (
1010
Categorical,
11+
DataFrame,
1112
DatetimeIndex,
1213
Index,
1314
IntervalIndex,
@@ -945,3 +946,17 @@ def test_setitem_int_as_positional_fallback_deprecation():
945946
with tm.assert_produces_warning(FutureWarning, match=msg):
946947
ser3[4] = 99
947948
tm.assert_series_equal(ser3, expected3)
949+
950+
951+
def test_setitem_with_bool_indexer():
952+
# GH#42530
953+
954+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
955+
result = df.pop("b")
956+
result[[True, False, False]] = 9
957+
expected = Series(data=[9, 5, 6], name="b")
958+
tm.assert_series_equal(result, expected)
959+
960+
df.loc[[True, False, False], "a"] = 10
961+
expected = DataFrame({"a": [10, 2, 3]})
962+
tm.assert_frame_equal(df, expected)

requirements-dev.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ s3fs>=0.4.0
7272
fsspec>=0.7.4, <2021.6.0
7373
gcsfs>=0.6.0
7474
sqlalchemy
75-
xarray
75+
xarray<0.19
7676
cftime
7777
pyreadstat
7878
tabulate>=0.8.3

scripts/generate_pip_deps_from_conda.py

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@
1313
$ python scripts/generate_pip_deps_from_conda.py --compare
1414
"""
1515
import argparse
16-
import os
16+
import pathlib
1717
import re
1818
import sys
1919

2020
import yaml
2121

2222
EXCLUDE = {"python", "c-compiler", "cxx-compiler"}
23-
RENAME = {"pytables": "tables", "pyqt": "pyqt5", "dask-core": "dask"}
23+
RENAME = {"pytables": "tables", "dask-core": "dask"}
2424

2525

26-
def conda_package_to_pip(package):
26+
def conda_package_to_pip(package: str):
2727
"""
2828
Convert a conda package to its pip equivalent.
2929
@@ -36,17 +36,13 @@ def conda_package_to_pip(package):
3636
package = re.sub("(?<=[^<>])=", "==", package).strip()
3737

3838
for compare in ("<=", ">=", "=="):
39-
if compare not in package:
40-
continue
39+
if compare in package:
40+
pkg, version = package.split(compare)
41+
if pkg in EXCLUDE:
42+
return
4143

42-
pkg, version = package.split(compare)
43-
if pkg in EXCLUDE:
44-
return
45-
46-
if pkg in RENAME:
47-
return "".join((RENAME[pkg], compare, version))
48-
49-
break
44+
if pkg in RENAME:
45+
return "".join((RENAME[pkg], compare, version))
5046

5147
if package in EXCLUDE:
5248
return
@@ -57,16 +53,18 @@ def conda_package_to_pip(package):
5753
return package
5854

5955

60-
def main(conda_fname, pip_fname, compare=False):
56+
def generate_pip_from_conda(
57+
conda_path: pathlib.Path, pip_path: pathlib.Path, compare: bool = False
58+
) -> bool:
6159
"""
6260
Generate the pip dependencies file from the conda file, or compare that
6361
they are synchronized (``compare=True``).
6462
6563
Parameters
6664
----------
67-
conda_fname : str
65+
conda_path : pathlib.Path
6866
Path to the conda file with dependencies (e.g. `environment.yml`).
69-
pip_fname : str
67+
pip_path : pathlib.Path
7068
Path to the pip file with dependencies (e.g. `requirements-dev.txt`).
7169
compare : bool, default False
7270
Whether to generate the pip file (``False``) or to compare if the
@@ -78,8 +76,8 @@ def main(conda_fname, pip_fname, compare=False):
7876
bool
7977
True if the comparison fails, False otherwise
8078
"""
81-
with open(conda_fname) as conda_fd:
82-
deps = yaml.safe_load(conda_fd)["dependencies"]
79+
with conda_path.open() as file:
80+
deps = yaml.safe_load(file)["dependencies"]
8381

8482
pip_deps = []
8583
for dep in deps:
@@ -88,24 +86,23 @@ def main(conda_fname, pip_fname, compare=False):
8886
if conda_dep:
8987
pip_deps.append(conda_dep)
9088
elif isinstance(dep, dict) and len(dep) == 1 and "pip" in dep:
91-
pip_deps += dep["pip"]
89+
pip_deps.extend(dep["pip"])
9290
else:
9391
raise ValueError(f"Unexpected dependency {dep}")
9492

95-
fname = os.path.split(conda_fname)[1]
9693
header = (
97-
f"# This file is auto-generated from {fname}, do not modify.\n"
94+
f"# This file is auto-generated from {conda_path.name}, do not modify.\n"
9895
"# See that file for comments about the need/usage of each dependency.\n\n"
9996
)
10097
pip_content = header + "\n".join(pip_deps) + "\n"
10198

10299
if compare:
103-
with open(pip_fname) as pip_fd:
104-
return pip_content != pip_fd.read()
105-
else:
106-
with open(pip_fname, "w") as pip_fd:
107-
pip_fd.write(pip_content)
108-
return False
100+
with pip_path.open() as file:
101+
return pip_content != file.read()
102+
103+
with pip_path.open("w") as file:
104+
file.write(pip_content)
105+
return False
109106

110107

111108
if __name__ == "__main__":
@@ -117,25 +114,20 @@ def main(conda_fname, pip_fname, compare=False):
117114
action="store_true",
118115
help="compare whether the two files are equivalent",
119116
)
120-
argparser.add_argument(
121-
"--azure", action="store_true", help="show the output in azure-pipelines format"
122-
)
123117
args = argparser.parse_args()
124118

125-
repo_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
126-
res = main(
127-
os.path.join(repo_path, "environment.yml"),
128-
os.path.join(repo_path, "requirements-dev.txt"),
119+
conda_fname = "environment.yml"
120+
pip_fname = "requirements-dev.txt"
121+
repo_path = pathlib.Path(__file__).parent.parent.absolute()
122+
res = generate_pip_from_conda(
123+
pathlib.Path(repo_path, conda_fname),
124+
pathlib.Path(repo_path, pip_fname),
129125
compare=args.compare,
130126
)
131127
if res:
132128
msg = (
133-
f"`requirements-dev.txt` has to be generated with `{sys.argv[0]}` after "
134-
"`environment.yml` is modified.\n"
129+
f"`{pip_fname}` has to be generated with `{__file__}` after "
130+
f"`{conda_fname}` is modified.\n"
135131
)
136-
if args.azure:
137-
msg = (
138-
f"##vso[task.logissue type=error;sourcepath=requirements-dev.txt]{msg}"
139-
)
140132
sys.stderr.write(msg)
141133
sys.exit(res)

0 commit comments

Comments
 (0)