Sync Fork from Upstream Repo #240

Merged 6 commits on Jul 26, 2021
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/reshape.py
@@ -111,7 +111,7 @@ def setup(self, dtype):
         values = np.take(list(string.ascii_letters), indices)
         values = [pd.Categorical(v) for v in values.T]

-        self.df = DataFrame(values, index, columns)
+        self.df = DataFrame({i: cat for i, cat in enumerate(values)}, index, columns)
         self.df2 = self.df.iloc[:-1]

     def time_full_product(self, dtype):
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.2.rst
@@ -14,6 +14,7 @@ including other versions of pandas.

 Fixed regressions
 ~~~~~~~~~~~~~~~~~
+- Performance regression in :meth:`DataFrame.isin` and :meth:`Series.isin` for nullable data types (:issue:`42714`)
 -
 -
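For context, :meth:`Series.isin` on a nullable (masked) dtype is the affected path; a minimal illustration of the call pattern (values are made up):

import pandas as pd

ser = pd.Series([1, 2, None], dtype="Int64")
print(ser.isin([1, 3]))
# 0     True
# 1    False
# 2    False
# dtype: boolean

The fix itself is the one-line change in pandas/core/arrays/masked.py further down in this diff.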
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
@@ -225,7 +225,7 @@ Indexing
 - Bug in :meth:`Series.loc` when with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`)
 - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
 - Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
--
+- Bug in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)

 Missing
 ^^^^^^^
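A short reproduction of the :issue:`42530` entry above, mirroring the test added at the bottom of this diff (behavior as described in the issue report):

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
ser = df.pop("b")              # ser still holds a cache reference to df
ser[[True, False, False]] = 9  # before the fix, this assignment misbehaved
print(ser.tolist())            # [9, 5, 6] after the fix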
2 changes: 1 addition & 1 deletion environment.yml
@@ -108,7 +108,7 @@ dependencies:
   - fsspec>=0.7.4, <2021.6.0  # for generic remote file operations
   - gcsfs>=0.6.0  # file IO when using 'gcs://...' path
   - sqlalchemy  # pandas.read_sql, DataFrame.to_sql
-  - xarray  # DataFrame.to_xarray
+  - xarray<0.19  # DataFrame.to_xarray
   - cftime  # Needed for downstream xarray.CFTimeIndex test
   - pyreadstat  # pandas.read_spss
   - tabulate>=0.8.3  # DataFrame.to_markdown
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
@@ -417,7 +417,7 @@ def isin(self, values) -> BooleanArray:  # type: ignore[override]
         # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
         result[self._mask] = values_have_NA

-        mask = np.zeros_like(self, dtype=bool)
+        mask = np.zeros(self._data.shape, dtype=bool)
         return BooleanArray(result, mask, copy=False)

     def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
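Why this one-liner helps: np.zeros_like(self, ...) first coerces its argument to an ndarray via __array__, which for a masked array is an O(n) conversion, whereas np.zeros(self._data.shape, ...) allocates directly from the known shape. A toy sketch of the difference (the Wrapper class is illustrative, not pandas code):

import numpy as np

class Wrapper:
    # stand-in for a masked array that keeps its data in ._data
    def __init__(self, data):
        self._data = data

    def __array__(self, dtype=None):
        print("__array__ called: O(n) conversion")
        return np.asarray(self._data, dtype=dtype)

w = Wrapper(np.arange(5))
np.zeros_like(w, dtype=bool)         # triggers the conversion above
np.zeros(w._data.shape, dtype=bool)  # same result, no conversion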
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -3745,7 +3745,7 @@ def _set_item_mgr(self, key, value: ArrayLike) -> None:
         # try to set first as we want an invalid
         # value exception to occur first
         if len(self):
-            self._check_setitem_copy(stacklevel=5)
+            self._check_setitem_copy()

     def _iset_item(self, loc: int, value) -> None:
         arraylike = self._sanitize_column(value)
14 changes: 5 additions & 9 deletions pandas/core/generic.py
@@ -67,6 +67,7 @@
     doc,
     rewrite_axis_style_signature,
 )
+from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import (
     validate_ascending,
     validate_bool_kwarg,
@@ -3506,7 +3507,7 @@ def _maybe_update_cacher(
         """

         if verify_is_copy:
-            self._check_setitem_copy(stacklevel=5, t="referent")
+            self._check_setitem_copy(t="referent")

         if clear:
             self._clear_item_cache()
@@ -3853,26 +3854,21 @@ def _check_is_chained_assignment_possible(self) -> bool_t:
         setting.
         """
         if self._is_copy:
-            self._check_setitem_copy(stacklevel=4, t="referent")
+            self._check_setitem_copy(t="referent")
         return False

     @final
-    def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
+    def _check_setitem_copy(self, t="setting", force=False):
         """

         Parameters
         ----------
-        stacklevel : int, default 4
-            the level to show of the stack when the error is output
         t : str, the type of setting error
         force : bool, default False
             If True, then force showing an error.

         validate if we are doing a setitem on a chained copy.

-        If you call this function, be sure to set the stacklevel such that the
-        user will see the error *at the level of setting*
-
         It is technically possible to figure out that we are setting on
         a copy even WITH a multi-dtyped pandas object. In other words, some
         blocks may be views while other are not. Currently _is_view will ALWAYS
@@ -3931,7 +3927,7 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
         if value == "raise":
             raise com.SettingWithCopyError(t)
         elif value == "warn":
-            warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel)
+            warnings.warn(t, com.SettingWithCopyWarning, stacklevel=find_stack_level())

     def __delitem__(self, key) -> None:
         """
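The hard-coded stacklevel= arguments above are replaced by pandas.util._exceptions.find_stack_level, which works out at runtime how far up the stack the first non-pandas frame sits, so SettingWithCopyWarning points at user code regardless of how deep the internal call chain is. A minimal sketch of the idea (not the actual pandas implementation):

import inspect
import os

def find_stack_level() -> int:
    # walk outward until the first frame that lives outside this package
    pkg_dir = os.path.dirname(os.path.abspath(__file__))
    level = 1
    frame = inspect.currentframe().f_back
    while frame is not None and frame.f_code.co_filename.startswith(pkg_dir):
        level += 1
        frame = frame.f_back
    return level

# usage, as in the diff:
# warnings.warn(t, com.SettingWithCopyWarning, stacklevel=find_stack_level())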
7 changes: 4 additions & 3 deletions pandas/core/internals/blocks.py
@@ -1255,7 +1255,7 @@ def where(self, other, cond, errors="raise") -> list[Block]:

         return result_blocks

-    def _unstack(self, unstacker, fill_value, new_placement):
+    def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
         """
         Return a list of unstacked blocks of self

@@ -1264,6 +1264,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
         unstacker : reshape._Unstacker
         fill_value : int
             Only used in ExtensionBlock._unstack
+        allow_fill : bool

         Returns
         -------
@@ -1638,7 +1639,7 @@ def where(self, other, cond, errors="raise") -> list[Block]:

         return [self.make_block_same_class(result)]

-    def _unstack(self, unstacker, fill_value, new_placement):
+    def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
         # ExtensionArray-safe unstack.
         # We override ObjectBlock._unstack, which unstacks directly on the
         # values of the array. For EA-backed blocks, this would require
@@ -1655,7 +1656,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
         blocks = [
             # TODO: could cast to object depending on fill_value?
             self.make_block_same_class(
-                self.values.take(indices, allow_fill=True, fill_value=fill_value),
+                self.values.take(indices, allow_fill=allow_fill, fill_value=fill_value),
                 BlockPlacement(place),
             )
             for indices, place in zip(new_values.T, new_placement)
7 changes: 6 additions & 1 deletion pandas/core/internals/managers.py
@@ -1368,6 +1368,8 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
         new_columns = unstacker.get_new_columns(self.items)
         new_index = unstacker.new_index

+        allow_fill = not unstacker.mask.all()
+
         new_blocks: list[Block] = []
         columns_mask: list[np.ndarray] = []

@@ -1377,7 +1379,10 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
             new_placement = new_columns.get_indexer(new_items)

             blocks, mask = blk._unstack(
-                unstacker, fill_value, new_placement=new_placement
+                unstacker,
+                fill_value,
+                new_placement=new_placement,
+                allow_fill=allow_fill,
             )

             new_blocks.extend(blocks)
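Together, the blocks.py and managers.py changes above thread a single allow_fill flag from the manager down to every block: when unstacker.mask.all() is True, every cell of the reshaped result receives a real value, so the per-block take calls can skip missing-value handling. A small NumPy illustration of what the flag encodes (stand-in for ExtensionArray.take):

import numpy as np

mask = np.array([True, True, True, True])  # every output cell is populated
allow_fill = not mask.all()                # False: no fill handling needed

indices = np.array([3, 1, 2, 0])           # no -1 ("missing") entries
values = np.array([10, 20, 30, 40])
print(values.take(indices))                # [40 20 30 10]

With allow_fill=False, ExtensionArray.take treats -1 as "count from the end" rather than "insert fill_value", which is safe here precisely because the mask guarantees no -1 indices occur.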
17 changes: 9 additions & 8 deletions pandas/core/series.py
@@ -1214,7 +1214,7 @@ def _check_is_chained_assignment_possible(self) -> bool:
         if self._is_view and self._is_cached:
             ref = self._get_cacher()
             if ref is not None and ref._is_mixed_type:
-                self._check_setitem_copy(stacklevel=4, t="referent", force=True)
+                self._check_setitem_copy(t="referent", force=True)
                 return True
         return super()._check_is_chained_assignment_possible()

@@ -1233,14 +1233,15 @@ def _maybe_update_cacher(
             # a copy
             if ref is None:
                 del self._cacher
+            elif len(self) == len(ref) and self.name in ref.columns:
+                # GH#42530 self.name must be in ref.columns
+                # to ensure column still in dataframe
+                # otherwise, either self or ref has swapped in new arrays
+                ref._maybe_cache_changed(cacher[0], self)
             else:
-                if len(self) == len(ref):
-                    # otherwise, either self or ref has swapped in new arrays
-                    ref._maybe_cache_changed(cacher[0], self)
-                else:
-                    # GH#33675 we have swapped in a new array, so parent
-                    # reference to self is now invalid
-                    ref._item_cache.pop(cacher[0], None)
+                # GH#33675 we have swapped in a new array, so parent
+                # reference to self is now invalid
+                ref._item_cache.pop(cacher[0], None)

         super()._maybe_update_cacher(clear=clear, verify_is_copy=verify_is_copy)
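The added self.name in ref.columns guard is the heart of the GH#42530 fix: DataFrame.pop deletes the column from the parent while the returned Series can still hold a cacher reference to it, and the old same-length branch then tried to write the cache back into a parent that no longer owns the column, breaking the assignment. A sketch of the state involved (assumes pandas 1.3 internals):

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
ser = df.pop("b")              # "b" is gone from df; ser may stay cached
assert "b" not in df.columns
ser[[True, False, False]] = 9  # _maybe_update_cacher now skips the write-back
print(ser.tolist())            # [9, 5, 6]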
15 changes: 15 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
@@ -8,6 +8,7 @@

 from pandas import (
     Categorical,
+    DataFrame,
     DatetimeIndex,
     Index,
     IntervalIndex,
@@ -945,3 +946,17 @@ def test_setitem_int_as_positional_fallback_deprecation():
     with tm.assert_produces_warning(FutureWarning, match=msg):
         ser3[4] = 99
     tm.assert_series_equal(ser3, expected3)
+
+
+def test_setitem_with_bool_indexer():
+    # GH#42530
+
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    result = df.pop("b")
+    result[[True, False, False]] = 9
+    expected = Series(data=[9, 5, 6], name="b")
+    tm.assert_series_equal(result, expected)
+
+    df.loc[[True, False, False], "a"] = 10
+    expected = DataFrame({"a": [10, 2, 3]})
+    tm.assert_frame_equal(df, expected)
2 changes: 1 addition & 1 deletion requirements-dev.txt
@@ -72,7 +72,7 @@ s3fs>=0.4.0
 fsspec>=0.7.4, <2021.6.0
 gcsfs>=0.6.0
 sqlalchemy
-xarray
+xarray<0.19
 cftime
 pyreadstat
 tabulate>=0.8.3
72 changes: 32 additions & 40 deletions scripts/generate_pip_deps_from_conda.py
@@ -13,17 +13,17 @@
 $ python scripts/generate_pip_deps_from_conda.py --compare
 """
 import argparse
-import os
+import pathlib
 import re
 import sys

 import yaml

 EXCLUDE = {"python", "c-compiler", "cxx-compiler"}
-RENAME = {"pytables": "tables", "pyqt": "pyqt5", "dask-core": "dask"}
+RENAME = {"pytables": "tables", "dask-core": "dask"}


-def conda_package_to_pip(package):
+def conda_package_to_pip(package: str):
     """
     Convert a conda package to its pip equivalent.

@@ -36,17 +36,13 @@ def conda_package_to_pip(package):
     package = re.sub("(?<=[^<>])=", "==", package).strip()

     for compare in ("<=", ">=", "=="):
-        if compare not in package:
-            continue
+        if compare in package:
+            pkg, version = package.split(compare)
+            if pkg in EXCLUDE:
+                return

-        pkg, version = package.split(compare)
-        if pkg in EXCLUDE:
-            return
-
-        if pkg in RENAME:
-            return "".join((RENAME[pkg], compare, version))
-
-        break
+            if pkg in RENAME:
+                return "".join((RENAME[pkg], compare, version))

     if package in EXCLUDE:
         return
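The restructured loop is behavior-preserving; typical conversions, with expected values following the EXCLUDE and RENAME tables above:

>>> conda_package_to_pip("pandas=1.2")       # conda's single '=' becomes '=='
'pandas==1.2'
>>> conda_package_to_pip("pytables>=3.6.1")  # renamed for pip
'tables>=3.6.1'
>>> conda_package_to_pip("python=3.8")       # excluded: returns None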
@@ -57,16 +53,18 @@
     return package


-def main(conda_fname, pip_fname, compare=False):
+def generate_pip_from_conda(
+    conda_path: pathlib.Path, pip_path: pathlib.Path, compare: bool = False
+) -> bool:
     """
     Generate the pip dependencies file from the conda file, or compare that
     they are synchronized (``compare=True``).

     Parameters
     ----------
-    conda_fname : str
+    conda_path : pathlib.Path
         Path to the conda file with dependencies (e.g. `environment.yml`).
-    pip_fname : str
+    pip_path : pathlib.Path
         Path to the pip file with dependencies (e.g. `requirements-dev.txt`).
     compare : bool, default False
         Whether to generate the pip file (``False``) or to compare if the
@@ -78,8 +76,8 @@ def main(conda_fname, pip_fname, compare=False):
     bool
         True if the comparison fails, False otherwise
     """
-    with open(conda_fname) as conda_fd:
-        deps = yaml.safe_load(conda_fd)["dependencies"]
+    with conda_path.open() as file:
+        deps = yaml.safe_load(file)["dependencies"]

     pip_deps = []
     for dep in deps:
@@ -88,24 +86,23 @@ def main(conda_fname, pip_fname, compare=False):
         if conda_dep:
             pip_deps.append(conda_dep)
         elif isinstance(dep, dict) and len(dep) == 1 and "pip" in dep:
-            pip_deps += dep["pip"]
+            pip_deps.extend(dep["pip"])
         else:
             raise ValueError(f"Unexpected dependency {dep}")

-    fname = os.path.split(conda_fname)[1]
     header = (
-        f"# This file is auto-generated from {fname}, do not modify.\n"
+        f"# This file is auto-generated from {conda_path.name}, do not modify.\n"
         "# See that file for comments about the need/usage of each dependency.\n\n"
     )
     pip_content = header + "\n".join(pip_deps) + "\n"

     if compare:
-        with open(pip_fname) as pip_fd:
-            return pip_content != pip_fd.read()
-    else:
-        with open(pip_fname, "w") as pip_fd:
-            pip_fd.write(pip_content)
-        return False
+        with pip_path.open() as file:
+            return pip_content != file.read()
+
+    with pip_path.open("w") as file:
+        file.write(pip_content)
+    return False


 if __name__ == "__main__":
@@ -117,25 +114,20 @@
         action="store_true",
         help="compare whether the two files are equivalent",
     )
-    argparser.add_argument(
-        "--azure", action="store_true", help="show the output in azure-pipelines format"
-    )
     args = argparser.parse_args()

-    repo_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
-    res = main(
-        os.path.join(repo_path, "environment.yml"),
-        os.path.join(repo_path, "requirements-dev.txt"),
+    conda_fname = "environment.yml"
+    pip_fname = "requirements-dev.txt"
+    repo_path = pathlib.Path(__file__).parent.parent.absolute()
+    res = generate_pip_from_conda(
+        pathlib.Path(repo_path, conda_fname),
+        pathlib.Path(repo_path, pip_fname),
         compare=args.compare,
     )
     if res:
         msg = (
-            f"`requirements-dev.txt` has to be generated with `{sys.argv[0]}` after "
-            "`environment.yml` is modified.\n"
+            f"`{pip_fname}` has to be generated with `{__file__}` after "
+            f"`{conda_fname}` is modified.\n"
         )
-        if args.azure:
-            msg = (
-                f"##vso[task.logissue type=error;sourcepath=requirements-dev.txt]{msg}"
-            )
         sys.stderr.write(msg)
     sys.exit(res)
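Usage is unchanged apart from the dropped --azure flag; the script now reports mismatches on plain stderr and through its exit code, as the script's own docstring shows:

$ python scripts/generate_pip_deps_from_conda.py            # regenerate requirements-dev.txt
$ python scripts/generate_pip_deps_from_conda.py --compare  # exit 1 if the files are out of sync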