Sync Fork from Upstream Repo #162

Merged · 9 commits · Apr 2, 2021

10 changes: 8 additions & 2 deletions .pre-commit-config.yaml
@@ -86,11 +86,10 @@ repos:
         types: [python]
         exclude: ^pandas/_typing\.py$
     -   id: inconsistent-namespace-usage
-        name: 'Check for inconsistent use of pandas namespace in tests'
+        name: 'Check for inconsistent use of pandas namespace'
         entry: python scripts/check_for_inconsistent_pandas_namespace.py
         language: python
         types: [python]
-        files: ^pandas/tests/
     -   id: incorrect-code-directives
         name: Check for incorrect code block or IPython directives
         language: pygrep
@@ -213,3 +212,10 @@ repos:
             |\#\ type:\s?ignore(?!\[)
         language: pygrep
         types: [python]
+    -   id: use-pd_array-in-core
+        name: Import pandas.array as pd_array in core
+        language: python
+        entry: python scripts/use_pd_array_in_core.py
+        files: ^pandas/core/
+        exclude: ^pandas/core/api\.py$
+        types: [python]
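
The new use-pd_array-in-core hook only reports violations; it does not rewrite code. A minimal sketch of the convention it enforces (the pd_array alias is what scripts/use_pd_array_in_core.py checks for; the exact import path shown here is the usual pandas-internal one and is an assumption, not part of this diff):

    # Inside pandas/core/ (except core/api.py), referencing pd.array
    # directly is flagged by the hook:
    import pandas as pd

    result = pd.array([1, 2, 3])

    # Importing the same constructor under the pd_array alias passes:
    from pandas.core.construction import array as pd_array

    result = pd_array([1, 2, 3])
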
20 changes: 9 additions & 11 deletions asv_bench/benchmarks/arithmetic.py
@@ -140,9 +140,7 @@ def setup(self, op, shape):
         # construct dataframe with 2 blocks
         arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
         arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4")
-        df = pd.concat(
-            [pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True
-        )
+        df = pd.concat([DataFrame(arr1), DataFrame(arr2)], axis=1, ignore_index=True)
         # should already be the case, but just to be sure
         df._consolidate_inplace()
 
@@ -151,7 +149,7 @@ def setup(self, op, shape):
         arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
         arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
         df2 = pd.concat(
-            [pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
+            [DataFrame(arr1), DataFrame(arr2), DataFrame(arr3)],
             axis=1,
             ignore_index=True,
         )
@@ -459,9 +457,9 @@ class OffsetArrayArithmetic:
 
     def setup(self, offset):
         N = 10000
-        rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
+        rng = date_range(start="1/1/2000", periods=N, freq="T")
         self.rng = rng
-        self.ser = pd.Series(rng)
+        self.ser = Series(rng)
 
     def time_add_series_offset(self, offset):
         with warnings.catch_warnings(record=True):
@@ -478,7 +476,7 @@ class ApplyIndex:
 
     def setup(self, offset):
         N = 10000
-        rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
+        rng = date_range(start="1/1/2000", periods=N, freq="T")
         self.rng = rng
 
     def time_apply_index(self, offset):
@@ -490,17 +488,17 @@ class BinaryOpsMultiIndex:
     param_names = ["func"]
 
     def setup(self, func):
-        date_range = pd.date_range("20200101 00:00", "20200102 0:00", freq="S")
+        array = date_range("20200101 00:00", "20200102 0:00", freq="S")
         level_0_names = [str(i) for i in range(30)]
 
-        index = pd.MultiIndex.from_product([level_0_names, date_range])
+        index = pd.MultiIndex.from_product([level_0_names, array])
         column_names = ["col_1", "col_2"]
 
-        self.df = pd.DataFrame(
+        self.df = DataFrame(
             np.random.rand(len(index), 2), index=index, columns=column_names
         )
 
-        self.arg_df = pd.DataFrame(
+        self.arg_df = DataFrame(
            np.random.randint(1, 10, (len(level_0_names), 2)),
            index=level_0_names,
            columns=column_names,
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/sparse.py
@@ -28,7 +28,7 @@ def setup(self):
             data = np.random.randn(N)[:-i]
             idx = rng[:-i]
             data[100:] = np.nan
-            self.series[i] = pd.Series(pd.SparseArray(data), index=idx)
+            self.series[i] = Series(SparseArray(data), index=idx)
 
     def time_series_to_frame(self):
         pd.DataFrame(self.series)
@@ -63,7 +63,7 @@ def setup(self):
         )
 
     def time_sparse_series_from_coo(self):
-        pd.Series.sparse.from_coo(self.matrix)
+        Series.sparse.from_coo(self.matrix)
 
 
 class ToCoo:
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
@@ -161,6 +161,7 @@ Other enhancements
 - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`)
 - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`)
 - :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`)
+- :meth:`.Styler.clear` now clears :attr:`Styler.hidden_index` and :attr:`Styler.hidden_columns` as well (:issue:`40484`)
 - Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`)
 - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`)
 - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.
@@ -571,6 +572,7 @@ Conversion
 - Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
 - Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
 - Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`)
+- Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='categorical'`` (:issue:`40450`)
 - Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
 -
14 changes: 7 additions & 7 deletions pandas/_testing/__init__.py
@@ -207,12 +207,12 @@ def box_expected(expected, box_cls, transpose=True):
     """
     if box_cls is pd.array:
         expected = pd.array(expected)
-    elif box_cls is pd.Index:
-        expected = pd.Index(expected)
-    elif box_cls is pd.Series:
-        expected = pd.Series(expected)
-    elif box_cls is pd.DataFrame:
-        expected = pd.Series(expected).to_frame()
+    elif box_cls is Index:
+        expected = Index(expected)
+    elif box_cls is Series:
+        expected = Series(expected)
+    elif box_cls is DataFrame:
+        expected = Series(expected).to_frame()
         if transpose:
             # for vector operations, we need a DataFrame to be a single-row,
             # not a single-column, in order to operate against non-DataFrame
@@ -400,7 +400,7 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None)
         "x": state.rand(n) * 2 - 1,
         "y": state.rand(n) * 2 - 1,
     }
-    df = pd.DataFrame(columns, index=index, columns=sorted(columns))
+    df = DataFrame(columns, index=index, columns=sorted(columns))
     if df.index[-1] == end:
         df = df.iloc[:-1]
     return df
18 changes: 8 additions & 10 deletions pandas/conftest.py
@@ -328,7 +328,7 @@ def unique_nulls_fixture(request):
 # ----------------------------------------------------------------
 
 
-@pytest.fixture(params=[pd.DataFrame, pd.Series])
+@pytest.fixture(params=[DataFrame, Series])
 def frame_or_series(request):
     """
     Fixture to parametrize over DataFrame and Series.
@@ -338,7 +338,7 @@ def frame_or_series(request):
 
 # error: List item 0 has incompatible type "Type[Index]"; expected "Type[IndexOpsMixin]"
 @pytest.fixture(
-    params=[pd.Index, pd.Series], ids=["index", "series"]  # type: ignore[list-item]
+    params=[Index, Series], ids=["index", "series"]  # type: ignore[list-item]
 )
 def index_or_series(request):
     """
@@ -356,9 +356,7 @@ def index_or_series(request):
 index_or_series2 = index_or_series
 
 
-@pytest.fixture(
-    params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"]
-)
+@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"])
 def index_or_series_or_array(request):
     """
     Fixture to parametrize over Index, Series, and ExtensionArray
@@ -559,7 +557,7 @@ def index_with_missing(request):
 # ----------------------------------------------------------------
 @pytest.fixture
 def empty_series():
-    return pd.Series([], index=[], dtype=np.float64)
+    return Series([], index=[], dtype=np.float64)
 
 
 @pytest.fixture
@@ -596,7 +594,7 @@ def _create_series(index):
     """ Helper for the _series dict """
     size = len(index)
     data = np.random.randn(size)
-    return pd.Series(data, index=index, name="a")
+    return Series(data, index=index, name="a")
 
 
 _series = {
@@ -1437,16 +1435,16 @@ def any_numpy_dtype(request):
     ("boolean", [True, np.nan, False]),
     ("boolean", [True, pd.NA, False]),
     ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
-    ("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]),
+    ("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]),
     ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
     # The following two dtypes are commented out due to GH 23554
     # ('complex', [1 + 1j, np.nan, 2 + 2j]),
     # ('timedelta64', [np.timedelta64(1, 'D'),
     #                  np.nan, np.timedelta64(2, 'D')]),
     ("timedelta", [timedelta(1), np.nan, timedelta(2)]),
     ("time", [time(1), np.nan, time(2)]),
-    ("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]),
-    ("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
+    ("period", [Period(2013), pd.NaT, Period(2018)]),
+    ("interval", [Interval(0, 1), np.nan, Interval(0, 2)]),
 ]
 ids, _ = zip(*_any_skipna_inferred_dtype)  # use inferred type as fixture-id
 
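
These fixtures return the class itself, so a test parametrized this way runs once per container type. A hedged sketch of how such a fixture is consumed (the test below is hypothetical, not part of this PR):

    # conftest.py provides frame_or_series; pytest injects DataFrame, then Series.
    def test_container_roundtrip(frame_or_series):
        obj = frame_or_series([1, 2, 3])
        assert type(obj) is frame_or_series
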
3 changes: 3 additions & 0 deletions pandas/core/arrays/string_.py
@@ -327,6 +327,9 @@ def astype(self, dtype, copy=True):
             arr[mask] = "0"
             values = arr.astype(dtype.numpy_dtype)
             return FloatingArray(values, mask, copy=False)
+        elif isinstance(dtype, ExtensionDtype):
+            cls = dtype.construct_array_type()
+            return cls._from_sequence(self, dtype=dtype, copy=copy)
         elif np.issubdtype(dtype, np.floating):
             arr = self._ndarray.copy()
             mask = self.isna()
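
This ExtensionDtype branch is the fix behind the GH 40450 whatsnew entry above: StringArray.astype now dispatches to the target extension type instead of falling back to numpy. A minimal usage sketch using only public API:

    import pandas as pd

    arr = pd.array(["a", "b", pd.NA], dtype="string")
    # "category" resolves to CategoricalDtype (an ExtensionDtype), so astype
    # now routes through construct_array_type()._from_sequence(...)
    cat = arr.astype("category")
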
19 changes: 14 additions & 5 deletions pandas/core/computation/expressions.py
@@ -104,11 +104,20 @@ def _evaluate_numexpr(op, op_str, a, b):
         a_value = a
         b_value = b
 
-        result = ne.evaluate(
-            f"a_value {op_str} b_value",
-            local_dict={"a_value": a_value, "b_value": b_value},
-            casting="safe",
-        )
+        try:
+            result = ne.evaluate(
+                f"a_value {op_str} b_value",
+                local_dict={"a_value": a_value, "b_value": b_value},
+                casting="safe",
+            )
+        except TypeError:
+            # numexpr raises eg for array ** array with integers
+            # (https://github.com/pydata/numexpr/issues/379)
+            pass
+
+        if is_reversed:
+            # reverse order to original for fallback
+            a, b = b, a
 
     if _TEST_MODE:
         _store_test_result(result is not None)
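
The try/except leaves result as None when numexpr rejects the expression, and the caller then falls back to the standard evaluator. A standalone sketch of that fallback pattern (the variable names are local to this example, not this module):

    import numexpr as ne
    import numpy as np

    a = np.arange(5)
    b = np.arange(5)

    result = None
    try:
        # numexpr raises TypeError for integer ** integer arrays
        # (https://github.com/pydata/numexpr/issues/379)
        result = ne.evaluate("a ** b", local_dict={"a": a, "b": b}, casting="safe")
    except TypeError:
        pass

    if result is None:
        # fall back to plain NumPy evaluation
        result = a ** b
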
49 changes: 0 additions & 49 deletions pandas/core/dtypes/cast.py
@@ -498,55 +498,6 @@ def maybe_cast_to_extension_array(
     return result
 
 
-def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray) -> np.ndarray:
-    """
-    A safe version of putmask that potentially upcasts the result.
-
-    The result is replaced with the first N elements of other,
-    where N is the number of True values in mask.
-    If the length of other is shorter than N, other will be repeated.
-
-    Parameters
-    ----------
-    result : ndarray
-        The destination array. This will be mutated in-place if no upcasting is
-        necessary.
-    mask : np.ndarray[bool]
-
-    Returns
-    -------
-    result : ndarray
-
-    Examples
-    --------
-    >>> arr = np.arange(1, 6)
-    >>> mask = np.array([False, True, False, True, True])
-    >>> result = maybe_upcast_putmask(arr, mask)
-    >>> result
-    array([ 1., nan,  3., nan, nan])
-    """
-    if not isinstance(result, np.ndarray):
-        raise ValueError("The result input must be a ndarray.")
-
-    # NB: we never get here with result.dtype.kind in ["m", "M"]
-
-    if mask.any():
-
-        # we want to decide whether place will work
-        # if we have nans in the False portion of our mask then we need to
-        # upcast (possibly), otherwise we DON'T want to upcast (e.g. if we
-        # have values, say integers, in the success portion then it's ok to not
-        # upcast)
-        new_dtype = ensure_dtype_can_hold_na(result.dtype)
-
-        if new_dtype != result.dtype:
-            result = result.astype(new_dtype, copy=True)
-
-        np.place(result, mask, np.nan)
-
-    return result
-
-
 @overload
 def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype:
     ...
4 changes: 0 additions & 4 deletions pandas/core/internals/__init__.py
@@ -9,12 +9,10 @@
 )
 from pandas.core.internals.blocks import (  # io.pytables, io.packers
     Block,
-    DatetimeBlock,
     DatetimeTZBlock,
     ExtensionBlock,
     NumericBlock,
     ObjectBlock,
-    TimeDeltaBlock,
 )
 from pandas.core.internals.concat import concatenate_managers
 from pandas.core.internals.managers import (
@@ -28,11 +26,9 @@
     "Block",
     "CategoricalBlock",
     "NumericBlock",
-    "DatetimeBlock",
     "DatetimeTZBlock",
     "ExtensionBlock",
     "ObjectBlock",
-    "TimeDeltaBlock",
     "make_block",
     "DataManager",
     "ArrayManager",
2 changes: 1 addition & 1 deletion pandas/core/internals/array_manager.py
@@ -471,7 +471,7 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T
                 # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
                 # attribute "tz"
                 if hasattr(arr, "tz") and arr.tz is None:  # type: ignore[union-attr]
-                    # DatetimeArray needs to be converted to ndarray for DatetimeBlock
+                    # DatetimeArray needs to be converted to ndarray for DatetimeLikeBlock
 
                     # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
                     # attribute "_data"