Skip to content

Sync Fork from Upstream Repo #84

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions asv_bench/benchmarks/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,36 @@ def time_frame_op_with_scalar(self, dtype, scalar, op):
op(self.df, scalar)


class MixedFrameWithSeriesAxis0:
    """
    Benchmark DataFrame flex methods called with a Series and ``axis=0``.

    The frame built in ``setup`` has 100 rows of integer columns plus one
    float column ("C"), so it holds more than one dtype.
    """

    # Each entry is the name of a DataFrame method looked up via ``getattr``.
    params = [
        [
            "eq", "ne", "lt", "le", "ge", "gt",
            "add", "sub", "div", "floordiv", "mul", "pow",
        ]
    ]
    param_names = ["opname"]

    def setup(self, opname):
        """Build the frame and the Series operand (the frame's column 0)."""
        values = np.arange(10 ** 6).reshape(100, -1)
        frame = DataFrame(values)
        # Adding a float column makes the frame mixed-dtype.
        frame["C"] = 1.0
        self.df = frame
        self.ser = frame[0]

    def time_frame_op_with_series_axis0(self, opname):
        """Time ``df.<opname>(ser, axis=0)``."""
        flex_method = getattr(self.df, opname)
        flex_method(self.ser, axis=0)


class Ops:

params = [[True, False], ["default", 1]]
Expand Down
6 changes: 3 additions & 3 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ Other enhancements
- When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
- ``OptionError`` is now exposed in ``pandas.errors`` (:issue:`27553`)
- :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`)
- Positional slicing on an :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`)
-

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -172,7 +173,7 @@ Deprecations
~~~~~~~~~~~~
- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated and will raise in a future version. Either convert the list to a tuple, or pass the slice directly instead (:issue:`31333`)
- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
-
- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
-

.. ---------------------------------------------------------------------------
Expand All @@ -185,7 +186,7 @@ Performance improvements

- Performance improvement in :class:`Timedelta` constructor (:issue:`30543`)
- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`)
-
- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
-

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -248,7 +249,6 @@ Strings

Interval
^^^^^^^^

-
-

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/ops.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def scalar_compare(object[:] values, object val, object op):

@cython.wraparound(False)
@cython.boundscheck(False)
def vec_compare(object[:] left, object[:] right, object op):
def vec_compare(ndarray[object] left, ndarray[object] right, object op):
"""
Compare the elements of `left` with the elements of `right` pointwise,
with the comparison operation described by `op`.
Expand Down
58 changes: 24 additions & 34 deletions pandas/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
is_interval_dtype,
is_list_like,
is_number,
is_numeric_dtype,
is_period_dtype,
is_sequence,
is_timedelta64_dtype,
Expand Down Expand Up @@ -1064,7 +1065,6 @@ def assert_series_equal(
right,
check_dtype=True,
check_index_type="equiv",
check_series_type=True,
check_less_precise=False,
check_names=True,
check_exact=False,
Expand All @@ -1085,8 +1085,6 @@ def assert_series_equal(
check_index_type : bool or {'equiv'}, default 'equiv'
Whether to check the Index class, dtype and inferred_type
are identical.
check_series_type : bool, default True
Whether to check the Series class is identical.
check_less_precise : bool or int, default False
Specify comparison precision. Only used when check_exact is False.
5 digits (False) or 3 digits (True) after decimal points are compared.
Expand Down Expand Up @@ -1118,11 +1116,10 @@ def assert_series_equal(
# instance validation
_check_isinstance(left, right, Series)

if check_series_type:
# ToDo: There are some tests using rhs is sparse
# lhs is dense. Should use assert_class_equal in future
assert isinstance(left, type(right))
# assert_class_equal(left, right, obj=obj)
# TODO: There are some tests using rhs is sparse
# lhs is dense. Should use assert_class_equal in future
assert isinstance(left, type(right))
# assert_class_equal(left, right, obj=obj)

# length comparison
if len(left) != len(right):
Expand All @@ -1147,47 +1144,40 @@ def assert_series_equal(
# is False. We'll still raise if only one is a `Categorical`,
# regardless of `check_categorical`
if (
is_categorical_dtype(left)
and is_categorical_dtype(right)
is_categorical_dtype(left.dtype)
and is_categorical_dtype(right.dtype)
and not check_categorical
):
pass
else:
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")

if check_exact:
if not is_numeric_dtype(left.dtype):
raise AssertionError("check_exact may only be used with numeric Series")

assert_numpy_array_equal(
left._internal_get_values(),
right._internal_get_values(),
check_dtype=check_dtype,
obj=str(obj),
left._values, right._values, check_dtype=check_dtype, obj=str(obj)
)
elif check_datetimelike_compat:
elif check_datetimelike_compat and (
needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)
):
# we want to check only if we have compat dtypes
# e.g. integer and M|m are NOT compat, but we can simply check
# the values in that case
if needs_i8_conversion(left) or needs_i8_conversion(right):

# datetimelike may have different objects (e.g. datetime.datetime
# vs Timestamp) but will compare equal
if not Index(left._values).equals(Index(right._values)):
msg = (
f"[datetimelike_compat=True] {left._values} "
f"is not equal to {right._values}."
)
raise AssertionError(msg)
else:
assert_numpy_array_equal(
left._internal_get_values(),
right._internal_get_values(),
check_dtype=check_dtype,

# datetimelike may have different objects (e.g. datetime.datetime
# vs Timestamp) but will compare equal
if not Index(left._values).equals(Index(right._values)):
msg = (
f"[datetimelike_compat=True] {left._values} "
f"is not equal to {right._values}."
)
elif is_interval_dtype(left) or is_interval_dtype(right):
raise AssertionError(msg)
elif is_interval_dtype(left.dtype) or is_interval_dtype(right.dtype):
assert_interval_array_equal(left.array, right.array)
elif is_extension_array_dtype(left.dtype) and is_datetime64tz_dtype(left.dtype):
elif is_datetime64tz_dtype(left.dtype):
# .values is an ndarray, but ._values is the ExtensionArray.
# TODO: Use .array
assert is_extension_array_dtype(right.dtype)
assert_extension_array_equal(left._values, right._values)
elif (
is_extension_array_dtype(left)
Expand Down
14 changes: 0 additions & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5212,20 +5212,6 @@ def _arith_op(left, right):

return new_data

def _combine_match_index(self, other: Series, func):
    """
    Apply ``func`` between this frame and ``other`` once their indexes match.

    Callers must already have ensured ``self.index.equals(other.index)``.
    """
    if ops.should_series_dispatch(self, other, func):
        # operate column-wise; avoid costly object-casting in `.values`
        return ops.dispatch_to_series(self, other, func)

    # fastpath --> operate directly on values, with `other` reshaped to a
    #  single column
    column_vector = other.values.reshape(-1, 1)
    with np.errstate(all="ignore"):
        result = func(self.values, column_vector)
    return dispatch_fill_zeros(func, self.values, column_vector, result)

def _construct_result(self, result) -> "DataFrame":
"""
Wrap the result of an arithmetic, comparison, or logical operation.
Expand Down
29 changes: 29 additions & 0 deletions pandas/core/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
is_array_like,
is_bool_dtype,
is_extension_array_dtype,
is_integer,
is_integer_dtype,
is_list_like,
)
Expand All @@ -20,6 +21,34 @@
# Indexer Identification


def is_valid_positional_slice(slc: slice) -> bool:
    """
    Determine whether a slice object could serve as a positional indexer.

    Parameters
    ----------
    slc : slice

    Returns
    -------
    bool
        True when each of ``start``, ``stop`` and ``step`` is either None
        or an integer.

    Notes
    -----
    A valid positional slice may also be interpreted as a label-based slice
    depending on the index being sliced.
    """
    components = (slc.start, slc.stop, slc.step)
    return all(part is None or is_integer(part) for part in components)


def is_list_like_indexer(key) -> bool:
"""
Check if we have a list-like indexer that is *not* a NamedTuple.
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3137,8 +3137,18 @@ def is_int(v):
pass

if com.is_null_slice(key):
# It doesn't matter if we are positional or label based
indexer = key
elif is_positional:
if kind == "loc":
# GH#16121, GH#24612, GH#31810
warnings.warn(
"Slicing a positional slice with .loc is not supported, "
"and will raise TypeError in a future version. "
"Use .loc with labels or .iloc with positions instead.",
FutureWarning,
stacklevel=6,
)
indexer = key
else:
indexer = self.slice_indexer(start, stop, step, kind=kind)
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from pandas.core.algorithms import take_1d
from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs
import pandas.core.common as com
from pandas.core.indexers import is_valid_positional_slice
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import (
Index,
Expand Down Expand Up @@ -866,7 +867,16 @@ def get_indexer_for(self, target: AnyArrayLike, **kwargs) -> np.ndarray:

def _convert_slice_indexer(self, key: slice, kind: str):
    """
    Validate a slice with a non-default step, then defer to the parent class.

    Parameters
    ----------
    key : slice
    kind : str
        Indexing mode; "loc" and "getitem" are the values checked here.

    Returns
    -------
    The result of the parent class's ``_convert_slice_indexer``.

    Raises
    ------
    ValueError
        If ``key.step`` is neither None nor 1 and either ``kind == "loc"``,
        or ``kind == "getitem"`` and ``key`` is not a valid positional slice.
    """
    if not (key.step is None or key.step == 1):
        # GH#31658 if label-based, we require step == 1,
        #  if positional, we disallow float start/stop
        # NOTE: the former unconditional
        #  ``raise ValueError("cannot support not-default step in a slice")``
        #  is gone; it made the checks below unreachable.
        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        if kind == "loc":
            raise ValueError(msg)
        elif kind == "getitem":
            if not is_valid_positional_slice(key):
                # i.e. this cannot be interpreted as a positional slice
                raise ValueError(msg)

    return super()._convert_slice_indexer(key, kind)

@Appender(Index.where.__doc__)
Expand Down
Loading