Skip to content

Commit c5978f2

Browse files
authored
Merge pull request #84 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 93fc052 + edb863e commit c5978f2

32 files changed

+448
-171
lines changed

asv_bench/benchmarks/arithmetic.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,36 @@ def time_frame_op_with_scalar(self, dtype, scalar, op):
5050
op(self.df, scalar)
5151

5252

53+
class MixedFrameWithSeriesAxis0:
    """
    Benchmark flex ops between a mixed-dtype DataFrame and a Series on axis=0.

    The frame is built from an integer array and then given one extra float
    column ("C"), so it holds more than one dtype.  The Series operand is the
    frame's own first column, so its index already matches the frame's index.
    """

    # A single parameter axis: the name of the DataFrame flex method to time.
    params = [
        [
            "eq",
            "ne",
            "lt",
            "le",
            "ge",
            "gt",
            "add",
            "sub",
            "div",
            "floordiv",
            "mul",
            "pow",
        ]
    ]
    param_names = ["opname"]

    def setup(self, opname):
        """
        Build the operands used by the timed method.

        Parameters
        ----------
        opname : str
            Unused here; accepted because the benchmark is parametrized on it.
        """
        # 10**6 integers laid out as 100 rows; the column count is inferred.
        arr = np.arange(10 ** 6).reshape(100, -1)
        df = DataFrame(arr)
        # Adding a float column makes the frame mixed-dtype.
        df["C"] = 1.0
        self.df = df
        self.ser = df[0]

    def time_frame_op_with_series_axis0(self, opname):
        """
        Time ``self.df.<opname>(self.ser, axis=0)``.

        Parameters
        ----------
        opname : str
            Name of the DataFrame method to look up and call.
        """
        getattr(self.df, opname)(self.ser, axis=0)
81+
82+
5383
class Ops:
5484

5585
params = [[True, False], ["default", 1]]

doc/source/whatsnew/v1.1.0.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ Other enhancements
6767
- When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
6868
- `OptionError` is now exposed in `pandas.errors` (:issue:`27553`)
6969
- :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`)
70+
- Positional slicing on an :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`)
7071
-
7172

7273
.. ---------------------------------------------------------------------------
@@ -172,7 +173,7 @@ Deprecations
172173
~~~~~~~~~~~~
173174
- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
174175
- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
175-
-
176+
- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
176177
-
177178

178179
.. ---------------------------------------------------------------------------
@@ -185,7 +186,7 @@ Performance improvements
185186

186187
- Performance improvement in :class:`Timedelta` constructor (:issue:`30543`)
187188
- Performance improvement in :class:`Timestamp` constructor (:issue:`30543`)
188-
-
189+
- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
189190
-
190191

191192
.. ---------------------------------------------------------------------------
@@ -248,7 +249,6 @@ Strings
248249

249250
Interval
250251
^^^^^^^^
251-
252252
-
253253
-
254254

pandas/_libs/ops.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def scalar_compare(object[:] values, object val, object op):
100100

101101
@cython.wraparound(False)
102102
@cython.boundscheck(False)
103-
def vec_compare(object[:] left, object[:] right, object op):
103+
def vec_compare(ndarray[object] left, ndarray[object] right, object op):
104104
"""
105105
Compare the elements of `left` with the elements of `right` pointwise,
106106
with the comparison operation described by `op`.

pandas/_testing.py

Lines changed: 24 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
is_interval_dtype,
3535
is_list_like,
3636
is_number,
37+
is_numeric_dtype,
3738
is_period_dtype,
3839
is_sequence,
3940
is_timedelta64_dtype,
@@ -1064,7 +1065,6 @@ def assert_series_equal(
10641065
right,
10651066
check_dtype=True,
10661067
check_index_type="equiv",
1067-
check_series_type=True,
10681068
check_less_precise=False,
10691069
check_names=True,
10701070
check_exact=False,
@@ -1085,8 +1085,6 @@ def assert_series_equal(
10851085
check_index_type : bool or {'equiv'}, default 'equiv'
10861086
Whether to check the Index class, dtype and inferred_type
10871087
are identical.
1088-
check_series_type : bool, default True
1089-
Whether to check the Series class is identical.
10901088
check_less_precise : bool or int, default False
10911089
Specify comparison precision. Only used when check_exact is False.
10921090
5 digits (False) or 3 digits (True) after decimal points are compared.
@@ -1118,11 +1116,10 @@ def assert_series_equal(
11181116
# instance validation
11191117
_check_isinstance(left, right, Series)
11201118

1121-
if check_series_type:
1122-
# ToDo: There are some tests using rhs is sparse
1123-
# lhs is dense. Should use assert_class_equal in future
1124-
assert isinstance(left, type(right))
1125-
# assert_class_equal(left, right, obj=obj)
1119+
# TODO: Some tests compare a sparse rhs against
1120+
# a dense lhs. Should use assert_class_equal in the future
1121+
assert isinstance(left, type(right))
1122+
# assert_class_equal(left, right, obj=obj)
11261123

11271124
# length comparison
11281125
if len(left) != len(right):
@@ -1147,47 +1144,40 @@ def assert_series_equal(
11471144
# is False. We'll still raise if only one is a `Categorical`,
11481145
# regardless of `check_categorical`
11491146
if (
1150-
is_categorical_dtype(left)
1151-
and is_categorical_dtype(right)
1147+
is_categorical_dtype(left.dtype)
1148+
and is_categorical_dtype(right.dtype)
11521149
and not check_categorical
11531150
):
11541151
pass
11551152
else:
11561153
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
11571154

11581155
if check_exact:
1156+
if not is_numeric_dtype(left.dtype):
1157+
raise AssertionError("check_exact may only be used with numeric Series")
1158+
11591159
assert_numpy_array_equal(
1160-
left._internal_get_values(),
1161-
right._internal_get_values(),
1162-
check_dtype=check_dtype,
1163-
obj=str(obj),
1160+
left._values, right._values, check_dtype=check_dtype, obj=str(obj)
11641161
)
1165-
elif check_datetimelike_compat:
1162+
elif check_datetimelike_compat and (
1163+
needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)
1164+
):
11661165
# we want to check only if we have compat dtypes
11671166
# e.g. integer and M|m are NOT compat, but we can simply check
11681167
# the values in that case
1169-
if needs_i8_conversion(left) or needs_i8_conversion(right):
1170-
1171-
# datetimelike may have different objects (e.g. datetime.datetime
1172-
# vs Timestamp) but will compare equal
1173-
if not Index(left._values).equals(Index(right._values)):
1174-
msg = (
1175-
f"[datetimelike_compat=True] {left._values} "
1176-
f"is not equal to {right._values}."
1177-
)
1178-
raise AssertionError(msg)
1179-
else:
1180-
assert_numpy_array_equal(
1181-
left._internal_get_values(),
1182-
right._internal_get_values(),
1183-
check_dtype=check_dtype,
1168+
1169+
# datetimelike may have different objects (e.g. datetime.datetime
1170+
# vs Timestamp) but will compare equal
1171+
if not Index(left._values).equals(Index(right._values)):
1172+
msg = (
1173+
f"[datetimelike_compat=True] {left._values} "
1174+
f"is not equal to {right._values}."
11841175
)
1185-
elif is_interval_dtype(left) or is_interval_dtype(right):
1176+
raise AssertionError(msg)
1177+
elif is_interval_dtype(left.dtype) or is_interval_dtype(right.dtype):
11861178
assert_interval_array_equal(left.array, right.array)
1187-
elif is_extension_array_dtype(left.dtype) and is_datetime64tz_dtype(left.dtype):
1179+
elif is_datetime64tz_dtype(left.dtype):
11881180
# .values is an ndarray, but ._values is the ExtensionArray.
1189-
# TODO: Use .array
1190-
assert is_extension_array_dtype(right.dtype)
11911181
assert_extension_array_equal(left._values, right._values)
11921182
elif (
11931183
is_extension_array_dtype(left)

pandas/core/frame.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5212,20 +5212,6 @@ def _arith_op(left, right):
52125212

52135213
return new_data
52145214

5215-
def _combine_match_index(self, other: Series, func):
5216-
# at this point we have `self.index.equals(other.index)`
5217-
5218-
if ops.should_series_dispatch(self, other, func):
5219-
# operate column-wise; avoid costly object-casting in `.values`
5220-
new_data = ops.dispatch_to_series(self, other, func)
5221-
else:
5222-
# fastpath --> operate directly on values
5223-
other_vals = other.values.reshape(-1, 1)
5224-
with np.errstate(all="ignore"):
5225-
new_data = func(self.values, other_vals)
5226-
new_data = dispatch_fill_zeros(func, self.values, other_vals, new_data)
5227-
return new_data
5228-
52295215
def _construct_result(self, result) -> "DataFrame":
52305216
"""
52315217
Wrap the result of an arithmetic, comparison, or logical operation.

pandas/core/indexers.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
is_array_like,
1212
is_bool_dtype,
1313
is_extension_array_dtype,
14+
is_integer,
1415
is_integer_dtype,
1516
is_list_like,
1617
)
@@ -20,6 +21,34 @@
2021
# Indexer Identification
2122

2223

24+
def is_valid_positional_slice(slc: slice) -> bool:
    """
    Check if a slice object can be interpreted as a positional indexer.

    Parameters
    ----------
    slc : slice

    Returns
    -------
    bool
        True when each of ``slc.start``, ``slc.stop`` and ``slc.step`` is
        either None or an integer; False otherwise.

    Notes
    -----
    A valid positional slice may also be interpreted as a label-based slice
    depending on the index being sliced.
    """
    # Every bound must be absent or an integer for positional use.
    return all(
        bound is None or is_integer(bound)
        for bound in (slc.start, slc.stop, slc.step)
    )
50+
51+
2352
def is_list_like_indexer(key) -> bool:
2453
"""
2554
Check if we have a list-like indexer that is *not* a NamedTuple.

pandas/core/indexes/base.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3137,8 +3137,18 @@ def is_int(v):
31373137
pass
31383138

31393139
if com.is_null_slice(key):
3140+
# It doesn't matter if we are positional or label based
31403141
indexer = key
31413142
elif is_positional:
3143+
if kind == "loc":
3144+
# GH#16121, GH#24612, GH#31810
3145+
warnings.warn(
3146+
"Slicing a positional slice with .loc is not supported, "
3147+
"and will raise TypeError in a future version. "
3148+
"Use .loc with labels or .iloc with positions instead.",
3149+
FutureWarning,
3150+
stacklevel=6,
3151+
)
31423152
indexer = key
31433153
else:
31443154
indexer = self.slice_indexer(start, stop, step, kind=kind)

pandas/core/indexes/interval.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from pandas.core.algorithms import take_1d
4040
from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs
4141
import pandas.core.common as com
42+
from pandas.core.indexers import is_valid_positional_slice
4243
import pandas.core.indexes.base as ibase
4344
from pandas.core.indexes.base import (
4445
Index,
@@ -866,7 +867,16 @@ def get_indexer_for(self, target: AnyArrayLike, **kwargs) -> np.ndarray:
866867

867868
def _convert_slice_indexer(self, key: slice, kind: str):
    """
    Validate a stepped slice, then defer to the parent implementation.

    Parameters
    ----------
    key : slice
    kind : str
        Only "loc" and "getitem" are checked here; any other value is
        passed through to the parent implementation unchanged.

    Raises
    ------
    ValueError
        If ``key.step`` is neither None nor 1 and either ``kind == "loc"``,
        or ``kind == "getitem"`` and ``key`` is not a valid positional slice.
    """
    if not (key.step is None or key.step == 1):
        # GH#31658 if label-based, we require step == 1,
        #  if positional, we disallow float start/stop
        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        if kind == "loc":
            raise ValueError(msg)
        elif kind == "getitem":
            if not is_valid_positional_slice(key):
                # i.e. this cannot be interpreted as a positional slice
                raise ValueError(msg)

    return super()._convert_slice_indexer(key, kind)
871881

872882
@Appender(Index.where.__doc__)

0 commit comments

Comments
 (0)