Skip to content

Commit 45f0705

Browse files
authored
PERF: slow tests (#44727)
1 parent 4f3b32b commit 45f0705

File tree

5 files changed

+37
-12
lines changed

5 files changed

+37
-12
lines changed

pandas/core/arrays/datetimelike.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
Timestamp,
3636
delta_to_nanoseconds,
3737
iNaT,
38+
ints_to_pydatetime,
3839
to_offset,
3940
)
4041
from pandas._libs.tslibs.fields import (
@@ -406,6 +407,19 @@ def astype(self, dtype, copy: bool = True):
406407
dtype = pandas_dtype(dtype)
407408

408409
if is_object_dtype(dtype):
410+
if self.dtype.kind == "M":
411+
# *much* faster than self._box_values
412+
# e.g. for test_get_loc_tuple_monotonic_above_size_cutoff
413+
i8data = self.asi8.ravel()
414+
converted = ints_to_pydatetime(
415+
i8data,
416+
# error: "DatetimeLikeArrayMixin" has no attribute "tz"
417+
tz=self.tz, # type: ignore[attr-defined]
418+
freq=self.freq,
419+
box="timestamp",
420+
)
421+
return converted.reshape(self.shape)
422+
409423
return self._box_values(self.asi8.ravel()).reshape(self.shape)
410424

411425
elif isinstance(dtype, ExtensionDtype):

pandas/core/dtypes/common.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,15 @@ def is_categorical_dtype(arr_or_dtype) -> bool:
528528
return CategoricalDtype.is_dtype(arr_or_dtype)
529529

530530

531+
def is_string_or_object_np_dtype(dtype: np.dtype) -> bool:
532+
"""
533+
Faster alternative to is_string_dtype; assumes we have a np.dtype object.
534+
"""
535+
# error: Non-overlapping equality check (left operand type: "dtype[Any]",
536+
# right operand type: "Type[object]")
537+
return dtype == object or dtype.kind in "SU" # type: ignore[comparison-overlap]
538+
539+
531540
def is_string_dtype(arr_or_dtype) -> bool:
532541
"""
533542
Check whether the provided array or dtype is of the string dtype.

pandas/core/dtypes/missing.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
is_integer_dtype,
3434
is_object_dtype,
3535
is_scalar,
36-
is_string_dtype,
36+
is_string_or_object_np_dtype,
3737
needs_i8_conversion,
3838
)
3939
from pandas.core.dtypes.dtypes import (
@@ -242,7 +242,7 @@ def _isna_array(values: ArrayLike, inf_as_na: bool = False):
242242
result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
243243
else:
244244
result = values.isna()
245-
elif is_string_dtype(dtype):
245+
elif is_string_or_object_np_dtype(values.dtype):
246246
result = _isna_string_dtype(values, inf_as_na=inf_as_na)
247247
elif needs_i8_conversion(dtype):
248248
# this is the NaT pattern
@@ -376,7 +376,10 @@ def isna_compat(arr, fill_value=np.nan) -> bool:
376376

377377

378378
def array_equivalent(
379-
left, right, strict_nan: bool = False, dtype_equal: bool = False
379+
left,
380+
right,
381+
strict_nan: bool = False,
382+
dtype_equal: bool = False,
380383
) -> bool:
381384
"""
382385
True if two arrays, left and right, have equal non-NaN elements, and NaNs
@@ -421,13 +424,13 @@ def array_equivalent(
421424

422425
if dtype_equal:
423426
# fastpath when we require that the dtypes match (Block.equals)
424-
if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype):
427+
if left.dtype.kind in ["f", "c"]:
425428
return _array_equivalent_float(left, right)
426429
elif is_datetimelike_v_numeric(left.dtype, right.dtype):
427430
return False
428431
elif needs_i8_conversion(left.dtype):
429432
return _array_equivalent_datetimelike(left, right)
430-
elif is_string_dtype(left.dtype):
433+
elif is_string_or_object_np_dtype(left.dtype):
431434
# TODO: fastpath for pandas' StringDtype
432435
return _array_equivalent_object(left, right, strict_nan)
433436
else:

pandas/tests/dtypes/test_common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ def get_is_dtype_funcs():
154154
155155
"""
156156
fnames = [f for f in dir(com) if (f.startswith("is_") and f.endswith("dtype"))]
157+
fnames.remove("is_string_or_object_np_dtype") # fastpath requires np.dtype obj
157158
return [getattr(com, fname) for fname in fnames]
158159

159160

pandas/tests/indexing/test_chaining_and_caching.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,12 @@
2020

2121

2222
def random_text(nobs=100):
23-
df = []
24-
for i in range(nobs):
25-
idx = np.random.randint(len(letters), size=2)
26-
idx.sort()
23+
# Construct a DataFrame where each row is a random slice from 'letters'
24+
idxs = np.random.randint(len(letters), size=(nobs, 2))
25+
idxs.sort(axis=1)
26+
strings = [letters[x[0] : x[1]] for x in idxs]
2727

28-
df.append([letters[idx[0] : idx[1]]])
29-
30-
return DataFrame(df, columns=["letters"])
28+
return DataFrame(strings, columns=["letters"])
3129

3230

3331
class TestCaching:

0 commit comments

Comments
 (0)