Skip to content

Commit d88f8bf

Browse files
authored
PERF: avoid unnecessary check in concat (#52535)
1 parent ebe484a commit d88f8bf

File tree

1 file changed

+17
-29
lines changed

1 file changed

+17
-29
lines changed

pandas/core/internals/concat.py

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from pandas._libs import (
1414
NaT,
1515
internals as libinternals,
16+
lib,
1617
)
1718
from pandas._libs.missing import NA
1819
from pandas.util._decorators import cache_readonly
@@ -403,56 +404,41 @@ def __init__(self, block: Block, shape: Shape, indexers=None) -> None:
403404
# Note: block is None implies indexers is None, but not vice-versa
404405
if indexers is None:
405406
indexers = {}
407+
# Otherwise we may have only {0: np.array(...)} and only non-negative
408+
# entries.
406409
self.block = block
407410
self.indexers = indexers
408411
self.shape = shape
409412

410413
def __repr__(self) -> str:
411414
return f"{type(self).__name__}({repr(self.block)}, {self.indexers})"
412415

413-
@cache_readonly
414-
def needs_filling(self) -> bool:
415-
for indexer in self.indexers.values():
416-
# FIXME: cache results of indexer == -1 checks.
417-
if (indexer == -1).any():
418-
return True
419-
420-
return False
421-
422-
@cache_readonly
423-
def dtype(self) -> DtypeObj:
424-
blk = self.block
425-
if blk.values.dtype.kind == "V":
426-
raise AssertionError("Block is None, no dtype")
427-
428-
if not self.needs_filling:
429-
return blk.dtype
430-
return ensure_dtype_can_hold_na(blk.dtype)
431-
432416
def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
433417
"""
434418
Check that we are all-NA of a type/dtype that is compatible with this dtype.
435419
Augments `self.is_na` with an additional check of the type of NA values.
436420
"""
437421
if not self.is_na:
438422
return False
439-
if self.block.dtype.kind == "V":
423+
424+
blk = self.block
425+
if blk.dtype.kind == "V":
440426
return True
441427

442-
if self.dtype == object:
443-
values = self.block.values
428+
if blk.dtype == object:
429+
values = blk.values
444430
return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K"))
445431

446-
na_value = self.block.fill_value
447-
if na_value is NaT and not is_dtype_equal(self.dtype, dtype):
432+
na_value = blk.fill_value
433+
if na_value is NaT and not is_dtype_equal(blk.dtype, dtype):
448434
# e.g. we are dt64 and other is td64
449-
# fill_values match but we should not cast self.block.values to dtype
435+
# fill_values match but we should not cast blk.values to dtype
450436
# TODO: this will need updating if we ever have non-nano dt64/td64
451437
return False
452438

453439
if na_value is NA and needs_i8_conversion(dtype):
454440
# FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
455-
# e.g. self.dtype == "Int64" and dtype is td64, we dont want
441+
# e.g. blk.dtype == "Int64" and dtype is td64, we don't want
456442
# to consider these as matching
457443
return False
458444

@@ -663,9 +649,11 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
663649

664650
has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
665651

666-
dtypes = [unit.dtype for unit in join_units if not unit.is_na]
652+
dtypes = [unit.block.dtype for unit in join_units if not unit.is_na]
667653
if not len(dtypes):
668-
dtypes = [unit.dtype for unit in join_units if unit.block.dtype.kind != "V"]
654+
dtypes = [
655+
unit.block.dtype for unit in join_units if unit.block.dtype.kind != "V"
656+
]
669657

670658
dtype = find_common_type(dtypes)
671659
if has_none_blocks:
@@ -712,7 +700,7 @@ def _is_uniform_reindex(join_units) -> bool:
712700
return (
713701
# TODO: should this be ju.block._can_hold_na?
714702
all(ju.block.is_extension for ju in join_units)
715-
and len({ju.block.dtype.name for ju in join_units}) == 1
703+
and lib.dtypes_all_equal([ju.block.dtype for ju in join_units])
716704
)
717705

718706

0 commit comments

Comments
 (0)