
Commit 0b318b4

Merge pull request #236 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents: e459c8e + baf9e4b · commit 0b318b4

File tree

19 files changed: +219 -88 lines changed

doc/source/whatsnew/v1.3.1.rst

Lines changed: 3 additions & 1 deletion

@@ -26,8 +26,9 @@ Fixed regressions
 - Regression in :func:`concat` between objects with bool dtype and integer dtype casting to object instead of to integer (:issue:`42092`)
 - Bug in :class:`Series` constructor not accepting a ``dask.Array`` (:issue:`38645`)
 - Fixed regression for ``SettingWithCopyWarning`` displaying incorrect stacklevel (:issue:`42570`)
+- Fixed regression for :func:`merge_asof` raising ``KeyError`` when one of the ``by`` columns is in the index (:issue:`34488`)
 - Fixed regression in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`)
-
+- Fixed regression in :meth:`SeriesGroupBy.value_counts` that resulted in an ``IndexError`` when called on a Series with one row (:issue:`42618`)

 .. ---------------------------------------------------------------------------

@@ -37,6 +38,7 @@ Bug fixes
 ~~~~~~~~~
 - Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
 - Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
+- Fixed bug in :meth:`.Styler.set_sticky` not handling index names correctly for single index columns case (:issue:`42537`)
 - Fixed bug in :meth:`DataFrame.copy` failing to consolidate blocks in the result (:issue:`42579`)

 .. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst

Lines changed: 2 additions & 0 deletions

@@ -155,6 +155,7 @@ Deprecations
 - Deprecated treating integer keys in :meth:`Series.__setitem__` as positional when the index is a :class:`Float64Index` not containing the key, a :class:`IntervalIndex` with no entries containing the key, or a :class:`MultiIndex` with leading :class:`Float64Index` level not containing the key (:issue:`33469`)
 - Deprecated treating ``numpy.datetime64`` objects as UTC times when passed to the :class:`Timestamp` constructor along with a timezone. In a future version, these will be treated as wall-times. To retain the old behavior, use ``Timestamp(dt64).tz_localize("UTC").tz_convert(tz)`` (:issue:`24559`)
 - Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
+- Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)

 .. ---------------------------------------------------------------------------

@@ -164,6 +165,7 @@ Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 - Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
 - Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
+- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`)

 .. ---------------------------------------------------------------------------

pandas/core/groupby/generic.py

Lines changed: 1 addition & 1 deletion

@@ -758,7 +758,7 @@ def apply_series_value_counts():
         # new values are where sorted labels change
         lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
         inc = np.r_[True, lchanges]
-        if not len(lchanges):
+        if not len(val):
             inc = lchanges
         inc[idx] = True  # group boundaries are also new values
         out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts
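The one-line change above swaps the emptiness check from ``lchanges`` to ``val``, so a single-row group no longer indexes into an empty boolean array. A minimal reproduction of the GH 42618 scenario, assuming the public-API behaviour described in the whatsnew entry (not re-run against this exact commit):

import pandas as pd

# A grouped Series with a single row used to hit the empty-``lchanges`` path
# and raise IndexError inside SeriesGroupBy.value_counts.
ser = pd.Series([1], index=["a"], name="x")
print(ser.groupby(level=0).value_counts())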

pandas/core/indexes/numeric.py

Lines changed: 8 additions & 9 deletions

@@ -80,22 +80,21 @@


 class NumericIndex(Index):
-    """
-    Provide numeric type operations.
-
-    This is an abstract class.
-    """
-
     _index_descr_args = {
         "klass": "NumericIndex",
         "ltype": "integer or float",
         "dtype": "inferred",
         "extra": "",
     }
-    _values: np.ndarray
-    _default_dtype: np.dtype
-    _dtype_validation_metadata: tuple[Callable[..., bool], str]
+    __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args

+    _typ = "numericindex"
+    _values: np.ndarray
+    _default_dtype: np.dtype | None = None
+    _dtype_validation_metadata: tuple[Callable[..., bool], str] = (
+        is_numeric_dtype,
+        "numeric type",
+    )
     _is_numeric_dtype = True
     _can_hold_strings = False

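With this change ``NumericIndex`` carries concrete defaults for ``_default_dtype`` and ``_dtype_validation_metadata`` and takes its docstring from the shared template, while the integer/float index classes continue to subclass it. A quick check of that relationship from the public constructor (the ``pandas.core.indexes.numeric`` import path is internal and may move between versions):

import pandas as pd
from pandas.core.indexes.numeric import NumericIndex

# pd.Index with integer data resolves to Int64Index, a NumericIndex subclass.
idx = pd.Index([1, 2, 3])
print(type(idx).__name__, isinstance(idx, NumericIndex))
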
pandas/core/internals/blocks.py

Lines changed: 7 additions & 7 deletions

@@ -25,7 +25,6 @@
 from pandas._libs.internals import BlockPlacement
 from pandas._typing import (
     ArrayLike,
-    Dtype,
     DtypeObj,
     F,
     Shape,

@@ -52,7 +51,6 @@
     is_list_like,
     is_sparse,
     is_string_dtype,
-    pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
     CategoricalDtype,

@@ -100,6 +98,7 @@
     TimedeltaArray,
 )
 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
+from pandas.core.arrays.sparse import SparseDtype
 from pandas.core.base import PandasObject
 import pandas.core.common as com
 import pandas.core.computation.expressions as expressions

@@ -326,7 +325,7 @@ def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block:

         return type(self)(new_values, new_mgr_locs, self.ndim)

-    @property
+    @cache_readonly
     def shape(self) -> Shape:
         return self.values.shape

@@ -1842,7 +1841,7 @@ class CategoricalBlock(ExtensionBlock):
 # Constructor Helpers


-def maybe_coerce_values(values) -> ArrayLike:
+def maybe_coerce_values(values: ArrayLike) -> ArrayLike:
     """
     Input validation for values passed to __init__. Ensure that
     any datetime64/timedelta64 dtypes are in nanoseconds. Ensure

@@ -1874,7 +1873,7 @@ def maybe_coerce_values(values) -> ArrayLike:
     return values


-def get_block_type(values, dtype: Dtype | None = None):
+def get_block_type(values: ArrayLike, dtype: DtypeObj | None = None):
     """
     Find the appropriate Block subclass to use for the given values and dtype.

@@ -1889,13 +1888,14 @@ def get_block_type(values, dtype: Dtype | None = None):
     """
     # We use vtype and kind checks because they are much more performant
     # than is_foo_dtype
-    dtype = cast(np.dtype, pandas_dtype(dtype) if dtype else values.dtype)
+    if dtype is None:
+        dtype = values.dtype
     vtype = dtype.type
     kind = dtype.kind

     cls: type[Block]

-    if is_sparse(dtype):
+    if isinstance(dtype, SparseDtype):
         # Need this first(ish) so that Sparse[datetime] is sparse
         cls = ExtensionBlock
     elif isinstance(dtype, CategoricalDtype):
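``get_block_type`` no longer funnels its ``dtype`` argument through ``pandas_dtype`` and now recognises sparse data with a plain ``isinstance`` check against ``SparseDtype`` instead of ``is_sparse``. The two checks agree for an actual ``SparseDtype``; the ``isinstance`` form just skips the extra dtype inference. A small illustration using public APIs only (the internal ``get_block_type`` itself is not called here):

import pandas as pd
from pandas.api.types import is_sparse

arr = pd.arrays.SparseArray([0, 0, 1])
dtype = arr.dtype  # Sparse[int64, 0]

# Both checks identify the sparse dtype; the block-type dispatch above now
# relies on the isinstance form.
print(isinstance(dtype, pd.SparseDtype), is_sparse(dtype))
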

pandas/core/internals/construction.py

Lines changed: 2 additions & 2 deletions

@@ -331,14 +331,14 @@ def ndarray_to_mgr(
         if dtype is None and is_object_dtype(values.dtype):
             arrays = [
                 ensure_wrapped_if_datetimelike(
-                    maybe_infer_to_datetimelike(values[:, i].copy())
+                    maybe_infer_to_datetimelike(values[:, i])
                 )
                 for i in range(values.shape[1])
             ]
         else:
             if is_datetime_or_timedelta_dtype(values.dtype):
                 values = ensure_wrapped_if_datetimelike(values)
-            arrays = [values[:, i].copy() for i in range(values.shape[1])]
+            arrays = [values[:, i] for i in range(values.shape[1])]

         return ArrayManager(arrays, [index, columns], verify_integrity=False)
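Dropping the per-column ``.copy()`` means the ArrayManager path slices views of the input 2D array instead of copying each column up front, likely part of the DataFrame-construction speed-up noted in the v1.4.0 whatsnew (GH 42631). A sketch of the affected entry point, assuming the experimental ``mode.data_manager`` option available in this development version:

import numpy as np
import pandas as pd

values = np.arange(12, dtype="float64").reshape(4, 3)

# With the array manager enabled, ndarray_to_mgr builds one array per column;
# after this change those arrays are views rather than copies (assumption
# based on the diff above, not benchmarked here).
with pd.option_context("mode.data_manager", "array"):
    df = pd.DataFrame(values, columns=["a", "b", "c"])
print(df)
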

pandas/core/internals/managers.py

Lines changed: 21 additions & 17 deletions

@@ -148,7 +148,7 @@ class BaseBlockManager(DataManager):
     _known_consolidated: bool
     _is_consolidated: bool

-    def __init__(self, blocks, axes, verify_integrity=True):
+    def __init__(self, blocks, axes, verify_integrity: bool = True):
         raise NotImplementedError

     @classmethod

@@ -889,7 +889,8 @@ def __init__(
     ):

         if verify_integrity:
-            assert all(isinstance(x, Index) for x in axes)
+            # Assertion disabled for performance
+            # assert all(isinstance(x, Index) for x in axes)

             for block in blocks:
                 if self.ndim != block.ndim:

@@ -1558,8 +1559,9 @@ def __init__(
         verify_integrity: bool = False,
         fastpath=lib.no_default,
     ):
-        assert isinstance(block, Block), type(block)
-        assert isinstance(axis, Index), type(axis)
+        # Assertions disabled for performance
+        # assert isinstance(block, Block), type(block)
+        # assert isinstance(axis, Index), type(axis)

         if fastpath is not lib.no_default:
             warnings.warn(

@@ -1660,7 +1662,8 @@ def getitem_mgr(self, indexer) -> SingleBlockManager:
         return type(self)(block, new_idx)

     def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:
-        assert isinstance(slobj, slice), type(slobj)
+        # Assertion disabled for performance
+        # assert isinstance(slobj, slice), type(slobj)
         if axis >= self.ndim:
             raise IndexError("Requested axis not found in manager")

@@ -1778,9 +1781,10 @@ def create_block_manager_from_arrays(
     axes: list[Index],
     consolidate: bool = True,
 ) -> BlockManager:
-    assert isinstance(names, Index)
-    assert isinstance(axes, list)
-    assert all(isinstance(x, Index) for x in axes)
+    # Assertions disabled for performance
+    # assert isinstance(names, Index)
+    # assert isinstance(axes, list)
+    # assert all(isinstance(x, Index) for x in axes)

     arrays = [_extract_array(x) for x in arrays]

@@ -1835,7 +1839,8 @@ def _form_blocks(
     if names_idx.equals(axes[0]):
         names_indexer = np.arange(len(names_idx))
     else:
-        assert names_idx.intersection(axes[0]).is_unique
+        # Assertion disabled for performance
+        # assert names_idx.intersection(axes[0]).is_unique
         names_indexer = names_idx.get_indexer_for(axes[0])

     for i, name_idx in enumerate(names_indexer):

@@ -1863,10 +1868,9 @@ def _form_blocks(

     if len(items_dict["DatetimeTZBlock"]):
         dttz_blocks = [
-            new_block(
+            DatetimeTZBlock(
                 ensure_block_shape(extract_array(array), 2),
-                klass=DatetimeTZBlock,
-                placement=i,
+                placement=BlockPlacement(i),
                 ndim=2,
             )
             for i, array in items_dict["DatetimeTZBlock"]

@@ -1881,14 +1885,14 @@ def _form_blocks(

     if len(items_dict["CategoricalBlock"]) > 0:
         cat_blocks = [
-            new_block(array, klass=CategoricalBlock, placement=i, ndim=2)
+            CategoricalBlock(array, placement=BlockPlacement(i), ndim=2)
             for i, array in items_dict["CategoricalBlock"]
         ]
         blocks.extend(cat_blocks)

     if len(items_dict["ExtensionBlock"]):
         external_blocks = [
-            new_block(array, klass=ExtensionBlock, placement=i, ndim=2)
+            ExtensionBlock(array, placement=BlockPlacement(i), ndim=2)
             for i, array in items_dict["ExtensionBlock"]
         ]

@@ -1921,7 +1925,7 @@ def _simple_blockify(tuples, dtype, consolidate: bool) -> list[Block]:
     if dtype is not None and values.dtype != dtype:  # pragma: no cover
         values = values.astype(dtype)

-    block = new_block(values, placement=placement, ndim=2)
+    block = new_block(values, placement=BlockPlacement(placement), ndim=2)
     return [block]


@@ -1944,14 +1948,14 @@ def _multi_blockify(tuples, dtype: DtypeObj | None = None, consolidate: bool = T
             list(tup_block), dtype  # type: ignore[arg-type]
         )

-        block = new_block(values, placement=placement, ndim=2)
+        block = new_block(values, placement=BlockPlacement(placement), ndim=2)
         new_blocks.append(block)

     return new_blocks


 def _tuples_to_blocks_no_consolidate(tuples, dtype: DtypeObj | None) -> list[Block]:
-    # tuples produced within _form_blocks are of the form (placement, whatever, array)
+    # tuples produced within _form_blocks are of the form (placement, array)
     if dtype is not None:
         return [
             new_block(
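The assertions here are commented out rather than removed, and block construction now calls the concrete classes (``DatetimeTZBlock``, ``CategoricalBlock``, ``ExtensionBlock``) with an explicit ``BlockPlacement`` instead of routing through ``new_block`` with a ``klass`` argument, trimming per-block overhead when a frame is assembled from arrays. A public-API view of the two block kinds touched, with the internal routing assumed from the diff rather than traced here:

import pandas as pd

# A tz-aware datetime column and a categorical column exercise the
# DatetimeTZBlock and CategoricalBlock branches of _form_blocks when the
# DataFrame is assembled from per-column arrays.
df = pd.DataFrame(
    {
        "ts": pd.to_datetime(["2021-07-01", "2021-07-02"]).tz_localize("UTC"),
        "cat": pd.Categorical(["a", "b"]),
    }
)
print(df.dtypes)
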

pandas/core/reshape/merge.py

Lines changed: 20 additions & 10 deletions

@@ -1776,16 +1776,26 @@ def _validate_specification(self) -> None:
             raise MergeError("missing right_by")

         # GH#29130 Check that merge keys do not have dtype object
-        lo_dtype = (
-            self.left[self.left_on[0]].dtype
-            if not self.left_index
-            else self.left.index.dtype
-        )
-        ro_dtype = (
-            self.right[self.right_on[0]].dtype
-            if not self.right_index
-            else self.right.index.dtype
-        )
+        if not self.left_index:
+            left_on = self.left_on[0]
+            lo_dtype = (
+                self.left[left_on].dtype
+                if left_on in self.left.columns
+                else self.left.index.get_level_values(left_on)
+            )
+        else:
+            lo_dtype = self.left.index.dtype
+
+        if not self.right_index:
+            right_on = self.right_on[0]
+            ro_dtype = (
+                self.right[right_on].dtype
+                if right_on in self.right.columns
+                else self.right.index.get_level_values(right_on)
+            )
+        else:
+            ro_dtype = self.right.index.dtype
+
         if is_object_dtype(lo_dtype) or is_object_dtype(ro_dtype):
             raise MergeError(
                 f"Incompatible merge dtype, {repr(ro_dtype)} and "

pandas/core/series.py

Lines changed: 2 additions & 2 deletions

@@ -354,10 +354,10 @@ def __init__(
                     "The default dtype for empty Series will be 'object' instead "
                     "of 'float64' in a future version. Specify a dtype explicitly "
                     "to silence this warning.",
-                    DeprecationWarning,
+                    FutureWarning,
                     stacklevel=2,
                 )
-                # uncomment the line below when removing the DeprecationWarning
+                # uncomment the line below when removing the FutureWarning
                 # dtype = np.dtype(object)

         if index is not None:
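Because ``FutureWarning`` is shown by default, while ``DeprecationWarning`` is hidden unless the calling code lives in ``__main__`` or the filters are adjusted, users constructing an empty Series without a dtype now actually see the message. A quick way to observe it (the warning text comes from the diff above):

import warnings

import pandas as pd

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    pd.Series()  # empty, no dtype -> warns about the future 'object' default

print([w.category.__name__ for w in caught])  # expected to include 'FutureWarning'
# Passing an explicit dtype keeps the constructor quiet:
pd.Series(dtype="float64")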
