Skip to content

Commit 5185bb3

Browse files
committed
Merge remote-tracking branch 'upstream/2.3.x' into remove-read_json-futurewarning
2 parents 75c69b1 + c079337 commit 5185bb3

38 files changed

+398
-184
lines changed

.circleci/config.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ jobs:
1515
- checkout
1616
- run: .circleci/setup_env.sh
1717
- run: |
18-
sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
1918
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH \
2019
LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD \
2120
ci/run_tests.sh

pandas/_libs/index.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ class MaskedUInt16Engine(MaskedIndexEngine): ...
6868
class MaskedUInt8Engine(MaskedIndexEngine): ...
6969
class MaskedBoolEngine(MaskedUInt8Engine): ...
7070

71+
class StringObjectEngine(ObjectEngine):
72+
def __init__(self, values: object, na_value) -> None: ...
73+
7174
class BaseMultiIndexCodesEngine:
7275
levels: list[np.ndarray]
7376
offsets: np.ndarray # ndarray[uint64_t, ndim=1]

pandas/_libs/index.pyx

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,32 @@ cdef class ObjectEngine(IndexEngine):
532532
return loc
533533

534534

535+
cdef class StringObjectEngine(ObjectEngine):
536+
537+
cdef:
538+
object na_value
539+
bint uses_na
540+
541+
def __init__(self, ndarray values, na_value):
542+
super().__init__(values)
543+
self.na_value = na_value
544+
self.uses_na = na_value is C_NA
545+
546+
cdef bint _checknull(self, object val):
547+
if self.uses_na:
548+
return val is C_NA
549+
else:
550+
return util.is_nan(val)
551+
552+
cdef _check_type(self, object val):
553+
if isinstance(val, str):
554+
return val
555+
elif self._checknull(val):
556+
return self.na_value
557+
else:
558+
raise KeyError(val)
559+
560+
535561
cdef class DatetimeEngine(Int64Engine):
536562

537563
cdef:

pandas/_libs/lib.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ def maybe_convert_objects(
8686
safe: bool = ...,
8787
convert_numeric: bool = ...,
8888
convert_non_numeric: Literal[False] = ...,
89+
convert_string: Literal[False] = ...,
8990
convert_to_nullable_dtype: Literal[False] = ...,
9091
dtype_if_all_nat: DtypeObj | None = ...,
9192
) -> npt.NDArray[np.object_ | np.number]: ...
@@ -97,6 +98,7 @@ def maybe_convert_objects(
9798
safe: bool = ...,
9899
convert_numeric: bool = ...,
99100
convert_non_numeric: bool = ...,
101+
convert_string: bool = ...,
100102
convert_to_nullable_dtype: Literal[True] = ...,
101103
dtype_if_all_nat: DtypeObj | None = ...,
102104
) -> ArrayLike: ...
@@ -108,6 +110,7 @@ def maybe_convert_objects(
108110
safe: bool = ...,
109111
convert_numeric: bool = ...,
110112
convert_non_numeric: bool = ...,
113+
convert_string: bool = ...,
111114
convert_to_nullable_dtype: bool = ...,
112115
dtype_if_all_nat: DtypeObj | None = ...,
113116
) -> ArrayLike: ...

pandas/_libs/lib.pyx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2498,6 +2498,7 @@ def maybe_convert_objects(ndarray[object] objects,
24982498
bint convert_numeric=True, # NB: different default!
24992499
bint convert_to_nullable_dtype=False,
25002500
bint convert_non_numeric=False,
2501+
bint convert_string=True,
25012502
object dtype_if_all_nat=None) -> "ArrayLike":
25022503
"""
25032504
Type inference function-- convert object array to proper dtype
@@ -2747,7 +2748,11 @@ def maybe_convert_objects(ndarray[object] objects,
27472748
dtype = StringDtype()
27482749
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
27492750

2750-
elif using_string_dtype() and is_string_array(objects, skipna=True):
2751+
elif (
2752+
convert_string
2753+
and using_string_dtype()
2754+
and is_string_array(objects, skipna=True)
2755+
):
27512756
from pandas.core.arrays.string_ import StringDtype
27522757

27532758
dtype = StringDtype(na_value=np.nan)

pandas/_testing/__init__.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
TYPE_CHECKING,
99
Callable,
1010
ContextManager,
11-
cast,
1211
)
1312
import warnings
1413

@@ -23,8 +22,6 @@
2322

2423
from pandas.compat import pa_version_under10p1
2524

26-
from pandas.core.dtypes.common import is_string_dtype
27-
2825
import pandas as pd
2926
from pandas import (
3027
ArrowDtype,
@@ -83,8 +80,8 @@
8380
with_csv_dialect,
8481
)
8582
from pandas.core.arrays import (
83+
ArrowExtensionArray,
8684
BaseMaskedArray,
87-
ExtensionArray,
8885
NumpyExtensionArray,
8986
)
9087
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
@@ -96,7 +93,6 @@
9693
NpDtype,
9794
)
9895

99-
from pandas.core.arrays import ArrowExtensionArray
10096

10197
UNSIGNED_INT_NUMPY_DTYPES: list[NpDtype] = ["uint8", "uint16", "uint32", "uint64"]
10298
UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"]
@@ -530,24 +526,18 @@ def shares_memory(left, right) -> bool:
530526
if isinstance(left, pd.core.arrays.IntervalArray):
531527
return shares_memory(left._left, right) or shares_memory(left._right, right)
532528

533-
if (
534-
isinstance(left, ExtensionArray)
535-
and is_string_dtype(left.dtype)
536-
and left.dtype.storage == "pyarrow" # type: ignore[attr-defined]
537-
):
538-
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
539-
left = cast("ArrowExtensionArray", left)
540-
if (
541-
isinstance(right, ExtensionArray)
542-
and is_string_dtype(right.dtype)
543-
and right.dtype.storage == "pyarrow" # type: ignore[attr-defined]
544-
):
545-
right = cast("ArrowExtensionArray", right)
529+
if isinstance(left, ArrowExtensionArray):
530+
if isinstance(right, ArrowExtensionArray):
531+
# https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669
546532
left_pa_data = left._pa_array
547533
right_pa_data = right._pa_array
548534
left_buf1 = left_pa_data.chunk(0).buffers()[1]
549535
right_buf1 = right_pa_data.chunk(0).buffers()[1]
550-
return left_buf1 == right_buf1
536+
return left_buf1.address == right_buf1.address
537+
else:
538+
# if we have one ArrowExtensionArray and one other array, assume
539+
# they can only share memory if they share the same numpy buffer
540+
return np.shares_memory(left, right)
551541

552542
if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray):
553543
# By convention, we'll say these share memory if they share *either*

pandas/core/dtypes/cast.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@
8787

8888
if TYPE_CHECKING:
8989
from collections.abc import (
90+
Collection,
9091
Sequence,
91-
Sized,
9292
)
9393

9494
from pandas._typing import (
@@ -1586,7 +1586,7 @@ def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
15861586
return _maybe_unbox_datetimelike(value, dtype)
15871587

15881588

1589-
def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
1589+
def construct_1d_object_array_from_listlike(values: Collection) -> np.ndarray:
15901590
"""
15911591
Transform any list-like object in a 1-dimensional numpy array of object
15921592
dtype.
@@ -1604,10 +1604,11 @@ def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
16041604
-------
16051605
1-dimensional numpy array of dtype object
16061606
"""
1607-
# numpy will try to interpret nested lists as further dimensions, hence
1608-
# making a 1D array that contains list-likes is a bit tricky:
1607+
# numpy will try to interpret nested lists as further dimensions in np.array(),
1608+
# hence explicitly filling a preallocated 1D object array element by element
16091609
result = np.empty(len(values), dtype="object")
1610-
result[:] = values
1610+
for i, obj in enumerate(values):
1611+
result[i] = obj
16111612
return result
16121613

16131614

pandas/core/indexes/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,8 @@ def _engine(
884884
# error: Item "ExtensionArray" of "Union[ExtensionArray,
885885
# ndarray[Any, Any]]" has no attribute "_ndarray" [union-attr]
886886
target_values = self._data._ndarray # type: ignore[union-attr]
887+
elif is_string_dtype(self.dtype) and not is_object_dtype(self.dtype):
888+
return libindex.StringObjectEngine(target_values, self.dtype.na_value) # type: ignore[union-attr]
887889

888890
# error: Argument 1 to "ExtensionEngine" has incompatible type
889891
# "ndarray[Any, Any]"; expected "ExtensionArray"
@@ -6133,7 +6135,6 @@ def _should_fallback_to_positional(self) -> bool:
61336135
def get_indexer_non_unique(
61346136
self, target
61356137
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6136-
target = ensure_index(target)
61376138
target = self._maybe_cast_listlike_indexer(target)
61386139

61396140
if not self._should_compare(target) and not self._should_partial_index(target):

pandas/core/internals/blocks.py

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,12 @@ def _maybe_downcast(
563563
return blocks
564564

565565
nbs = extend_blocks(
566-
[blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
566+
[
567+
blk.convert(
568+
using_cow=using_cow, copy=not using_cow, convert_string=False
569+
)
570+
for blk in blocks
571+
]
567572
)
568573
if caller == "fillna":
569574
if len(nbs) != len(blocks) or not all(
@@ -636,6 +641,7 @@ def convert(
636641
*,
637642
copy: bool = True,
638643
using_cow: bool = False,
644+
convert_string: bool = True,
639645
) -> list[Block]:
640646
"""
641647
Attempt to coerce any object types to better types. Return a copy
@@ -648,7 +654,10 @@ def convert(
648654

649655
if self.ndim != 1 and self.shape[0] != 1:
650656
blocks = self.split_and_operate(
651-
Block.convert, copy=copy, using_cow=using_cow
657+
Block.convert,
658+
copy=copy,
659+
using_cow=using_cow,
660+
convert_string=convert_string,
652661
)
653662
if all(blk.dtype.kind == "O" for blk in blocks):
654663
# Avoid fragmenting the block if convert is a no-op
@@ -666,6 +675,7 @@ def convert(
666675
res_values = lib.maybe_convert_objects(
667676
values, # type: ignore[arg-type]
668677
convert_non_numeric=True,
678+
convert_string=convert_string,
669679
)
670680
refs = None
671681
if (
@@ -851,6 +861,7 @@ def replace(
851861
mask: npt.NDArray[np.bool_] | None = None,
852862
using_cow: bool = False,
853863
already_warned=None,
864+
convert_string=None,
854865
) -> list[Block]:
855866
"""
856867
replace the to_replace value with value, possible to create new
@@ -915,7 +926,11 @@ def replace(
915926
if get_option("future.no_silent_downcasting") is True:
916927
blocks = [blk]
917928
else:
918-
blocks = blk.convert(copy=False, using_cow=using_cow)
929+
blocks = blk.convert(
930+
copy=False,
931+
using_cow=using_cow,
932+
convert_string=convert_string or self.dtype != _dtype_obj,
933+
)
919934
if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
920935
warnings.warn(
921936
# GH#54710
@@ -944,6 +959,7 @@ def replace(
944959
inplace=True,
945960
mask=mask,
946961
using_cow=using_cow,
962+
convert_string=convert_string,
947963
)
948964

949965
else:
@@ -958,6 +974,7 @@ def replace(
958974
inplace=True,
959975
mask=mask[i : i + 1],
960976
using_cow=using_cow,
977+
convert_string=convert_string,
961978
)
962979
)
963980
return blocks
@@ -970,6 +987,7 @@ def _replace_regex(
970987
inplace: bool = False,
971988
mask=None,
972989
using_cow: bool = False,
990+
convert_string: bool = True,
973991
already_warned=None,
974992
) -> list[Block]:
975993
"""
@@ -1029,7 +1047,9 @@ def _replace_regex(
10291047
)
10301048
already_warned.warned_already = True
10311049

1032-
nbs = block.convert(copy=False, using_cow=using_cow)
1050+
nbs = block.convert(
1051+
copy=False, using_cow=using_cow, convert_string=convert_string
1052+
)
10331053
opt = get_option("future.no_silent_downcasting")
10341054
if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
10351055
warnings.warn(
@@ -1068,6 +1088,8 @@ def replace_list(
10681088
values._replace(to_replace=src_list, value=dest_list, inplace=True)
10691089
return [blk]
10701090

1091+
convert_string = self.dtype != _dtype_obj
1092+
10711093
# Exclude anything that we know we won't contain
10721094
pairs = [
10731095
(x, y)
@@ -1152,6 +1174,7 @@ def replace_list(
11521174
inplace=inplace,
11531175
regex=regex,
11541176
using_cow=using_cow,
1177+
convert_string=convert_string,
11551178
)
11561179

11571180
if using_cow and i != src_len:
@@ -1174,7 +1197,9 @@ def replace_list(
11741197
nbs = []
11751198
for res_blk in result:
11761199
converted = res_blk.convert(
1177-
copy=True and not using_cow, using_cow=using_cow
1200+
copy=True and not using_cow,
1201+
using_cow=using_cow,
1202+
convert_string=convert_string,
11781203
)
11791204
if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
11801205
warnings.warn(
@@ -1204,6 +1229,7 @@ def _replace_coerce(
12041229
inplace: bool = True,
12051230
regex: bool = False,
12061231
using_cow: bool = False,
1232+
convert_string: bool = True,
12071233
) -> list[Block]:
12081234
"""
12091235
Replace value corresponding to the given boolean array with another
@@ -1233,6 +1259,7 @@ def _replace_coerce(
12331259
inplace=inplace,
12341260
mask=mask,
12351261
using_cow=using_cow,
1262+
convert_string=convert_string,
12361263
)
12371264
else:
12381265
if value is None:
@@ -1256,6 +1283,7 @@ def _replace_coerce(
12561283
inplace=inplace,
12571284
mask=mask,
12581285
using_cow=using_cow,
1286+
convert_string=convert_string,
12591287
)
12601288

12611289
# ---------------------------------------------------------------------

pandas/plotting/_matplotlib/boxplot.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
)
88
import warnings
99

10+
import matplotlib as mpl
1011
from matplotlib.artist import setp
1112
import numpy as np
1213

@@ -20,6 +21,7 @@
2021

2122
import pandas as pd
2223
import pandas.core.common as com
24+
from pandas.util.version import Version
2325

2426
from pandas.io.formats.printing import pprint_thing
2527
from pandas.plotting._matplotlib.core import (
@@ -54,7 +56,8 @@ def _set_ticklabels(ax: Axes, labels: list[str], is_vertical: bool, **kwargs) ->
5456
ticks = ax.get_xticks() if is_vertical else ax.get_yticks()
5557
if len(ticks) != len(labels):
5658
i, remainder = divmod(len(ticks), len(labels))
57-
assert remainder == 0, remainder
59+
if Version(mpl.__version__) < Version("3.10"):
60+
assert remainder == 0, remainder
5861
labels *= i
5962
if is_vertical:
6063
ax.set_xticklabels(labels, **kwargs)

pandas/plotting/_matplotlib/tools.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def format_date_labels(ax: Axes, rot) -> None:
5757
fig = ax.get_figure()
5858
if fig is not None:
5959
# should always be a Figure but can technically be None
60-
maybe_adjust_figure(fig, bottom=0.2)
60+
maybe_adjust_figure(fig, bottom=0.2) # type: ignore[arg-type]
6161

6262

6363
def table(

0 commit comments

Comments
 (0)