Skip to content

Commit 4f4a1db

Browse files
committed
Simplify the code for extension dtypes by not attempting to convert them
1 parent 510369f commit 4f4a1db

File tree

3 files changed: 35 additions (+35) and 48 deletions (-48).

3 files changed: 35 additions (+35) and 48 deletions (-48).

pandas/core/dtypes/cast.py

Lines changed: 30 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,46 +1051,43 @@ def convert_dtypes(
10511051
dtype
10521052
new dtype
10531053
"""
1054+
inferred_dtype = input_array.dtype
1055+
10541056
if convert_string or convert_integer or convert_boolean:
1055-
try:
1056-
inferred_dtype = lib.infer_dtype(input_array)
1057-
except ValueError:
1058-
# Required to catch due to Period. Can remove once GH 23553 is fixed
1059-
inferred_dtype = input_array.dtype
1057+
if not is_extension_array_dtype(input_array.dtype):
1058+
try:
1059+
inferred_dtype = lib.infer_dtype(input_array)
1060+
except ValueError:
1061+
# Required to catch due to Period. Can remove once GH 23553 is fixed
1062+
inferred_dtype = input_array.dtype
10601063

1061-
if not convert_string and is_string_dtype(inferred_dtype):
1062-
inferred_dtype = input_array.dtype
1064+
if not convert_string and is_string_dtype(inferred_dtype):
1065+
inferred_dtype = input_array.dtype
10631066

1064-
if convert_integer:
1065-
target_int_dtype = "Int64"
1067+
if convert_integer:
1068+
target_int_dtype = "Int64"
10661069

1067-
if is_integer_dtype(input_array.dtype) and not is_extension_array_dtype(
1068-
input_array.dtype
1069-
):
1070-
from pandas.core.arrays.integer import _dtypes
1070+
if is_integer_dtype(input_array.dtype):
1071+
from pandas.core.arrays.integer import _dtypes
10711072

1072-
inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype)
1073-
if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
1074-
input_array.dtype
1075-
):
1076-
inferred_dtype = target_int_dtype
1073+
inferred_dtype = _dtypes.get(
1074+
input_array.dtype.name, target_int_dtype
1075+
)
1076+
if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
1077+
input_array.dtype
1078+
):
1079+
inferred_dtype = target_int_dtype
10771080

1078-
else:
1079-
if is_integer_dtype(inferred_dtype):
1080-
inferred_dtype = input_array.dtype
1081-
1082-
if convert_boolean:
1083-
if is_bool_dtype(input_array.dtype) and (
1084-
(not is_extension_array_dtype(input_array.dtype))
1085-
or (input_array.dtype == pd.BooleanDtype())
1086-
):
1087-
inferred_dtype = "boolean"
1088-
else:
1089-
if isinstance(inferred_dtype, str) and inferred_dtype == "boolean":
1090-
inferred_dtype = input_array.dtype
1081+
else:
1082+
if is_integer_dtype(inferred_dtype):
1083+
inferred_dtype = input_array.dtype
10911084

1092-
else:
1093-
inferred_dtype = input_array.dtype
1085+
if convert_boolean:
1086+
if is_bool_dtype(input_array.dtype):
1087+
inferred_dtype = "boolean"
1088+
else:
1089+
if isinstance(inferred_dtype, str) and inferred_dtype == "boolean":
1090+
inferred_dtype = input_array.dtype
10941091

10951092
return inferred_dtype
10961093

pandas/core/series.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4344,11 +4344,9 @@ def _convert_dtypes(
43444344
convert_integer: bool = True,
43454345
convert_boolean: bool = True,
43464346
) -> "Series":
4347-
input_series = self
4347+
input_series = self.copy()
43484348
if infer_objects:
43494349
input_series = input_series.infer_objects()
4350-
if is_object_dtype(input_series):
4351-
input_series = input_series.copy()
43524350

43534351
if convert_string or convert_integer or convert_boolean:
43544352
inferred_dtype = convert_dtypes(
@@ -4357,9 +4355,9 @@ def _convert_dtypes(
43574355
try:
43584356
result = input_series.astype(inferred_dtype)
43594357
except TypeError:
4360-
result = input_series.copy()
4358+
result = input_series
43614359
else:
4362-
result = input_series.copy()
4360+
result = input_series
43634361
return result
43644362

43654363
@Appender(generic._shared_docs["isna"] % _shared_doc_kwargs)

pandas/tests/series/methods/test_convert_dtypes.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -282,13 +282,5 @@ def test_convert_string_dtype(self):
282282

283283
def test_convert_bool_dtype(self):
284284
# GH32287
285-
df = pd.DataFrame([["abc", 123, True]])
286-
exp_dtypes = pd.Series([np.object, np.int64, np.bool])
287-
tm.assert_series_equal(df.dtypes, exp_dtypes)
288-
289-
df_1 = df.convert_dtypes()
290-
exp_dtypes_1 = pd.Series([pd.StringDtype(), pd.Int64Dtype(), pd.BooleanDtype()])
291-
tm.assert_series_equal(df_1.dtypes, exp_dtypes_1)
292-
293-
df_2 = df_1.convert_dtypes()
294-
tm.assert_series_equal(df_2.dtypes, exp_dtypes_1)
285+
df = pd.DataFrame({"A": pd.array([True])})
286+
tm.assert_frame_equal(df, df.convert_dtypes())

0 commit comments

Comments
 (0)