Skip to content

Commit f89c25d

Browse files
TomAugspurger authored and jorisvandenbossche committed
BUG: Coerce to object for mixed concat with extension arrays (#20799)
1 parent 41db527 commit f89c25d

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

pandas/core/dtypes/concat.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from pandas.core.dtypes.common import (
99
is_categorical_dtype,
1010
is_sparse,
11+
is_extension_array_dtype,
1112
is_datetimetz,
1213
is_datetime64_dtype,
1314
is_timedelta64_dtype,
@@ -173,6 +174,10 @@ def is_nonempty(x):
173174
elif 'sparse' in typs:
174175
return _concat_sparse(to_concat, axis=axis, typs=typs)
175176

177+
extensions = [is_extension_array_dtype(x) for x in to_concat]
178+
if any(extensions):
179+
to_concat = [np.atleast_2d(x.astype('object')) for x in to_concat]
180+
176181
if not nonempty:
177182
# we have all empties, but may need to coerce the result dtype to
178183
# object if we have non-numeric type operands (numpy would otherwise
@@ -210,7 +215,7 @@ def _concat_categorical(to_concat, axis=0):
210215

211216
def _concat_asobject(to_concat):
212217
to_concat = [x.get_values() if is_categorical_dtype(x.dtype)
213-
else x.ravel() for x in to_concat]
218+
else np.asarray(x).ravel() for x in to_concat]
214219
res = _concat_compat(to_concat)
215220
if axis == 1:
216221
return res.reshape(1, len(res))
@@ -548,6 +553,8 @@ def convert_sparse(x, axis):
548553
# coerce to native type
549554
if isinstance(x, SparseArray):
550555
x = x.get_values()
556+
else:
557+
x = np.asarray(x)
551558
x = x.ravel()
552559
if axis > 0:
553560
x = np.atleast_2d(x)

pandas/tests/extension/base/reshaping.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,29 @@ def test_concat_all_na_block(self, data_missing, in_frame):
4141
expected = pd.Series(data_missing.take([1, 1, 0, 0]))
4242
self.assert_series_equal(result, expected)
4343

44+
def test_concat_mixed_dtypes(self, data):
45+
# https://github.com/pandas-dev/pandas/issues/20762
46+
df1 = pd.DataFrame({'A': data[:3]})
47+
df2 = pd.DataFrame({"A": [1, 2, 3]})
48+
df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category')
49+
df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])})
50+
dfs = [df1, df2, df3, df4]
51+
52+
# dataframes
53+
result = pd.concat(dfs)
54+
expected = pd.concat([x.astype(object) for x in dfs])
55+
self.assert_frame_equal(result, expected)
56+
57+
# series
58+
result = pd.concat([x['A'] for x in dfs])
59+
expected = pd.concat([x['A'].astype(object) for x in dfs])
60+
self.assert_series_equal(result, expected)
61+
62+
# simple test for just EA and one other
63+
result = pd.concat([df1, df2])
64+
expected = pd.concat([df1.astype('object'), df2.astype('object')])
65+
self.assert_frame_equal(result, expected)
66+
4467
def test_align(self, data, na_value):
4568
a = data[:3]
4669
b = data[2:5]

0 commit comments

Comments
 (0)