Skip to content

Commit 7ac5a11

Browse files
committed
fixes #13247
1 parent 2e3c82e commit 7ac5a11

File tree

5 files changed

+54
-8
lines changed

5 files changed

+54
-8
lines changed

doc/source/whatsnew/v0.19.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Backwards incompatible API changes
4343
.. _whatsnew_0190.api:
4444

4545

46-
46+
- Concatenating multiple objects no longer automatically upcasts the result to `float64`; instead, the smallest `dtype` that suffices is used (:issue:`13247`)
4747

4848

4949

pandas/core/internals.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
array_equivalent, _is_na_compat,
2020
_maybe_convert_string_to_object,
2121
_maybe_convert_scalar,
22+
is_float_dtype, is_numeric_dtype,
2223
is_categorical, is_datetimelike_v_numeric,
2324
is_numeric_v_string_like, is_extension_type)
2425
import pandas.core.algorithms as algos
@@ -4443,6 +4444,8 @@ def _lcd_dtype(l):
44434444
return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
44444445
return lcd
44454446

4447+
elif have_int and have_float and not have_complex:
4448+
return np.dtype('float64')
44464449
elif have_complex:
44474450
return np.dtype('c16')
44484451
else:
@@ -4785,6 +4788,8 @@ def get_empty_dtype_and_na(join_units):
47854788
upcast_cls = 'datetime'
47864789
elif is_timedelta64_dtype(dtype):
47874790
upcast_cls = 'timedelta'
4791+
elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
4792+
upcast_cls = dtype.name
47884793
else:
47894794
upcast_cls = 'float'
47904795

@@ -4809,8 +4814,6 @@ def get_empty_dtype_and_na(join_units):
48094814
return np.dtype(np.bool_), None
48104815
elif 'category' in upcast_classes:
48114816
return np.dtype(np.object_), np.nan
4812-
elif 'float' in upcast_classes:
4813-
return np.dtype(np.float64), np.nan
48144817
elif 'datetimetz' in upcast_classes:
48154818
dtype = upcast_classes['datetimetz']
48164819
return dtype[0], tslib.iNaT
@@ -4819,7 +4822,17 @@ def get_empty_dtype_and_na(join_units):
48194822
elif 'timedelta' in upcast_classes:
48204823
return np.dtype('m8[ns]'), tslib.iNaT
48214824
else: # pragma
4822-
raise AssertionError("invalid dtype determination in get_concat_dtype")
4825+
g = np.find_common_type(upcast_classes, [])
4826+
if is_float_dtype(g):
4827+
return g, g.type(np.nan)
4828+
elif is_numeric_dtype(g):
4829+
if has_none_blocks:
4830+
return np.float64, np.nan
4831+
else:
4832+
return g, None
4833+
else:
4834+
msg = "invalid dtype determination in get_concat_dtype"
4835+
raise AssertionError(msg)
48234836

48244837

48254838
def concatenate_join_units(join_units, concat_axis, copy):
@@ -5083,7 +5096,6 @@ def is_null(self):
50835096
return True
50845097

50855098
def get_reindexed_values(self, empty_dtype, upcasted_na):
5086-
50875099
if upcasted_na is None:
50885100
# No upcasting is necessary
50895101
fill_value = self.block.fill_value

pandas/tests/indexing/test_indexing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4035,11 +4035,11 @@ def f():
40354035

40364036
self.assertRaises(ValueError, f)
40374037

4038-
# these are coerced to float unavoidably (as its a list-like to begin)
4038+
# these are unavoidably coerced to object (as it's a list-like to begin with)
40394039
df = DataFrame(columns=['A', 'B'])
40404040
df.loc[3] = [6, 7]
40414041
assert_frame_equal(df, DataFrame(
4042-
[[6, 7]], index=[3], columns=['A', 'B'], dtype='float64'))
4042+
[[6, 7]], index=[3], columns=['A', 'B'], dtype='object'))
40434043

40444044
def test_partial_setting_with_datetimelike_dtype(self):
40454045

pandas/tests/test_internals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,7 @@ def test_interleave(self):
655655
mgr = create_mgr('a: f8; b: i8')
656656
self.assertEqual(mgr.as_matrix().dtype, 'f8')
657657
mgr = create_mgr('a: f4; b: i8')
658-
self.assertEqual(mgr.as_matrix().dtype, 'f4')
658+
self.assertEqual(mgr.as_matrix().dtype, 'f8')
659659
mgr = create_mgr('a: f4; b: i8; d: object')
660660
self.assertEqual(mgr.as_matrix().dtype, 'object')
661661
mgr = create_mgr('a: bool; b: i8')

pandas/tools/tests/test_concat.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1031,6 +1031,40 @@ def test_concat_invalid_first_argument(self):
10311031
expected = read_csv(StringIO(data))
10321032
assert_frame_equal(result, expected)
10331033

1034+
def test_concat_no_unnecessary_upcasts(self):
1035+
# fixes #13247
1036+
1037+
for pdt in [pd.Series, pd.DataFrame, pd.Panel, pd.Panel4D]:
1038+
dims = pdt().ndim
1039+
for dt in np.sctypes['float']:
1040+
dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
1041+
pdt(np.array([np.nan], dtype=dt, ndmin=dims)),
1042+
pdt(np.array([5], dtype=dt, ndmin=dims))]
1043+
x = pd.concat(dfs)
1044+
self.assertTrue(x.values.dtype == dt)
1045+
1046+
for dt in (np.sctypes['int'] + np.sctypes['uint']):
1047+
dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
1048+
pdt(np.array([5], dtype=dt, ndmin=dims))]
1049+
x = pd.concat(dfs)
1050+
self.assertTrue(x.values.dtype == dt)
1051+
1052+
objs = []
1053+
objs.append(pdt(np.array([1], dtype=np.float32, ndmin=dims)))
1054+
objs.append(pdt(np.array([1], dtype=np.float16, ndmin=dims)))
1055+
self.assertTrue(pd.concat(objs).values.dtype == np.float32)
1056+
1057+
objs = []
1058+
objs.append(pdt(np.array([1], dtype=np.int32, ndmin=dims)))
1059+
objs.append(pdt(np.array([1], dtype=np.int64, ndmin=dims)))
1060+
self.assertTrue(pd.concat(objs).values.dtype == np.int64)
1061+
1062+
# mixing int32 with float16: not sure what the best answer is here; float64 is what is asserted below
1063+
objs = []
1064+
objs.append(pdt(np.array([1], dtype=np.int32, ndmin=dims)))
1065+
objs.append(pdt(np.array([1], dtype=np.float16, ndmin=dims)))
1066+
self.assertTrue(pd.concat(objs).values.dtype == np.float64)
1067+
10341068

10351069
if __name__ == '__main__':
10361070
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)