pandas-dev · jaehoonhwang · Mar 6, 2017 · Mar 8, 2017 · Mar 8, 2017 · Mar 8, 2017
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -886,3 +886,5 @@ Bug Fixes
 - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
 - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
 - Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`)
+
+- Concatenating multiple objects will no longer automatically upcast the result to ``float64``; instead, the smallest ``dtype`` that suffices is used (:issue:`13247`)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -21,6 +21,7 @@
                                  is_datetime64tz_dtype,
                                  is_object_dtype,
                                  is_datetimelike_v_numeric,
+                                 is_float_dtype, is_numeric_dtype,
                                  is_numeric_v_string_like, is_extension_type,
                                  is_list_like,
                                  is_re,
@@ -4522,6 +4523,8 @@ def _interleaved_dtype(blocks):
             return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
         return lcd
 
+    elif have_int and have_float and not have_complex:
+        return np.dtype('float64')
     elif have_complex:
         return np.dtype('c16')
     else:
@@ -4891,6 +4894,8 @@ def get_empty_dtype_and_na(join_units):
             upcast_cls = 'datetime'
         elif is_timedelta64_dtype(dtype):
             upcast_cls = 'timedelta'
+        elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
+            upcast_cls = dtype.name
         else:
             upcast_cls = 'float'
 
@@ -4915,8 +4920,6 @@ def get_empty_dtype_and_na(join_units):
             return np.dtype(np.bool_), None
     elif 'category' in upcast_classes:
         return np.dtype(np.object_), np.nan
-    elif 'float' in upcast_classes:
-        return np.dtype(np.float64), np.nan
     elif 'datetimetz' in upcast_classes:
         dtype = upcast_classes['datetimetz']
         return dtype[0], tslib.iNaT
@@ -4925,7 +4928,17 @@ def get_empty_dtype_and_na(join_units):
     elif 'timedelta' in upcast_classes:
         return np.dtype('m8[ns]'), tslib.iNaT
     else:  # pragma
-        raise AssertionError("invalid dtype determination in get_concat_dtype")
+        g = np.find_common_type(upcast_classes, [])
+        if is_float_dtype(g):
+            return g, g.type(np.nan)
+        elif is_numeric_dtype(g):
+            if has_none_blocks:
+                return np.float64, np.nan
+            else:
+                return g, None
+        else:
+            msg = "invalid dtype determination in get_concat_dtype"
+            raise AssertionError(msg)
 
 
 def concatenate_join_units(join_units, concat_axis, copy):
@@ -5190,7 +5203,6 @@ def is_null(self):
         return True
 
     def get_reindexed_values(self, empty_dtype, upcasted_na):
-
         if upcasted_na is None:
             # No upcasting is necessary
             fill_value = self.block.fill_value

diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
@@ -210,7 +210,7 @@ def f():
         df.loc[3] = [6, 7]
 
         exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'],
-                        dtype='float64')
+                        dtype='object')
         tm.assert_frame_equal(df, exp)
 
     def test_series_partial_set(self):

diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
@@ -651,7 +651,7 @@ def test_interleave(self):
         mgr = create_mgr('a: f8; b: i8')
         self.assertEqual(mgr.as_matrix().dtype, 'f8')
         mgr = create_mgr('a: f4; b: i8')
-        self.assertEqual(mgr.as_matrix().dtype, 'f4')
+        self.assertEqual(mgr.as_matrix().dtype, 'f8')
         mgr = create_mgr('a: f4; b: i8; d: object')
         self.assertEqual(mgr.as_matrix().dtype, 'object')
         mgr = create_mgr('a: bool; b: i8')

diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py
@@ -250,6 +250,7 @@ def test_basic_types(self):
         self.assertEqual(type(r), exp_df_type)
 
         r = get_dummies(s_df, sparse=self.sparse, columns=['a'])
+        exp_blk_type = pd.core.internals.IntBlock
         self.assertEqual(type(r[['a_0']]._data.blocks[0]), exp_blk_type)
         self.assertEqual(type(r[['a_1']]._data.blocks[0]), exp_blk_type)
         self.assertEqual(type(r[['a_2']]._data.blocks[0]), exp_blk_type)

diff --git a/pandas/tests/tools/test_concat.py b/pandas/tests/tools/test_concat.py
@@ -13,6 +13,8 @@
                                  makeCustomDataframe as mkdf,
                                  assert_almost_equal)
 
+import pytest
+
 
 class ConcatenateBase(tm.TestCase):
 
@@ -1899,3 +1901,15 @@ def test_concat_multiindex_dfs_with_deepcopy(self):
         tm.assert_frame_equal(result_copy, expected)
         result_no_copy = pd.concat(example_dict, names=['testname'])
         tm.assert_frame_equal(result_no_copy, expected)
+
+
+@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
+@pytest.mark.parametrize('dt', np.sctypes['float'])
+def test_concat_no_unnecessary_upcast(dt, pdt):
+    # GH 13247: concat of same-dtype float objects must keep that dtype
+    dims = pdt().ndim
+    dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
+           pdt(np.array([np.nan], dtype=dt, ndmin=dims)),
+           pdt(np.array([5], dtype=dt, ndmin=dims))]
+    x = pd.concat(dfs)
+    assert x.values.dtype == dt