pandas-dev · jennolsen84 · May 31, 2016 · jreback · Jun 8, 2016 · jennolsen84
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -43,7 +43,7 @@ Backwards incompatible API changes
 .. _whatsnew_0190.api:
 
 
-
+- Concatenating multiple objects no longer automatically upcasts the result to ``float64``; instead, the smallest ``dtype`` that can hold all of the inputs is used (:issue:`13247`)
 
 
 

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -19,6 +19,7 @@
                                 array_equivalent, _is_na_compat,
                                 _maybe_convert_string_to_object,
                                 _maybe_convert_scalar,
+                                is_float_dtype, is_numeric_dtype,
                                 is_categorical, is_datetimelike_v_numeric,
                                 is_numeric_v_string_like, is_extension_type)
 import pandas.core.algorithms as algos
@@ -4443,6 +4444,8 @@ def _lcd_dtype(l):
             return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
         return lcd
 
+    elif have_int and have_float and not have_complex:
+        return np.dtype('float64')
     elif have_complex:
         return np.dtype('c16')
     else:
@@ -4785,6 +4788,8 @@ def get_empty_dtype_and_na(join_units):
             upcast_cls = 'datetime'
         elif is_timedelta64_dtype(dtype):
             upcast_cls = 'timedelta'
+        elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
+            upcast_cls = dtype.name
         else:
             upcast_cls = 'float'
 
@@ -4809,8 +4814,6 @@ def get_empty_dtype_and_na(join_units):
             return np.dtype(np.bool_), None
     elif 'category' in upcast_classes:
         return np.dtype(np.object_), np.nan
-    elif 'float' in upcast_classes:
-        return np.dtype(np.float64), np.nan
     elif 'datetimetz' in upcast_classes:
         dtype = upcast_classes['datetimetz']
         return dtype[0], tslib.iNaT
@@ -4819,7 +4822,17 @@ def get_empty_dtype_and_na(join_units):
     elif 'timedelta' in upcast_classes:
         return np.dtype('m8[ns]'), tslib.iNaT
     else:  # pragma
-        raise AssertionError("invalid dtype determination in get_concat_dtype")
+        g = np.find_common_type(upcast_classes, [])
+        if is_float_dtype(g):
+            return g, g.type(np.nan)
+        elif is_numeric_dtype(g):
+            if has_none_blocks:
+                return np.float64, np.nan
+            else:
+                return g, None
+        else:
+            msg = "invalid dtype determination in get_concat_dtype"
+            raise AssertionError(msg)
 
 
 def concatenate_join_units(join_units, concat_axis, copy):
@@ -5083,7 +5096,6 @@ def is_null(self):
         return True
 
     def get_reindexed_values(self, empty_dtype, upcasted_na):
-
         if upcasted_na is None:
             # No upcasting is necessary
             fill_value = self.block.fill_value

diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -4035,11 +4035,11 @@ def f():
 
         self.assertRaises(ValueError, f)
 
-        # these are coerced to float unavoidably (as its a list-like to begin)
+        # these are coerced to object unavoidably (as its a list-like to begin)
         df = DataFrame(columns=['A', 'B'])
         df.loc[3] = [6, 7]
         assert_frame_equal(df, DataFrame(
-            [[6, 7]], index=[3], columns=['A', 'B'], dtype='float64'))
+            [[6, 7]], index=[3], columns=['A', 'B'], dtype='object'))
 
     def test_partial_setting_with_datetimelike_dtype(self):
 

diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
@@ -655,7 +655,7 @@ def test_interleave(self):
         mgr = create_mgr('a: f8; b: i8')
         self.assertEqual(mgr.as_matrix().dtype, 'f8')
         mgr = create_mgr('a: f4; b: i8')
-        self.assertEqual(mgr.as_matrix().dtype, 'f4')
+        self.assertEqual(mgr.as_matrix().dtype, 'f8')
         mgr = create_mgr('a: f4; b: i8; d: object')
         self.assertEqual(mgr.as_matrix().dtype, 'object')
         mgr = create_mgr('a: bool; b: i8')

diff --git a/pandas/tools/tests/test_concat.py b/pandas/tools/tests/test_concat.py
@@ -1031,6 +1031,40 @@ def test_concat_invalid_first_argument(self):
         expected = read_csv(StringIO(data))
         assert_frame_equal(result, expected)
 
+    def test_concat_no_unnecessary_upcasts(self):
+        # GH 13247: concat should not upcast when a narrower dtype suffices
+
+        for pdt in [pd.Series, pd.DataFrame, pd.Panel, pd.Panel4D]:
+            dims = pdt().ndim
+            for dt in np.sctypes['float']:
+                dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
+                       pdt(np.array([np.nan], dtype=dt, ndmin=dims)),
+                       pdt(np.array([5], dtype=dt, ndmin=dims))]
+                x = pd.concat(dfs)
+                self.assertTrue(x.values.dtype == dt)
+
+            for dt in (np.sctypes['int'] + np.sctypes['uint']):
+                dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
+                       pdt(np.array([5], dtype=dt, ndmin=dims))]
+                x = pd.concat(dfs)
+                self.assertTrue(x.values.dtype == dt)
+
+            objs = []
+            objs.append(pdt(np.array([1], dtype=np.float32, ndmin=dims)))
+            objs.append(pdt(np.array([1], dtype=np.float16, ndmin=dims)))
+            self.assertTrue(pd.concat(objs).values.dtype == np.float32)
+
+            objs = []
+            objs.append(pdt(np.array([1], dtype=np.int32, ndmin=dims)))
+            objs.append(pdt(np.array([1], dtype=np.int64, ndmin=dims)))
+            self.assertTrue(pd.concat(objs).values.dtype == np.int64)
+
+            # NOTE: int32 + float16 is expected to give float64; debatable
+            objs = []
+            objs.append(pdt(np.array([1], dtype=np.int32, ndmin=dims)))
+            objs.append(pdt(np.array([1], dtype=np.float16, ndmin=dims)))
+            self.assertTrue(pd.concat(objs).values.dtype == np.float64)
+
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Original file line number	Diff line number	Diff line change
Expand Up		@@ -43,7 +43,7 @@ Backwards incompatible API changes
		.. _whatsnew_0190.api:



		- Concatenating multiple objects no longer automatically upcasts the result to ``float64``; instead, the smallest ``dtype`` that can hold all of the inputs is used (:issue:`13247`)
Copy link Contributor jreback Jun 8, 2016 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. did you put this in 0.19.0 on purpose? I think this is prob ok for 0.18.2 Copy link Contributor Author jennolsen84 Jun 9, 2016 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Yes, because was tagged with "Next major release", so I thought it was supposed to go in 0.19. #13247. I can easily move back. Copy link Contributor jreback Nov 16, 2016 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. move to 0.20.0



Expand Down