Use numpy to determine common dtypes.

sstanovnik · sstanovnik · commit c7fb0f252fe2 · 2016-08-08T21:35:18.000+02:00
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -4435,17 +4435,6 @@ def _interleaved_dtype(blocks):
     for x in blocks:
         counts[type(x)].append(x)
 
-    def _lcd_dtype(l):
-        """ find the lowest dtype that can accomodate the given types """
-        m = l[0].dtype
-        for x in l[1:]:
-            # the new dtype must either be wider or a strict subtype
-            if (x.dtype.itemsize > m.itemsize or
-                    (np.issubdtype(m, x.dtype) and
-                     not np.issubdtype(x.dtype, m))):
-                m = x.dtype
-        return m
-
     have_int = len(counts[IntBlock]) > 0
     have_bool = len(counts[BoolBlock]) > 0
     have_object = len(counts[ObjectBlock]) > 0
@@ -4458,7 +4447,6 @@ def _lcd_dtype(l):
     # TODO: have_sparse is not used
     have_sparse = len(counts[SparseBlock]) > 0  # noqa
     have_numeric = have_float or have_complex or have_int
-
     has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat
 
     if (have_object or
@@ -4470,10 +4458,9 @@ def _lcd_dtype(l):
     elif have_bool:
         return np.dtype(bool)
     elif have_int and not have_float and not have_complex:
-
         # if we are mixing unsigned and signed, then return
         # the next biggest int type (if we can)
-        lcd = _lcd_dtype(counts[IntBlock])
+        lcd = np.find_common_type([b.dtype for b in counts[IntBlock]], [])
         kinds = set([i.dtype.kind for i in counts[IntBlock]])
         if len(kinds) == 1:
             return lcd
@@ -4489,7 +4476,8 @@ def _lcd_dtype(l):
     elif have_complex:
         return np.dtype('c16')
     else:
-        return _lcd_dtype(counts[FloatBlock] + counts[SparseBlock])
+        introspection_blks = counts[FloatBlock] + counts[SparseBlock]
+        return np.find_common_type([b.dtype for b in introspection_blks], [])
 
 
 def _consolidate(blocks):
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -104,15 +104,20 @@ def test_as_matrix_lcd(self):
         values = self.mixed_float.as_matrix(['C'])
         self.assertEqual(values.dtype, np.float16)
 
+        # B is uint64; mixing it with signed ints has no common int dtype -> float64
         values = self.mixed_int.as_matrix(['A', 'B', 'C', 'D'])
-        self.assertEqual(values.dtype, np.int64)
+        self.assertEqual(values.dtype, np.float64)
 
         values = self.mixed_int.as_matrix(['A', 'D'])
         self.assertEqual(values.dtype, np.int64)
 
-        # guess all ints are cast to uints....
+        # B is uint64; mixing it with signed ints has no common int dtype -> float64
         values = self.mixed_int.as_matrix(['A', 'B', 'C'])
-        self.assertEqual(values.dtype, np.int64)
+        self.assertEqual(values.dtype, np.float64)
+
+        # as B and C are both unsigned, no forcing to float is needed
+        values = self.mixed_int.as_matrix(['B', 'C'])
+        self.assertEqual(values.dtype, np.uint64)
 
         values = self.mixed_int.as_matrix(['A', 'C'])
         self.assertEqual(values.dtype, np.int32)