Skip to content

Commit c7fb0f2

Browse files
committed
Use numpy to determine common dtypes.
1 parent fb6237c commit c7fb0f2

File tree

2 files changed

+11
-18
lines changed

2 files changed

+11
-18
lines changed

pandas/core/internals.py

+3-15
Original file line numberDiff line numberDiff line change
@@ -4435,17 +4435,6 @@ def _interleaved_dtype(blocks):
44354435
for x in blocks:
44364436
counts[type(x)].append(x)
44374437

4438-
def _lcd_dtype(l):
4439-
""" find the lowest dtype that can accommodate the given types """
4440-
m = l[0].dtype
4441-
for x in l[1:]:
4442-
# the new dtype must either be wider or a strict subtype
4443-
if (x.dtype.itemsize > m.itemsize or
4444-
(np.issubdtype(m, x.dtype) and
4445-
not np.issubdtype(x.dtype, m))):
4446-
m = x.dtype
4447-
return m
4448-
44494438
have_int = len(counts[IntBlock]) > 0
44504439
have_bool = len(counts[BoolBlock]) > 0
44514440
have_object = len(counts[ObjectBlock]) > 0
@@ -4458,7 +4447,6 @@ def _lcd_dtype(l):
44584447
# TODO: have_sparse is not used
44594448
have_sparse = len(counts[SparseBlock]) > 0 # noqa
44604449
have_numeric = have_float or have_complex or have_int
4461-
44624450
has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat
44634451

44644452
if (have_object or
@@ -4470,10 +4458,9 @@ def _lcd_dtype(l):
44704458
elif have_bool:
44714459
return np.dtype(bool)
44724460
elif have_int and not have_float and not have_complex:
4473-
44744461
# if we are mixing unsigned and signed, then return
44754462
# the next biggest int type (if we can)
4476-
lcd = _lcd_dtype(counts[IntBlock])
4463+
lcd = np.find_common_type([b.dtype for b in counts[IntBlock]], [])
44774464
kinds = set([i.dtype.kind for i in counts[IntBlock]])
44784465
if len(kinds) == 1:
44794466
return lcd
@@ -4489,7 +4476,8 @@ def _lcd_dtype(l):
44894476
elif have_complex:
44904477
return np.dtype('c16')
44914478
else:
4492-
return _lcd_dtype(counts[FloatBlock] + counts[SparseBlock])
4479+
introspection_blks = counts[FloatBlock] + counts[SparseBlock]
4480+
return np.find_common_type([b.dtype for b in introspection_blks], [])
44934481

44944482

44954483
def _consolidate(blocks):

pandas/tests/frame/test_block_internals.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -104,15 +104,20 @@ def test_as_matrix_lcd(self):
104104
values = self.mixed_float.as_matrix(['C'])
105105
self.assertEqual(values.dtype, np.float16)
106106

107+
# B is uint64 (unsigned); mixing it with signed int types forces a float result
107108
values = self.mixed_int.as_matrix(['A', 'B', 'C', 'D'])
108-
self.assertEqual(values.dtype, np.int64)
109+
self.assertEqual(values.dtype, np.float64)
109110

110111
values = self.mixed_int.as_matrix(['A', 'D'])
111112
self.assertEqual(values.dtype, np.int64)
112113

113-
# guess all ints are cast to uints....
114+
# B is uint64 (unsigned); mixing it with signed int types forces a float result
114115
values = self.mixed_int.as_matrix(['A', 'B', 'C'])
115-
self.assertEqual(values.dtype, np.int64)
116+
self.assertEqual(values.dtype, np.float64)
117+
118+
# as B and C are both unsigned, no forcing to float is needed
119+
values = self.mixed_int.as_matrix(['B', 'C'])
120+
self.assertEqual(values.dtype, np.uint64)
116121

117122
values = self.mixed_int.as_matrix(['A', 'C'])
118123
self.assertEqual(values.dtype, np.int32)

0 commit comments

Comments
 (0)