Skip to content

Commit 7d34d4d

Browse files
jaehoonhwang authored and jreback committed
BUG: upcasting on reshaping ops pandas-dev#13247
Original work done by @jennolsen84, in pandas-dev#13337 closes pandas-dev#13247 Author: Jaehoon Hwang <[email protected]> Author: Jae <[email protected]> Closes pandas-dev#15594 from jaehoonhwang/Bug13247 and squashes the following commits: 3cd1734 [Jaehoon Hwang] Pass the non-related tests in test_partial and test_reshape 1fa578b [Jaehoon Hwang] Applying request changes removing unnecessary test and renameing 6744636 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 5bb72c7 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 a1d5d40 [Jaehoon Hwang] Completed pytest 8122359 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 0e52b74 [Jaehoon Hwang] Working: Except for pytest 8fec07c [Jaehoon Hwang] Fix: test_concat.py and internals.py 4f6c03e [Jaehoon Hwang] Fix: is_float_dtypes and is_numeric_dtype wrong place d3476c0 [Jaehoon Hwang] Merge branch 'master' into Bug13247 b977615 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' 4b1e5c6 [Jaehoon Hwang] Merge remote-tracking branch 'pandas-dev/master' into Bug13247 45f7ae9 [Jaehoon Hwang] Added pytest function 468baee [Jae] BUG: upcasting on reshaping ops pandas-dev#13247
1 parent 05d70f4 commit 7d34d4d

File tree

6 files changed

+35
-6
lines changed

6 files changed

+35
-6
lines changed

doc/source/whatsnew/v0.20.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -886,3 +886,5 @@ Bug Fixes
886886
- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
887887
- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
888888
- Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`)
889+
890+
- Concatenating multiple objects will no longer automatically upcast the result to ``float64``; instead, the smallest ``dtype`` that can hold all of the values is used (:issue:`13247`)

pandas/core/internals.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
is_datetime64tz_dtype,
2222
is_object_dtype,
2323
is_datetimelike_v_numeric,
24+
is_float_dtype, is_numeric_dtype,
2425
is_numeric_v_string_like, is_extension_type,
2526
is_list_like,
2627
is_re,
@@ -4522,6 +4523,8 @@ def _interleaved_dtype(blocks):
45224523
return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
45234524
return lcd
45244525

4526+
elif have_int and have_float and not have_complex:
4527+
return np.dtype('float64')
45254528
elif have_complex:
45264529
return np.dtype('c16')
45274530
else:
@@ -4891,6 +4894,8 @@ def get_empty_dtype_and_na(join_units):
48914894
upcast_cls = 'datetime'
48924895
elif is_timedelta64_dtype(dtype):
48934896
upcast_cls = 'timedelta'
4897+
elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
4898+
upcast_cls = dtype.name
48944899
else:
48954900
upcast_cls = 'float'
48964901

@@ -4915,8 +4920,6 @@ def get_empty_dtype_and_na(join_units):
49154920
return np.dtype(np.bool_), None
49164921
elif 'category' in upcast_classes:
49174922
return np.dtype(np.object_), np.nan
4918-
elif 'float' in upcast_classes:
4919-
return np.dtype(np.float64), np.nan
49204923
elif 'datetimetz' in upcast_classes:
49214924
dtype = upcast_classes['datetimetz']
49224925
return dtype[0], tslib.iNaT
@@ -4925,7 +4928,17 @@ def get_empty_dtype_and_na(join_units):
49254928
elif 'timedelta' in upcast_classes:
49264929
return np.dtype('m8[ns]'), tslib.iNaT
49274930
else: # pragma
4928-
raise AssertionError("invalid dtype determination in get_concat_dtype")
4931+
g = np.find_common_type(upcast_classes, [])
4932+
if is_float_dtype(g):
4933+
return g, g.type(np.nan)
4934+
elif is_numeric_dtype(g):
4935+
if has_none_blocks:
4936+
return np.float64, np.nan
4937+
else:
4938+
return g, None
4939+
else:
4940+
msg = "invalid dtype determination in get_concat_dtype"
4941+
raise AssertionError(msg)
49294942

49304943

49314944
def concatenate_join_units(join_units, concat_axis, copy):
@@ -5190,7 +5203,6 @@ def is_null(self):
51905203
return True
51915204

51925205
def get_reindexed_values(self, empty_dtype, upcasted_na):
5193-
51945206
if upcasted_na is None:
51955207
# No upcasting is necessary
51965208
fill_value = self.block.fill_value

pandas/tests/indexing/test_partial.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def f():
210210
df.loc[3] = [6, 7]
211211

212212
exp = DataFrame([[6, 7]], index=[3], columns=['A', 'B'],
213-
dtype='float64')
213+
dtype='object')
214214
tm.assert_frame_equal(df, exp)
215215

216216
def test_series_partial_set(self):

pandas/tests/test_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -651,7 +651,7 @@ def test_interleave(self):
651651
mgr = create_mgr('a: f8; b: i8')
652652
self.assertEqual(mgr.as_matrix().dtype, 'f8')
653653
mgr = create_mgr('a: f4; b: i8')
654-
self.assertEqual(mgr.as_matrix().dtype, 'f4')
654+
self.assertEqual(mgr.as_matrix().dtype, 'f8')
655655
mgr = create_mgr('a: f4; b: i8; d: object')
656656
self.assertEqual(mgr.as_matrix().dtype, 'object')
657657
mgr = create_mgr('a: bool; b: i8')

pandas/tests/test_reshape.py

+1
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ def test_basic_types(self):
250250
self.assertEqual(type(r), exp_df_type)
251251

252252
r = get_dummies(s_df, sparse=self.sparse, columns=['a'])
253+
exp_blk_type = pd.core.internals.IntBlock
253254
self.assertEqual(type(r[['a_0']]._data.blocks[0]), exp_blk_type)
254255
self.assertEqual(type(r[['a_1']]._data.blocks[0]), exp_blk_type)
255256
self.assertEqual(type(r[['a_2']]._data.blocks[0]), exp_blk_type)

pandas/tests/tools/test_concat.py

+14
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
makeCustomDataframe as mkdf,
1414
assert_almost_equal)
1515

16+
import pytest
17+
1618

1719
class ConcatenateBase(tm.TestCase):
1820

@@ -1899,3 +1901,15 @@ def test_concat_multiindex_dfs_with_deepcopy(self):
18991901
tm.assert_frame_equal(result_copy, expected)
19001902
result_no_copy = pd.concat(example_dict, names=['testname'])
19011903
tm.assert_frame_equal(result_no_copy, expected)
1904+
1905+
1906+
@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
1907+
@pytest.mark.parametrize('dt', np.sctypes['float'])
1908+
def test_concat_no_unnecessary_upcast(dt, pdt):
1909+
# GH 13247
1910+
dims = pdt().ndim
1911+
dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
1912+
pdt(np.array([np.nan], dtype=dt, ndmin=dims)),
1913+
pdt(np.array([5], dtype=dt, ndmin=dims))]
1914+
x = pd.concat(dfs)
1915+
assert x.values.dtype == dt

0 commit comments

Comments
 (0)