diff --git a/doc/source/release.rst b/doc/source/release.rst index 99b8bfc460068..9650089279f12 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -118,6 +118,7 @@ Bug Fixes - Bug in rolling skew/kurtosis when passed a Series with bad data (:issue:`5749`) - Bug in scipy ``interpolate`` methods with a datetime index (:issue:`5975`) - Bug in NaT comparison if a mixed datetime/np.datetime64 with NaT were passed (:issue:`5968`) + - Bug in ``pd.concat`` losing dtype information when all inputs are empty (:issue:`5742`) pandas 0.13.0 ------------- diff --git a/pandas/core/common.py b/pandas/core/common.py index e8bcfa71fe32a..cd78f35aabdf9 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2326,20 +2326,23 @@ def _check_as_is(x): def _concat_compat(to_concat, axis=0): # filter empty arrays - to_concat = [x for x in to_concat if x.shape[axis] > 0] - - # return the empty np array, if nothing to concatenate, #3121 - if not to_concat: - return np.array([], dtype=object) - - is_datetime64 = [x.dtype == _NS_DTYPE for x in to_concat] - if all(is_datetime64): - # work around NumPy 1.6 bug - new_values = np.concatenate([x.view(np.int64) for x in to_concat], - axis=axis) - return new_values.view(_NS_DTYPE) - elif any(is_datetime64): - to_concat = [_to_pydatetime(x) for x in to_concat] + nonempty = [x for x in to_concat if x.shape[axis] > 0] + + # If all arrays are empty, there's nothing to convert, so short-cut to + # the concatenation, #3121. + # + # Creating an empty array directly is tempting, but the gain would be + # marginal: it would still require shape & dtype calculation, and + # np.concatenate already implements both in compiled code. 
+ if nonempty: + is_datetime64 = [x.dtype == _NS_DTYPE for x in nonempty] + if all(is_datetime64): + # work around NumPy 1.6 bug + new_values = np.concatenate([x.view(np.int64) for x in nonempty], + axis=axis) + return new_values.view(_NS_DTYPE) + elif any(is_datetime64): + to_concat = [_to_pydatetime(x) for x in nonempty] return np.concatenate(to_concat, axis=axis) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index edcf7a0a491b0..3b6e4ba445ce0 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -11909,6 +11909,23 @@ def test_to_csv_date_format(self): assert_frame_equal(test, nat_frame) + def test_concat_empty_dataframe_dtypes(self): + df = DataFrame(columns=list("abc")) + df['a'] = df['a'].astype(np.bool_) + df['b'] = df['b'].astype(np.int32) + df['c'] = df['c'].astype(np.float64) + + result = pd.concat([df, df]) + self.assertEqual(result['a'].dtype, np.bool_) + self.assertEqual(result['b'].dtype, np.int32) + self.assertEqual(result['c'].dtype, np.float64) + + result = pd.concat([df, df.astype(np.float64)]) + self.assertEqual(result['a'].dtype, np.object_) + self.assertEqual(result['b'].dtype, np.float64) + self.assertEqual(result['c'].dtype, np.float64) + + def skip_if_no_ne(engine='numexpr'): if engine == 'numexpr': try: diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 70dd38c2641ef..6b4a9a2bc4c22 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5441,6 +5441,15 @@ def test_numpy_unique(self): # it works! result = np.unique(self.ts) + def test_concat_empty_series_dtypes(self): + self.assertEqual(pd.concat([Series(dtype=np.float64)]).dtype, np.float64) + self.assertEqual(pd.concat([Series(dtype=np.int8)]).dtype, np.int8) + self.assertEqual(pd.concat([Series(dtype=np.bool_)]).dtype, np.bool_) + + self.assertEqual(pd.concat([Series(dtype=np.bool_), + Series(dtype=np.int32)]).dtype, np.int32) + + class TestSeriesNonUnique(tm.TestCase):