diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index d10d51352d0e4..f51dcf662f593 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1370,6 +1370,8 @@ Reshaping
 - Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)
 - Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)
 - Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`)
+- Bug in :func:`concat` which raises an error when concatenating TZ-aware dataframes and all-NaT dataframes (:issue:`12396`)
+- Bug in :func:`concat` which raises an error when concatenating empty TZ-aware series (:issue:`18447`)
 - Bug in :func:`get_dummies`, and :func:`select_dtypes`, where duplicate column names caused incorrect behavior (:issue:`20848`)
 - Bug in :func:`isna`, which cannot handle ambiguous typed lists (:issue:`20675`)
 
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 4aa74cdbbc2c0..1f4d3069838ba 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -465,8 +465,13 @@ def convert_to_pydatetime(x, axis):
         if _contains_datetime:
 
             if 'datetime' in typs:
-                new_values = np.concatenate([x.view(np.int64) for x in
-                                             to_concat], axis=axis)
+                to_concat = [np.array(x, copy=False).view(np.int64)
+                             for x in to_concat]
+                # concatenating along axis 1 needs at least 2-d inputs
+                if axis == 1:
+                    to_concat = [np.atleast_2d(x) for x in to_concat]
+
+                new_values = np.concatenate(to_concat, axis=axis)
                 return new_values.view(_NS_DTYPE)
             else:
                 # when to_concat has different tz, len(typs) > 1.
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 7837faf5b4c0f..8b1178576c6d8 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2183,17 +2183,22 @@ def _assert_take_fillable(self, values, indices, allow_fill=True,
                               fill_value=None, na_value=np.nan):
         """ Internal method to handle NA filling of take """
         indices = _ensure_platform_int(indices)
-
         # only fill if we are passing a non-None fill_value
         if allow_fill and fill_value is not None:
             if (indices < -1).any():
                 msg = ('When allow_fill=True and fill_value is not None, '
                        'all indices must be >= -1')
                 raise ValueError(msg)
-            taken = values.take(indices)
             mask = indices == -1
-            if mask.any():
-                taken[mask] = na_value
+            # ``mask.all()`` is vacuously True for empty ``indices``; those
+            # must still go through ``take`` to keep the dtype of ``values``
+            if len(mask) and mask.all():
+                # every slot is a fill slot: nothing is read from ``values``
+                taken = np.full(indices.shape, fill_value=na_value)
+            else:
+                taken = values.take(indices)
+                if mask.any():
+                    taken[mask] = na_value
         else:
             taken = values.take(indices)
         return taken
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 474894aba65df..34f8e36f338ea 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -5835,8 +5835,10 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
                     if len(values) and values[0] is None:
                         fill_value = None
 
-                if getattr(self.block, 'is_datetimetz', False):
-                    pass
+                if getattr(self.block, 'is_datetimetz', False) or \
+                        is_datetimetz(empty_dtype):
+                    missing_arr = np.full(np.prod(self.shape), fill_value)
+                    return DatetimeIndex(missing_arr, dtype=empty_dtype)
                 elif getattr(self.block, 'is_categorical', False):
                     pass
                 elif getattr(self.block, 'is_sparse', False):
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 57af67422d65f..7d4ffc964c7af 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1917,6 +1917,92 @@ def test_concat_tz_series_tzlocal(self):
         tm.assert_series_equal(result, pd.Series(x + y))
         assert result.dtype == 'datetime64[ns, tzlocal()]'
 
+    @pytest.mark.parametrize('tz1', [None, 'UTC'])
+    @pytest.mark.parametrize('tz2', [None, 'UTC'])
+    @pytest.mark.parametrize('s', [pd.NaT, pd.Timestamp('20150101')])
+    def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s):
+        # GH 12396
+
+        # all-NaT frame; stays tz-naive when tz1 is None
+        first = pd.DataFrame([[pd.NaT], [pd.NaT]]).apply(
+            lambda x: x.dt.tz_localize(tz1))
+        second = pd.DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2))
+
+        # a tz-naive side takes on the tz of the other side
+        if tz1 is None:
+            tz = tz2
+        elif tz2 is None:
+            tz = tz1
+        elif tz1 == tz2:
+            tz = tz1
+        else:
+            tz = None
+
+        result = pd.concat([first, second], axis=0)
+        expected = pd.DataFrame(pd.Series(
+            [pd.NaT, pd.NaT, s], index=[0, 1, 0]))
+        expected = expected.apply(lambda x: x.dt.tz_localize(tz))
+        assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize('tz1', [None, 'UTC'])
+    @pytest.mark.parametrize('tz2', [None, 'UTC'])
+    def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2):
+        # GH 12396
+
+        first = pd.DataFrame(pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1))
+        second = pd.DataFrame(pd.Series(
+            [pd.NaT]).dt.tz_localize(tz2), columns=[1])
+        expected = pd.DataFrame(
+            {0: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1),
+             1: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2)}
+        )
+        result = pd.concat([first, second], axis=1)
+        assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize('tz1', [None, 'UTC'])
+    @pytest.mark.parametrize('tz2', [None, 'UTC'])
+    def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
+        # GH 12396
+
+        # all-NaT series; stays tz-naive when tz1 is None
+        first = pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)
+        second = pd.DataFrame([[pd.Timestamp('2015/01/01', tz=tz2)],
+                               [pd.Timestamp('2016/01/01', tz=tz2)]],
+                              index=[2, 3])
+
+        if tz1 is None and tz2 is None:
+            tz = None
+
+        # first is all-NaT, so it can take on the tz of second
+        elif tz1 is None:
+            tz = tz2
+        elif tz1 == tz2:
+            tz = tz1
+        else:
+            tz = None
+        expected = pd.DataFrame([pd.NaT, pd.NaT,
+                                 pd.Timestamp('2015/01/01', tz=tz),
+                                 pd.Timestamp('2016/01/01', tz=tz)])
+
+        result = pd.concat([first, second])
+        assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize('tz', [None, 'UTC'])
+    def test_concat_NaT_dataframes(self, tz):
+        # GH 12396
+
+        first = pd.DataFrame([[pd.NaT], [pd.NaT]])
+        first = first.apply(lambda x: x.dt.tz_localize(tz))
+        second = pd.DataFrame([[pd.Timestamp('2015/01/01', tz=tz)],
+                               [pd.Timestamp('2016/01/01', tz=tz)]],
+                              index=[2, 3])
+        expected = pd.DataFrame([pd.NaT, pd.NaT,
+                                 pd.Timestamp('2015/01/01', tz=tz),
+                                 pd.Timestamp('2016/01/01', tz=tz)])
+
+        result = pd.concat([first, second], axis=0)
+        assert_frame_equal(result, expected)
+
     def test_concat_period_series(self):
         x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
         y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
@@ -1978,6 +2064,21 @@ def test_concat_empty_series(self):
                            columns=['x', 0])
         tm.assert_frame_equal(res, exp)
 
+    @pytest.mark.parametrize('tz', [None, 'UTC'])
+    @pytest.mark.parametrize('values', [[], [1, 2, 3]])
+    def test_concat_empty_series_timelike(self, tz, values):
+        # GH 18447
+
+        first = Series([], dtype='M8[ns]').dt.tz_localize(tz)
+        second = Series(values)
+        expected = DataFrame(
+            {0: pd.Series([pd.NaT] * len(values),
+                          dtype='M8[ns]'
+                          ).dt.tz_localize(tz),
+             1: values})
+        result = concat([first, second], axis=1)
+        assert_frame_equal(result, expected)
+
     def test_default_index(self):
         # is_series and ignore_index
         s1 = pd.Series([1, 2, 3], name='x')